fix grapheme test to work on unmodified data file

This commit is contained in:
Steven G. Johnson 2020-03-29 08:53:11 -04:00
parent 7e834d7702
commit 11bb3d9dc7
3 changed files with 6 additions and 7 deletions

View File

@@ -66,8 +66,7 @@ if(UTF8PROC_ENABLE_TESTING)
file(MAKE_DIRECTORY data) file(MAKE_DIRECTORY data)
set(UNICODE_VERSION 13.0.0) set(UNICODE_VERSION 13.0.0)
file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS) file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTestOrg.txt SHOW_PROGRESS) file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
execute_process(COMMAND bash -c "cat data/GraphemeBreakTestOrg.txt | /usr/bin/perl -pe 's,÷,/,g;s,×,+,g' && rm -f data/GraphemeBreakTestOrg.txt" OUTPUT_FILE data/GraphemeBreakTest.txt)
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c) add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
target_link_libraries(case utf8proc) target_link_libraries(case utf8proc)
add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c) add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)

View File

@@ -46,7 +46,7 @@ NormalizationTest.txt:
$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt
GraphemeBreakTest.txt: GraphemeBreakTest.txt:
$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@ $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt
emoji-data.txt: emoji-data.txt:
$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt

View File

@@ -18,12 +18,12 @@ int main(int argc, char **argv)
while (buf[bi]) { while (buf[bi]) {
bi = skipspaces(buf, bi); bi = skipspaces(buf, bi);
if (buf[bi] == '/') { /* grapheme break */ if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
src[si++] = '/'; src[si++] = '/';
bi++; bi += 2;
} }
else if (buf[bi] == '+') { /* no break */ else if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0x97) { /* U+00d7 = no break */
bi++; bi += 2;
} }
else if (buf[bi] == '#') { /* start of comments */ else if (buf[bi] == '#') { /* start of comments */
break; break;