Stop rewriting GraphemeBreakTest.txt after download.

Both build paths (CMakeLists.txt and data/Makefile) previously piped the
downloaded file through perl to replace U+00F7 with '/' and U+00D7 with '+'.
This patch saves the file unmodified instead, and test/graphemetest.c now
recognises the two-byte UTF-8 sequences 0xC3 0xB7 (U+00F7, grapheme break)
and 0xC3 0x97 (U+00D7, no break) directly, advancing by two bytes.

NOTE(review): line structure, indentation, Makefile recipe tabs and blank
context lines were restored from the hunk line counts; confirm against the
target tree, or apply with `git apply --ignore-whitespace`.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12bfda1..4c5649e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -66,8 +66,7 @@ if(UTF8PROC_ENABLE_TESTING)
   file(MAKE_DIRECTORY data)
   set(UNICODE_VERSION 13.0.0)
   file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
-  file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTestOrg.txt SHOW_PROGRESS)
-  execute_process(COMMAND bash -c "cat data/GraphemeBreakTestOrg.txt | /usr/bin/perl -pe 's,÷,/,g;s,×,+,g' && rm -f data/GraphemeBreakTestOrg.txt" OUTPUT_FILE data/GraphemeBreakTest.txt)
+  file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
   add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
   target_link_libraries(case utf8proc)
   add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
diff --git a/data/Makefile b/data/Makefile
index 8c6470f..aeef269 100644
--- a/data/Makefile
+++ b/data/Makefile
@@ -46,7 +46,7 @@ NormalizationTest.txt:
 	$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt
 
 GraphemeBreakTest.txt:
-	$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
+	$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt
 
 emoji-data.txt:
 	$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt
diff --git a/test/graphemetest.c b/test/graphemetest.c
index 337a030..93a7d03 100644
--- a/test/graphemetest.c
+++ b/test/graphemetest.c
@@ -18,12 +18,12 @@ int main(int argc, char **argv)
 
         while (buf[bi]) {
             bi = skipspaces(buf, bi);
-            if (buf[bi] == '/') { /* grapheme break */
+            if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
                 src[si++] = '/';
-                bi++;
+                bi += 2;
             }
-            else if (buf[bi] == '+') { /* no break */
-                bi++;
+            else if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0x97) { /* U+00d7 = no break */
+                bi += 2;
             }
             else if (buf[bi] == '#') { /* start of comments */
                 break;