update graphemes for Unicode 7, add utf8proc_grapheme_break function

This commit is contained in:
Steven G. Johnson
2014-12-12 16:27:49 -05:00
parent 539d2cc202
commit 397a1eabea
8 changed files with 10691 additions and 10393 deletions

View File

@@ -29,16 +29,19 @@ clean:
$(MAKE) -C bench clean
# update: install the newly generated data table by copying
# utf8proc_data.c.new over utf8proc_data.c (cp -f overwrites the old copy).
# Listing utf8proc_data.c.new as a prerequisite makes make bring it up to
# date before the copy runs.
update: utf8proc_data.c.new
cp -f utf8proc_data.c.new utf8proc_data.c
# real targets
# Generate utf8proc_data.c.new by running data_generator.rb under $(RUBY),
# feeding UnicodeData.txt on stdin and redirecting stdout to the target file.
# NOTE(review): two header lines for this target appear back to back. They
# look like the before/after versions of one line from a diff whose +/-
# markers were lost; the second one adds data_generator.rb and
# GraphemeBreakProperty.txt as prerequisites. Confirm which line the real
# Makefile keeps.
# Only UnicodeData.txt is passed to the script explicitly; presumably
# data_generator.rb opens the other listed .txt files itself — verify against
# the script.
utf8proc_data.c.new: UnicodeData.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt
utf8proc_data.c.new: data_generator.rb UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt
$(RUBY) data_generator.rb < UnicodeData.txt > utf8proc_data.c.new
# Fetch UnicodeData.txt from unicode.org (UNIDATA directory). The rule has no
# prerequisites, so make runs it only when the file does not exist yet.
# Assumes $(CURL) is curl, where -O saves under the remote file name — the
# variable is defined outside this view.
UnicodeData.txt:
$(CURL) -O http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
# Fetch GraphemeBreakProperty.txt from the UCD/latest/ucd/auxiliary directory
# on unicode.org. The rule has no prerequisites, so make runs it only when the
# file does not exist yet.
# NOTE(review): this URL uses the UCD/latest path while the UnicodeData.txt
# and DerivedCoreProperties.txt rules use UNIDATA — presumably both resolve to
# the same Unicode release; confirm.
GraphemeBreakProperty.txt:
$(CURL) -O http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
# Fetch DerivedCoreProperties.txt from unicode.org (UNIDATA directory). The
# rule has no prerequisites, so make runs it only when the file does not
# exist yet.
DerivedCoreProperties.txt:
$(CURL) -O http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
@@ -72,10 +75,13 @@ GraphemeBreakTest.txt:
$(CURL) http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
# Build the normtest program from normtest.c and utf8proc.o with $(cc)
# (defined outside this view). mojibake.h and tests.h are listed only as
# prerequisites: changing them triggers a rebuild, but they are not passed on
# the command line.
# NOTE(review): the two recipe lines differ only in "-o normtest" vs "-o $@"
# ($@ expands to the target name, so they are equivalent). They look like the
# before/after versions from a diff whose +/- markers were lost; confirm that
# the real Makefile keeps only one.
normtest: normtest.c utf8proc.o mojibake.h tests.h
$(cc) normtest.c utf8proc.o -o normtest
$(cc) normtest.c utf8proc.o -o $@
# Build the graphemetest program from graphemetest.c and utf8proc.o with
# $(cc) (defined outside this view). mojibake.h and tests.h are listed only as
# prerequisites: changing them triggers a rebuild, but they are not passed on
# the command line.
# NOTE(review): the two recipe lines differ only in "-o graphemetest" vs
# "-o $@" ($@ expands to the target name, so they are equivalent). They look
# like the before/after versions from a diff whose +/- markers were lost;
# confirm that the real Makefile keeps only one.
graphemetest: graphemetest.c utf8proc.o mojibake.h tests.h
$(cc) graphemetest.c utf8proc.o -o graphemetest
$(cc) graphemetest.c utf8proc.o -o $@
# Build the printproperty program from printproperty.c and utf8proc.o with
# $(cc) (defined outside this view); $@ expands to the target name, so the
# output file is "printproperty". mojibake.h and tests.h are listed only as
# prerequisites: changing them triggers a rebuild.
printproperty: printproperty.c utf8proc.o mojibake.h tests.h
$(cc) printproperty.c utf8proc.o -o $@
check: normtest NormalizationTest.txt graphemetest GraphemeBreakTest.txt
./normtest