update data and algorithms for Unicode 11 (#140)
This commit is contained in:
committed by
GitHub
parent
02f4e1890c
commit
d4a58cfec5
@@ -16,11 +16,11 @@ CURLFLAGS = --retry 5 --location
|
||||
|
||||
.DELETE_ON_ERROR:
|
||||
|
||||
utf8proc_data.c.new: data_generator.rb UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt CharWidths.txt
|
||||
utf8proc_data.c.new: data_generator.rb UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt CharWidths.txt emoji-data.txt
|
||||
$(RUBY) data_generator.rb < UnicodeData.txt > $@
|
||||
|
||||
# GNU Unifont version for font metric calculations:
|
||||
UNIFONT_VERSION=10.0.07
|
||||
UNIFONT_VERSION=11.0.01
|
||||
|
||||
unifont.ttf:
|
||||
$(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://mirrors.kernel.org/gnu/unifont/unifont-$(UNIFONT_VERSION)/unifont-$(UNIFONT_VERSION).ttf
|
||||
@@ -35,7 +35,7 @@ CharWidths.txt: charwidths.jl unifont.sfd unifont_upper.sfd EastAsianWidth.txt
|
||||
$(JULIA) charwidths.jl > $@
|
||||
|
||||
# Unicode data version
|
||||
UNICODE_VERSION=10.0.0
|
||||
UNICODE_VERSION=11.0.0
|
||||
|
||||
UnicodeData.txt:
|
||||
$(CURL) $(CURLFLAGS) -o $@ -O http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
|
||||
@@ -61,6 +61,9 @@ NormalizationTest.txt:
|
||||
GraphemeBreakTest.txt:
|
||||
$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
|
||||
|
||||
emoji-data.txt:
|
||||
$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/emoji/`echo $(UNICODE_VERSION) | cut -d. -f1-2`/emoji-data.txt
|
||||
|
||||
clean:
|
||||
rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd
|
||||
rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd emoji-data.txt
|
||||
rm -f utf8proc_data.c.new
|
||||
|
||||
@@ -85,6 +85,19 @@ $grapheme_boundclass_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$emoji_data_list = File.read("emoji-data.txt")
|
||||
$emoji_data_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
|
||||
$1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
|
||||
elsif entry =~ /^([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
|
||||
$grapheme_boundclass[$1.hex] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC"
|
||||
elsif entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Emoji_Modifier\W/
|
||||
$1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTEND" }
|
||||
elsif entry =~ /^([0-9A-F]+)\s*;\s*Emoji_Modifier\W/
|
||||
$grapheme_boundclass[$1.hex] = "UTF8PROC_BOUNDCLASS_EXTEND"
|
||||
end
|
||||
end
|
||||
|
||||
$charwidth_list = File.read("CharWidths.txt")
|
||||
$charwidth = Hash.new(0)
|
||||
$charwidth_list.each_line do |entry|
|
||||
|
||||
Reference in New Issue
Block a user