Unicode 13 support (#179)

* exclude Sk from zero-width chars (closes #167)

* update for Unicode 13
This commit is contained in:
Steven G. Johnson
2020-03-27 17:06:06 -04:00
committed by GitHub
parent 47edf655b3
commit b48f5d074f
5 changed files with 5478 additions and 5480 deletions

View File

@@ -22,10 +22,10 @@ CharWidths.txt: charwidths.jl EastAsianWidth.txt
$(JULIA) charwidths.jl > $@
# Unicode data version (must also update utf8proc_unicode_version function).
# This value is substituted into the unicode.org download URL used by the
# UnicodeData.txt rule.
# NOTE(review): two assignments are shown (12.1.0, then 13.0.0); presumably
# they are the removed and added sides of the diff. Read literally as make
# input, the later assignment is the one that takes effect.
UNICODE_VERSION=12.1.0
UNICODE_VERSION=13.0.0
# Unicode emoji version (managed separately from UNICODE_VERSION).
# NOTE(review): as above, 12.0 and 13.0 are presumably the old and new values
# from the diff; the later assignment wins.
UNICODE_EMOJI_VERSION=12.0
UNICODE_EMOJI_VERSION=13.0
# Fetch UnicodeData.txt for the configured $(UNICODE_VERSION) from unicode.org
# using $(CURL) $(CURLFLAGS), writing the download to the rule's target ($@).
# No prerequisites are listed for this target.
# NOTE(review): both "-o $@" and "-O" are passed for a single URL; the -O
# looks redundant -- confirm against curl's option handling before removing.
# NOTE(review): the URL uses plain http; consider https if the host supports it.
UnicodeData.txt:
$(CURL) $(CURLFLAGS) -o $@ -O http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt

View File

@@ -60,7 +60,7 @@ zerowidth = Set{Int}() # categories that may contain zero-width chars
# Populate `zerowidth` with the utf8proc category constants that may contain
# zero-width characters.
push!(zerowidth, UTF8PROC_CATEGORY_MN)
push!(zerowidth, UTF8PROC_CATEGORY_MC)
push!(zerowidth, UTF8PROC_CATEGORY_ME)
# NOTE(review): the active SK push and its commented-out copy both appear here;
# presumably they are the removed and added sides of the diff. The commit
# message says Sk is to be excluded from zero-width chars (issue #167), so only
# the commented-out form should remain in the resulting file -- confirm.
push!(zerowidth, UTF8PROC_CATEGORY_SK)
# push!(zerowidth, UTF8PROC_CATEGORY_SK) # see issue #167
push!(zerowidth, UTF8PROC_CATEGORY_ZL)
push!(zerowidth, UTF8PROC_CATEGORY_ZP)
push!(zerowidth, UTF8PROC_CATEGORY_CC)