add islower/isupper functions (#196)

* add islower/isupper functions

* added test

* more tests + bugfix

* Makefile fix

* rm iscase test on make clean
This commit is contained in:
Steven G. Johnson
2020-08-25 16:42:59 -04:00
committed by GitHub
parent 08f9999a06
commit 5622a0a51b
9 changed files with 7440 additions and 5851 deletions

View File

@@ -51,6 +51,13 @@ GraphemeBreakTest.txt:
# Download emoji-data.txt for $(UNICODE_VERSION) from unicode.org using
# $(CURL); the URL is prefixed with $(URLCACHE).
emoji-data.txt:
$(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt
# Extract the "# Derived Property: Uppercase" section (up to its
# "# Total code points:" trailer) from DerivedCoreProperties.txt.
# The Ruby one-liner aborts if the section is not found, instead of silently
# emitting a blank file, and the output goes through a temporary file so that
# a failed extraction never leaves an empty-but-newer target behind.
Uppercase.txt: DerivedCoreProperties.txt
	$(RUBY) -e 's = File.read("$<")[/# Derived Property: Uppercase.*?# Total code points:/m] or abort("Uppercase section not found in $<"); puts s' > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
# Extract the "# Derived Property: Lowercase" section (up to its
# "# Total code points:" trailer) from DerivedCoreProperties.txt.
# The Ruby one-liner aborts if the section is not found, instead of silently
# emitting a blank file, and the output goes through a temporary file so that
# a failed extraction never leaves an empty-but-newer target behind.
Lowercase.txt: DerivedCoreProperties.txt
	$(RUBY) -e 's = File.read("$<")[/# Derived Property: Lowercase.*?# Total code points:/m] or abort("Lowercase section not found in $<"); puts s' > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
# Remove every data file this Makefile downloads or generates:
# the Unicode source tables, the extracted case-property lists, and the
# freshly generated C data file.
clean:
	rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt \
	      DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt \
	      NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt \
	      emoji-data.txt \
	      Uppercase.txt Lowercase.txt \
	      utf8proc_data.c.new

View File

@@ -77,6 +77,26 @@ $ignorable_list.each_line do |entry|
end
end
# Returns the text of the "# Derived Property: <name>" section of +text+
# (the contents of DerivedCoreProperties.txt), ending at that section's
# "# Total code points:" trailer.
#
# Raises RuntimeError if the section is missing, so a truncated or
# reformatted data file is reported clearly instead of surfacing later as a
# NoMethodError on nil.
def derived_property_section(text, name)
  section = text[/# Derived Property: #{Regexp.escape(name)}.*?# Total code points:/m]
  if section.nil?
    raise "DerivedCoreProperties.txt: no \"# Derived Property: #{name}\" section found"
  end
  section
end

# Expands the entries of a derived-property section into an Array of integer
# code points.  An entry is either a range "XXXX..YYYY" (inclusive) or a
# single "XXXX" at the start of a line; all other lines are ignored.
def derived_property_codepoints(section)
  codepoints = []
  section.each_line do |entry|
    if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
      codepoints.concat(($1.hex..$2.hex).to_a)
    elsif entry =~ /^[0-9A-F]+/
      codepoints << $&.hex
    end
  end
  codepoints
end

# Read the data file once and derive both case-property tables from it.
derived_core_properties = File.read("DerivedCoreProperties.txt")

$uppercase_list = derived_property_section(derived_core_properties, "Uppercase")
$uppercase = derived_property_codepoints($uppercase_list)

$lowercase_list = derived_property_section(derived_core_properties, "Lowercase")
$lowercase = derived_property_codepoints($lowercase_list)
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt")
$grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
$grapheme_boundclass_list.each_line do |entry|
@@ -204,8 +224,10 @@ class UnicodeChar
$8.split.collect { |element| element.hex }
@bidi_mirrored = ($13=='Y') ? true : false
# issue #130: use nonstandard uppercase ß -> ẞ
@uppercase_mapping = ($16=='') ? (code==0x00df ? 0x1e9e : nil) : $16.hex
@lowercase_mapping = ($17=='') ? nil : $17.hex
# issue #195: if character is uppercase but has no lowercase mapping,
# then make lowercase mapping = itself (vice versa for lowercase)
@uppercase_mapping = ($16=='') ? (code==0x00df ? 0x1e9e : ($17=='' && $lowercase.include?(code) ? code : nil)) : $16.hex
@lowercase_mapping = ($17=='') ? ($16=='' && $uppercase.include?(code) ? code : nil) : $17.hex
@titlecase_mapping = ($18=='') ? (code==0x00df ? 0x1e9e : nil) : $18.hex
end
def case_folding