Mark Grapheme_Extend data
This commit is contained in:
parent
7633bd03b6
commit
aa9823f540
@ -83,157 +83,10 @@ $ignorable_list.each do |entry|
|
||||
end
|
||||
|
||||
$grapheme_extend_list = <<END_OF_LIST
|
||||
0300..036F ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
|
||||
0483..0486 ; Grapheme_Extend # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
|
||||
0488..0489 ; Grapheme_Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
0591..05BD ; Grapheme_Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
|
||||
05BF ; Grapheme_Extend # Mn HEBREW POINT RAFE
|
||||
05C1..05C2 ; Grapheme_Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
|
||||
05C4..05C5 ; Grapheme_Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
||||
05C7 ; Grapheme_Extend # Mn HEBREW POINT QAMATS QATAN
|
||||
0610..0615 ; Grapheme_Extend # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
|
||||
064B..065E ; Grapheme_Extend # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
|
||||
0670 ; Grapheme_Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
06D6..06DC ; Grapheme_Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DE ; Grapheme_Extend # Me ARABIC START OF RUB EL HIZB
|
||||
06DF..06E4 ; Grapheme_Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E7..06E8 ; Grapheme_Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
06EA..06ED ; Grapheme_Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
||||
0711 ; Grapheme_Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
|
||||
0730..074A ; Grapheme_Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
07A6..07B0 ; Grapheme_Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
|
||||
07EB..07F3 ; Grapheme_Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
0901..0902 ; Grapheme_Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA
|
||||
0941..0948 ; Grapheme_Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
094D ; Grapheme_Extend # Mn DEVANAGARI SIGN VIRAMA
|
||||
0951..0954 ; Grapheme_Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
0962..0963 ; Grapheme_Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0981 ; Grapheme_Extend # Mn BENGALI SIGN CANDRABINDU
|
||||
09BC ; Grapheme_Extend # Mn BENGALI SIGN NUKTA
|
||||
09BE ; Grapheme_Extend # Mc BENGALI VOWEL SIGN AA
|
||||
09C1..09C4 ; Grapheme_Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
|
||||
09CD ; Grapheme_Extend # Mn BENGALI SIGN VIRAMA
|
||||
09D7 ; Grapheme_Extend # Mc BENGALI AU LENGTH MARK
|
||||
09E2..09E3 ; Grapheme_Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
|
||||
0A01..0A02 ; Grapheme_Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
|
||||
0A3C ; Grapheme_Extend # Mn GURMUKHI SIGN NUKTA
|
||||
0A41..0A42 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
|
||||
0A47..0A48 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
|
||||
0A4B..0A4D ; Grapheme_Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
|
||||
0A70..0A71 ; Grapheme_Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
|
||||
0A81..0A82 ; Grapheme_Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
|
||||
0ABC ; Grapheme_Extend # Mn GUJARATI SIGN NUKTA
|
||||
0AC1..0AC5 ; Grapheme_Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
|
||||
0AC7..0AC8 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
|
||||
0ACD ; Grapheme_Extend # Mn GUJARATI SIGN VIRAMA
|
||||
0AE2..0AE3 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
|
||||
0B01 ; Grapheme_Extend # Mn ORIYA SIGN CANDRABINDU
|
||||
0B3C ; Grapheme_Extend # Mn ORIYA SIGN NUKTA
|
||||
0B3E ; Grapheme_Extend # Mc ORIYA VOWEL SIGN AA
|
||||
0B3F ; Grapheme_Extend # Mn ORIYA VOWEL SIGN I
|
||||
0B41..0B43 ; Grapheme_Extend # Mn [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R
|
||||
0B4D ; Grapheme_Extend # Mn ORIYA SIGN VIRAMA
|
||||
0B56 ; Grapheme_Extend # Mn ORIYA AI LENGTH MARK
|
||||
0B57 ; Grapheme_Extend # Mc ORIYA AU LENGTH MARK
|
||||
0B82 ; Grapheme_Extend # Mn TAMIL SIGN ANUSVARA
|
||||
0BBE ; Grapheme_Extend # Mc TAMIL VOWEL SIGN AA
|
||||
0BC0 ; Grapheme_Extend # Mn TAMIL VOWEL SIGN II
|
||||
0BCD ; Grapheme_Extend # Mn TAMIL SIGN VIRAMA
|
||||
0BD7 ; Grapheme_Extend # Mc TAMIL AU LENGTH MARK
|
||||
0C3E..0C40 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
0C46..0C48 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
|
||||
0C4A..0C4D ; Grapheme_Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
|
||||
0C55..0C56 ; Grapheme_Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
|
||||
0CBC ; Grapheme_Extend # Mn KANNADA SIGN NUKTA
|
||||
0CBF ; Grapheme_Extend # Mn KANNADA VOWEL SIGN I
|
||||
0CC2 ; Grapheme_Extend # Mc KANNADA VOWEL SIGN UU
|
||||
0CC6 ; Grapheme_Extend # Mn KANNADA VOWEL SIGN E
|
||||
0CCC..0CCD ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
|
||||
0CD5..0CD6 ; Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
|
||||
0CE2..0CE3 ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0D3E ; Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA
|
||||
0D41..0D43 ; Grapheme_Extend # Mn [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R
|
||||
0D4D ; Grapheme_Extend # Mn MALAYALAM SIGN VIRAMA
|
||||
0D57 ; Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
|
||||
0DCA ; Grapheme_Extend # Mn SINHALA SIGN AL-LAKUNA
|
||||
0DCF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
|
||||
0DD2..0DD4 ; Grapheme_Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||||
0DD6 ; Grapheme_Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
|
||||
0DDF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
|
||||
0E31 ; Grapheme_Extend # Mn THAI CHARACTER MAI HAN-AKAT
|
||||
0E34..0E3A ; Grapheme_Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
|
||||
0E47..0E4E ; Grapheme_Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
|
||||
0EB1 ; Grapheme_Extend # Mn LAO VOWEL SIGN MAI KAN
|
||||
0EB4..0EB9 ; Grapheme_Extend # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
|
||||
0EBB..0EBC ; Grapheme_Extend # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
|
||||
0EC8..0ECD ; Grapheme_Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
|
||||
0F18..0F19 ; Grapheme_Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
|
||||
0F35 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
|
||||
0F37 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
|
||||
0F39 ; Grapheme_Extend # Mn TIBETAN MARK TSA -PHRU
|
||||
0F71..0F7E ; Grapheme_Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
|
||||
0F80..0F84 ; Grapheme_Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
|
||||
0F86..0F87 ; Grapheme_Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0F90..0F97 ; Grapheme_Extend # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
|
||||
0F99..0FBC ; Grapheme_Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||||
0FC6 ; Grapheme_Extend # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
102D..1030 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
|
||||
1032 ; Grapheme_Extend # Mn MYANMAR VOWEL SIGN AI
|
||||
1036..1037 ; Grapheme_Extend # Mn [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW
|
||||
1039 ; Grapheme_Extend # Mn MYANMAR SIGN VIRAMA
|
||||
1058..1059 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
|
||||
135F ; Grapheme_Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1714 ; Grapheme_Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1732..1734 ; Grapheme_Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
|
||||
1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
|
||||
17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
|
||||
17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT
|
||||
17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
|
||||
17DD ; Grapheme_Extend # Mn KHMER SIGN ATTHACAN
|
||||
180B..180D ; Grapheme_Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
18A9 ; Grapheme_Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
1920..1922 ; Grapheme_Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
|
||||
1927..1928 ; Grapheme_Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
|
||||
1932 ; Grapheme_Extend # Mn LIMBU SMALL LETTER ANUSVARA
|
||||
1939..193B ; Grapheme_Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
|
||||
1A17..1A18 ; Grapheme_Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
|
||||
1B00..1B03 ; Grapheme_Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B34 ; Grapheme_Extend # Mn BALINESE SIGN REREKAN
|
||||
1B36..1B3A ; Grapheme_Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
|
||||
1B3C ; Grapheme_Extend # Mn BALINESE VOWEL SIGN LA LENGA
|
||||
1B42 ; Grapheme_Extend # Mn BALINESE VOWEL SIGN PEPET
|
||||
1B6B..1B73 ; Grapheme_Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
|
||||
1DC0..1DCA ; Grapheme_Extend # Mn [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW
|
||||
1DFE..1DFF ; Grapheme_Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Grapheme_Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Grapheme_Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
20E1 ; Grapheme_Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E2..20E4 ; Grapheme_Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20EF ; Grapheme_Extend # Mn [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW
|
||||
302A..302F ; Grapheme_Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA
|
||||
A80B ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
|
||||
A825..A826 ; Grapheme_Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
|
||||
FB1E ; Grapheme_Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
FE00..FE0F ; Grapheme_Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE23 ; Grapheme_Extend # Mn [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF
|
||||
10A01..10A03 ; Grapheme_Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
|
||||
10A05..10A06 ; Grapheme_Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
|
||||
10A0C..10A0F ; Grapheme_Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; Grapheme_Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; Grapheme_Extend # Mn KHAROSHTHI VIRAMA
|
||||
1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
1D17B..1D182 ; Grapheme_Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; Grapheme_Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Grapheme_Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
#From:
|
||||
# http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
|
||||
#Section:
|
||||
# Derived Property: Grapheme_Extend_List
|
||||
END_OF_LIST
|
||||
|
||||
$grapheme_extend = []
|
||||
|
||||
Loading…
Reference in New Issue
Block a user