uppercase mapping ß (U+00df) to ẞ (U+1E9E) (#134)
* uppercase(0x00df) = 0x1e9e
* tests for titlecase and U+00DF uppercase
* NEWS, another test
This commit is contained in:
parent
8639450134
commit
d81308faba
4
NEWS.md
4
NEWS.md
@ -14,6 +14,9 @@
|
||||
|
||||
- `cmake` fix to avoid defining `UTF8PROC_EXPORTS` globally ([#121]).
|
||||
|
||||
- `toupper` (and likewise `totitle`) of ß (U+00DF) now yields ẞ (U+1E9E) ([#134]), similar to musl;
|
||||
case-folding still yields the standard "ss" mapping.
|
||||
|
||||
## Version 2.1.1 ##
|
||||
|
||||
2018-04-27
|
||||
@ -332,3 +335,4 @@ Release of version 1.0.1
|
||||
[#128]: https://github.com/JuliaLang/utf8proc/issues/128
|
||||
[#132]: https://github.com/JuliaLang/utf8proc/issues/132
|
||||
[#133]: https://github.com/JuliaLang/utf8proc/issues/133
|
||||
[#134]: https://github.com/JuliaLang/utf8proc/issues/134
|
||||
|
||||
@ -188,9 +188,10 @@ class UnicodeChar
|
||||
@decomp_mapping = ($8=='') ? nil :
|
||||
$8.split.collect { |element| element.hex }
|
||||
@bidi_mirrored = ($13=='Y') ? true : false
|
||||
@uppercase_mapping = ($16=='') ? nil : $16.hex
|
||||
# issue #130: use nonstandard uppercase ß -> ẞ
|
||||
@uppercase_mapping = ($16=='') ? (code==0x00df ? 0x1e9e : nil) : $16.hex
|
||||
@lowercase_mapping = ($17=='') ? nil : $17.hex
|
||||
@titlecase_mapping = ($18=='') ? nil : $18.hex
|
||||
@titlecase_mapping = ($18=='') ? (code==0x00df ? 0x1e9e : nil) : $18.hex
|
||||
end
|
||||
def case_folding
|
||||
$case_folding[code]
|
||||
@ -408,4 +409,3 @@ comb1st_indicies.keys.each_index do |a|
|
||||
$stdout << "\n"
|
||||
end
|
||||
$stdout << "};\n\n"
|
||||
|
||||
|
||||
21
test/case.c
21
test/case.c
@ -13,9 +13,16 @@ int main(int argc, char **argv)
|
||||
for (c = 0; c <= 0x110000; ++c) {
|
||||
utf8proc_int32_t l = utf8proc_tolower(c);
|
||||
utf8proc_int32_t u = utf8proc_toupper(c);
|
||||
utf8proc_int32_t t = utf8proc_totitle(c);
|
||||
|
||||
check(l == c || utf8proc_codepoint_valid(l), "invalid tolower");
|
||||
check(u == c || utf8proc_codepoint_valid(u), "invalid toupper");
|
||||
check(t == c || utf8proc_codepoint_valid(t), "invalid totitle");
|
||||
|
||||
if (utf8proc_codepoint_valid(c) && (l == u) != (l == t)) {
|
||||
fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, c);
|
||||
++error;
|
||||
}
|
||||
|
||||
if (sizeof(wint_t) > 2 || c < (1<<16)) {
|
||||
wint_t l0 = towlower(c), u0 = towupper(c);
|
||||
@ -44,6 +51,20 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
check(!error, "utf8proc case conversion FAILED %d tests.", error);
|
||||
|
||||
/* issue #130 */
|
||||
check(utf8proc_toupper(0x00df) == 0x1e9e &&
|
||||
utf8proc_totitle(0x00df) == 0x1e9e &&
|
||||
utf8proc_tolower(0x00df) == 0x00df &&
|
||||
utf8proc_tolower(0x1e9e) == 0x00df &&
|
||||
utf8proc_toupper(0x1e9e) == 0x1e9e,
|
||||
"incorrect 0x00df/0x1e9e case conversions");
|
||||
utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00};
|
||||
utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00};
|
||||
check(!strcmp((char*)utf8proc_NFKC_Casefold(str_00df), "ss") &&
|
||||
!strcmp((char*)utf8proc_NFKC_Casefold(str_1e9e), "ss"),
|
||||
"incorrect 0x00df/0x1e9e casefold normalization");
|
||||
|
||||
printf("More up-to-date than OS unicode tables for %d tests.\n", better);
|
||||
printf("utf8proc case conversion tests SUCCEEDED.\n");
|
||||
return 0;
|
||||
|
||||
@ -4,46 +4,57 @@
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 1; i < argc; ++i) {
|
||||
unsigned int c;
|
||||
if (!strcmp(argv[i], "-V")) {
|
||||
printf("utf8proc version %s\n", utf8proc_version());
|
||||
continue;
|
||||
}
|
||||
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
|
||||
const utf8proc_property_t *p = utf8proc_get_property(c);
|
||||
printf("U+%s:\n"
|
||||
" category = %s\n"
|
||||
" combining_class = %d\n"
|
||||
" bidi_class = %d\n"
|
||||
" decomp_type = %d\n"
|
||||
" uppercase_mapping = %x\n"
|
||||
" lowercase_mapping = %x\n"
|
||||
" titlecase_mapping = %x\n"
|
||||
" comb_index = %d\n"
|
||||
" bidi_mirrored = %d\n"
|
||||
" comp_exclusion = %d\n"
|
||||
" ignorable = %d\n"
|
||||
" control_boundary = %d\n"
|
||||
" boundclass = %d\n"
|
||||
" charwidth = %d\n",
|
||||
argv[i],
|
||||
utf8proc_category_string(c),
|
||||
p->combining_class,
|
||||
p->bidi_class,
|
||||
p->decomp_type,
|
||||
utf8proc_toupper(c),
|
||||
utf8proc_tolower(c),
|
||||
utf8proc_totitle(c),
|
||||
p->comb_index,
|
||||
p->bidi_mirrored,
|
||||
p->comp_exclusion,
|
||||
p->ignorable,
|
||||
p->control_boundary,
|
||||
p->boundclass,
|
||||
utf8proc_charwidth(c));
|
||||
}
|
||||
return 0;
|
||||
for (i = 1; i < argc; ++i) {
|
||||
utf8proc_uint8_t cstr[16], *map;
|
||||
unsigned int c;
|
||||
if (!strcmp(argv[i], "-V")) {
|
||||
printf("utf8proc version %s\n", utf8proc_version());
|
||||
continue;
|
||||
}
|
||||
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
|
||||
const utf8proc_property_t *p = utf8proc_get_property(c);
|
||||
|
||||
if (utf8proc_codepoint_valid(c))
|
||||
cstr[utf8proc_encode_char(c, cstr)] = 0;
|
||||
else
|
||||
strcat((char*)cstr, "N/A");
|
||||
utf8proc_map(cstr, 0, &map, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
|
||||
|
||||
printf("U+%s: %s\n"
|
||||
" category = %s\n"
|
||||
" combining_class = %d\n"
|
||||
" bidi_class = %d\n"
|
||||
" decomp_type = %d\n"
|
||||
" uppercase_mapping = %x\n"
|
||||
" lowercase_mapping = %x\n"
|
||||
" titlecase_mapping = %x\n"
|
||||
" casefold = %s\n"
|
||||
" comb_index = %d\n"
|
||||
" bidi_mirrored = %d\n"
|
||||
" comp_exclusion = %d\n"
|
||||
" ignorable = %d\n"
|
||||
" control_boundary = %d\n"
|
||||
" boundclass = %d\n"
|
||||
" charwidth = %d\n",
|
||||
argv[i], (char*) cstr,
|
||||
utf8proc_category_string(c),
|
||||
p->combining_class,
|
||||
p->bidi_class,
|
||||
p->decomp_type,
|
||||
utf8proc_toupper(c),
|
||||
utf8proc_tolower(c),
|
||||
utf8proc_totitle(c),
|
||||
(char *) map,
|
||||
p->comb_index,
|
||||
p->bidi_mirrored,
|
||||
p->comp_exclusion,
|
||||
p->ignorable,
|
||||
p->control_boundary,
|
||||
p->boundclass,
|
||||
utf8proc_charwidth(c));
|
||||
free(map);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
2442
utf8proc_data.c
2442
utf8proc_data.c
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user