uppercase mapping ß (U+00df) to ẞ (U+1E9E) (#134)

* uppercase(0x00df) = 0x1e9e * tests for titlecase and u+00df uppercase * NEWS, another test
2018-05-02 14:18:26 -04:00
parent 8639450134
commit d81308faba
5 changed files with 1312 additions and 1276 deletions
--- a/test/case.c
+++ b/test/case.c
@@ -13,13 +13,20 @@ int main(int argc, char **argv)
     for (c = 0; c <= 0x110000; ++c) {
          utf8proc_int32_t l = utf8proc_tolower(c);
          utf8proc_int32_t u = utf8proc_toupper(c);
+          utf8proc_int32_t t = utf8proc_totitle(c);

          check(l == c || utf8proc_codepoint_valid(l), "invalid tolower");
          check(u == c || utf8proc_codepoint_valid(u), "invalid toupper");
+          check(t == c || utf8proc_codepoint_valid(t), "invalid totitle");
+
+          if (utf8proc_codepoint_valid(c) && (l == u) != (l == t)) {
+               fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, c);
+               ++error;
+          }

          if (sizeof(wint_t) > 2 || c < (1<<16)) {
               wint_t l0 = towlower(c), u0 = towupper(c);
-               
+
               /* OS unicode tables may be out of date.  But if they
                  do have a lower/uppercase mapping, hopefully it
                  is correct? */
@@ -44,6 +51,20 @@ int main(int argc, char **argv)
          }
     }
     check(!error, "utf8proc case conversion FAILED %d tests.", error);
+
+     /* issue #130 */
+     check(utf8proc_toupper(0x00df) == 0x1e9e &&
+           utf8proc_totitle(0x00df) == 0x1e9e &&
+           utf8proc_tolower(0x00df) == 0x00df &&
+           utf8proc_tolower(0x1e9e) == 0x00df &&
+           utf8proc_toupper(0x1e9e) == 0x1e9e,
+           "incorrect 0x00df/0x1e9e case conversions");
+     utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00};
+     utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00};
+     check(!strcmp((char*)utf8proc_NFKC_Casefold(str_00df), "ss") &&
+           !strcmp((char*)utf8proc_NFKC_Casefold(str_1e9e), "ss"),
+           "incorrect 0x00df/0x1e9e casefold normalization");
+
     printf("More up-to-date than OS unicode tables for %d tests.\n", better);
     printf("utf8proc case conversion tests SUCCEEDED.\n");
     return 0;
--- a/test/printproperty.c
+++ b/test/printproperty.c
@@ -4,46 +4,57 @@

 int main(int argc, char **argv)
 {
-     int i;
+    int i;

-     for (i = 1; i < argc; ++i) {
-          unsigned int c;
-          if (!strcmp(argv[i], "-V")) {
-               printf("utf8proc version %s\n", utf8proc_version());
-               continue;
-          }
-          check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
-          const utf8proc_property_t *p = utf8proc_get_property(c);
-          printf("U+%s:\n"
-                 "  category = %s\n"
-                 "  combining_class = %d\n"
-                 "  bidi_class = %d\n"
-                 "  decomp_type = %d\n"
-                 "  uppercase_mapping = %x\n"
-                 "  lowercase_mapping = %x\n"
-                 "  titlecase_mapping = %x\n"
-                 "  comb_index = %d\n"
-                 "  bidi_mirrored = %d\n"
-                 "  comp_exclusion = %d\n"
-                 "  ignorable = %d\n"
-                 "  control_boundary = %d\n"
-                 "  boundclass = %d\n"
-                 "  charwidth = %d\n",
-                 argv[i],
-                 utf8proc_category_string(c),
-                 p->combining_class,
-                 p->bidi_class,
-                 p->decomp_type,
-                 utf8proc_toupper(c),
-                 utf8proc_tolower(c),
-                 utf8proc_totitle(c),
-                 p->comb_index,
-                 p->bidi_mirrored,
-                 p->comp_exclusion,
-                 p->ignorable,
-                 p->control_boundary,
-                 p->boundclass,
-                 utf8proc_charwidth(c));
-     }
-     return 0;
+    for (i = 1; i < argc; ++i) { /* each argument is either "-V" or a hex code point */
+        utf8proc_uint8_t cstr[16], *map = NULL; /* NULL so a failed utf8proc_map cannot leave map indeterminate */
+        unsigned int c;
+        if (!strcmp(argv[i], "-V")) {
+            printf("utf8proc version %s\n", utf8proc_version());
+            continue;
+        }
+        check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
+        const utf8proc_property_t *p = utf8proc_get_property(c);
+        /* cstr: the code point encoded by utf8proc_encode_char and NUL-terminated, or "N/A" if invalid */
+        if (utf8proc_codepoint_valid(c))
+            cstr[utf8proc_encode_char(c, cstr)] = 0; /* encode_char's return value is the terminator index */
+        else
+            strcpy((char*)cstr, "N/A"); /* cstr is uninitialized here: copy, never append with strcat */
+        utf8proc_map(cstr, 0, &map, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
+        /* map: result of mapping cstr with UTF8PROC_CASEFOLD; released by free() below */
+        printf("U+%s: %s\n"
+            "  category = %s\n"
+            "  combining_class = %d\n"
+            "  bidi_class = %d\n"
+            "  decomp_type = %d\n"
+            "  uppercase_mapping = %x\n"
+            "  lowercase_mapping = %x\n"
+            "  titlecase_mapping = %x\n"
+            "  casefold = %s\n"
+            "  comb_index = %d\n"
+            "  bidi_mirrored = %d\n"
+            "  comp_exclusion = %d\n"
+            "  ignorable = %d\n"
+            "  control_boundary = %d\n"
+            "  boundclass = %d\n"
+            "  charwidth = %d\n",
+        argv[i], (char*) cstr,
+        utf8proc_category_string(c),
+        p->combining_class,
+        p->bidi_class,
+        p->decomp_type,
+        utf8proc_toupper(c),
+        utf8proc_tolower(c),
+        utf8proc_totitle(c),
+        map ? (char *) map : "N/A", /* never hand a NULL pointer to %s */
+        p->comb_index,
+        p->bidi_mirrored,
+        p->comp_exclusion,
+        p->ignorable,
+        p->control_boundary,
+        p->boundclass,
+        utf8proc_charwidth(c));
+        free(map); /* free(NULL) is a no-op, so this is safe even if the mapping failed */
+    }
+    return 0;
 }