fix #2: add charwidth function

This commit is contained in:
Steven G. Johnson
2015-03-08 17:23:43 -04:00
parent 08f101a9e8
commit a4c84d2063
11 changed files with 10488 additions and 9867 deletions

61
test/charwidth.c Normal file
View File

@@ -0,0 +1,61 @@
#include "tests.h"
#include <ctype.h>
#include <wchar.h>
/* Printability predicate used by the width tests below.
   Returns 1 when the utf8proc category of code point c lies in the range
   UTF8PROC_CATEGORY_LU .. UTF8PROC_CATEGORY_ZS (inclusive), or when c is one
   of the four explicitly listed code points U+0601, U+0602, U+0603, U+06DD;
   returns 0 otherwise. */
int my_isprint(int c) {
    int category = utf8proc_get_property(c)->category;

    if (category >= UTF8PROC_CATEGORY_LU && category <= UTF8PROC_CATEGORY_ZS)
        return 1;

    /* code points accepted regardless of their category */
    switch (c) {
    case 0x0601:
    case 0x0602:
    case 0x0603:
    case 0x06dd:
        return 1;
    default:
        return 0;
    }
}
/* Test driver for utf8proc_charwidth.
   Pass 1 walks every value from 0 through 0x110000 and flags:
     - combining characters (categories MN / ME) with a positive width,
     - ASCII values whose width disagrees with isprint()/wcwidth(),
     - values rejected by my_isprint() that still have a positive width.
   Any flagged value makes the check() after the loop fail (check() is
   declared outside this file's visible part; it is assumed to abort the
   test on failure -- confirm against tests.h).
   Pass 2 only prints differences from the system wcwidth(); these are
   informational and never affect the exit status. */
int main(int argc, char **argv)
{
    int c, error = 0;

    (void) argc; /* unused */
    (void) argv; /* unused */

    /* some simple sanity tests of the character widths */
    for (c = 0; c <= 0x110000; ++c) {
        int cat = utf8proc_get_property(c)->category;
        int w = utf8proc_charwidth(c);

        if ((cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_ME) &&
            w > 0) {
            fprintf(stderr, "nonzero width %d for combining char %x\n", w, c);
            error = 1;
        }
        /* the c <= 127 test comes first so isprint() only ever sees ASCII */
        if (c <= 127 && ((!isprint(c) && w > 0) ||
                         (isprint(c) && wcwidth(c) != w))) {
            fprintf(stderr, "wcwidth %d mismatch %d for %s ASCII %x\n",
                    wcwidth(c), w,
                    isprint(c) ? "printable" : "non-printable", c);
            error = 1;
        }
        if (!my_isprint(c) && w > 0) {
            fprintf(stderr, "non-printing %x had width %d\n", c, w);
            error = 1;
        }
    }
    check(!error, "utf8proc_charwidth FAILED tests.");

    /* print some other information by comparing with system wcwidth */
    printf("Mismatches with system wcwidth (not necessarily errors):\n");
    for (c = 0; c <= 0x110000; ++c) {
        int w = utf8proc_charwidth(c);
        int wc;

        /* With a 16-bit wchar_t, values >= 0x10000 cannot be passed to
           wcwidth() without being narrowed, so skip them before the call. */
        if (sizeof(wchar_t) == 2 && c >= (1<<16)) continue;
        wc = wcwidth(c);
#if 0
        /* lots of these errors for out-of-date system unicode tables */
        if (wc == -1 && my_isprint(c) && w > 0)
            printf(" wcwidth(%x) = -1 for printable char\n", c);
#endif
        if (wc == -1 && !my_isprint(c) && w > 0)
            printf(" wcwidth(%x) = -1 for non-printable width-%d char\n", c, w);
        if (wc >= 0 && wc != w)
            printf(" wcwidth(%x) = %d != charwidth %d\n", c, wc, w);
    }
    printf("Character-width tests SUCCEEDED.\n");
    return 0;
}

View File

@@ -11,7 +11,7 @@ int main(int argc, char **argv)
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
const utf8proc_property_t *p = utf8proc_get_property(c);
printf("U+%s:\n"
" category = %d\n"
" category = %s\n"
" combining_class = %d\n"
" bidi_class = %d\n"
" decomp_type = %d\n"
@@ -24,9 +24,10 @@ int main(int argc, char **argv)
" comp_exclusion = %d\n"
" ignorable = %d\n"
" control_boundary = %d\n"
" boundclass = %d\n",
" boundclass = %d\n"
" charwidth = %d\n",
argv[i],
p->category,
utf8proc_category_string(c),
p->combining_class,
p->bidi_class,
p->decomp_type,
@@ -39,7 +40,8 @@ int main(int argc, char **argv)
p->comp_exclusion,
p->ignorable,
p->control_boundary,
p->boundclass);
p->boundclass,
utf8proc_charwidth(c));
}
return 0;
}