* JuliaStrings#169 turn on sign-conversion warnings Signed-off-by: Mike Glorioso <mike.glorioso@gmail.com> * JuliaStrings#169 fix sign-conversion warnings for utf8proc.c fix sign-conversion warnings for utf8proc_iterate uc requires at most 21 bits to identify a unicode codepoint, so there is no need for it to be unsigned multiple locations use, modify, or store uc with a signed value the only exception is line 137 where uc is compared with an unsigned value fix sign-conversion warnings for utf8proc_tolower, utf8proc_toupper, utf8proc_totitle all three methods have sign conversion warnings when calling seqindex_decode_index seqindex_decode_index uses the passed value as an index to an array utf8proc_sequences as utf8proc_sequences is hard-coded and smaller than 2^31 - 1 we can safely cast to unsigned fix sign-conversion warnings for utf8proc_decompose_char lines with this warning use the defined function utf8proc_decompose_lump in the function, a hardcoded unsigned value (1<<12) is complemented then cast as a signed value as the intent is to remove the 12th bit flag from options, a signed value, an explicit cast is safe fix sign-conversion warnings for utf8proc_map_custom result is declared as signed, but is only expected to contain values between 0 and 4 sizeof returns an unsigned value. result must be cast to unsigned Signed-off-by: Mike Glorioso <mike.glorioso@gmail.com> * JuliaStrings#169 fix sign-conversion warnings for test/* fix sign-conversion warnings for test/tests.c encode change type for d to match return value of utf8proc_encode_char fix sign-conversion warnings for test/graphemetest.c checkline si, i, and j are unsigned size types, utf8proc_map and utf8proc_iterate accept and return signed size types utf8proc_map treats negative strlen values as 0. 
the strlen used by the test must be similarly limited utf8proc_iterate treats negative strlen values as 4 which will be less than the unsigned size fix unused-but-set-variable warning by checking the glen value fix sign-conversion warnings for test/case.c main the if block ensures that tested codepoint fits in wint_t, but needs to include u and l as well c, u, and l can be safely cast to wint_t fix sign-conversion warnings for test/iterate.c all values used for len are below 8, so an explicit cast is safe updated types for more portable test code fix sign-conversion warnings for test/printproperty.c main change type of c to signed to resolve all sign-conversion warnings. replace sscanf(... &c) with sscanf(... &x) followed by explicit sign conversion Signed-off-by: Mike Glorioso <mike.glorioso@gmail.com>
77 lines
3.1 KiB
C
77 lines
3.1 KiB
C
#include "tests.h"
|
|
#include <wctype.h>
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
int error = 0, better = 0;
|
|
utf8proc_int32_t c;
|
|
|
|
(void) argc; /* unused */
|
|
(void) argv; /* unused */
|
|
|
|
/* some simple sanity tests of the character widths */
|
|
for (c = 0; c <= 0x110000; ++c) {
|
|
utf8proc_int32_t l = utf8proc_tolower(c);
|
|
utf8proc_int32_t u = utf8proc_toupper(c);
|
|
utf8proc_int32_t t = utf8proc_totitle(c);
|
|
|
|
check(l == c || utf8proc_codepoint_valid(l), "invalid tolower");
|
|
check(u == c || utf8proc_codepoint_valid(u), "invalid toupper");
|
|
check(t == c || utf8proc_codepoint_valid(t), "invalid totitle");
|
|
|
|
if (utf8proc_codepoint_valid(c) && (l == u) != (l == t) &&
|
|
/* Unicode 11: Georgian Mkhedruli chars have uppercase but no titlecase. */
|
|
!(((c >= 0x10d0 && c <= 0x10fa) || c >= (0x10fd && c <= 0x10ff)) && l != u)) {
|
|
fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, c);
|
|
++error;
|
|
}
|
|
|
|
if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
|
|
wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);
|
|
|
|
/* OS unicode tables may be out of date. But if they
|
|
do have a lower/uppercase mapping, hopefully it
|
|
is correct? */
|
|
if (l0 != (wint_t)c && l0 != (wint_t)l) {
|
|
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
|
l, c, l0);
|
|
++error;
|
|
}
|
|
else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
|
|
++better;
|
|
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
|
}
|
|
if (u0 != (wint_t)c && u0 != (wint_t)u) {
|
|
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
|
u, c, u0);
|
|
++error;
|
|
}
|
|
else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
|
|
++better;
|
|
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
|
}
|
|
}
|
|
}
|
|
check(!error, "utf8proc case conversion FAILED %d tests.", error);
|
|
|
|
/* issue #130 */
|
|
check(utf8proc_toupper(0x00df) == 0x1e9e &&
|
|
utf8proc_totitle(0x00df) == 0x1e9e &&
|
|
utf8proc_tolower(0x00df) == 0x00df &&
|
|
utf8proc_tolower(0x1e9e) == 0x00df &&
|
|
utf8proc_toupper(0x1e9e) == 0x1e9e,
|
|
"incorrect 0x00df/0x1e9e case conversions");
|
|
utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00};
|
|
utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00};
|
|
utf8proc_uint8_t *s1 = utf8proc_NFKC_Casefold(str_00df);
|
|
utf8proc_uint8_t *s2 = utf8proc_NFKC_Casefold(str_1e9e);
|
|
check(!strcmp((char*)s1, "ss") &&
|
|
!strcmp((char*)s2, "ss"),
|
|
"incorrect 0x00df/0x1e9e casefold normalization");
|
|
free(s1);
|
|
free(s2);
|
|
printf("More up-to-date than OS unicode tables for %d tests.\n", better);
|
|
printf("utf8proc case conversion tests SUCCEEDED.\n");
|
|
return 0;
|
|
}
|