Fix Sign-Conversion warnings in library and test code (#214)
* JuliaStrings#169 turn on sign-conversion warnings Signed-off-by: Mike Glorioso <mike.glorioso@gmail.com> * JuliaStrings#169 fix sign-conversion warnings for utf8proc.c fix sign-conversion warnings for utf8proc_iterate uc requires at most 21 bits to identify a Unicode codepoint, so there is no need for it to be unsigned multiple locations use, modify, or store uc with a signed value the only exception is line 137 where uc is compared with an unsigned value fix sign-conversion warnings for utf8proc_tolower, utf8proc_toupper, utf8proc_totitle all three methods have sign conversion warnings when calling seqindex_decode_index seqindex_decode_index uses the passed value as an index to an array utf8proc_sequences as utf8proc_sequences is hard-coded and smaller than 2^31 - 1 we can safely cast to unsigned fix sign-conversion warnings for utf8proc_decompose_char lines with this warning use the defined function utf8proc_decompose_lump in the function, a hardcoded unsigned value (1<<12) is complemented then cast as a signed value as the intent is to remove the 12th bit flag from options, a signed value, an explicit cast is safe fix sign-conversion warnings for utf8proc_map_custom result is declared as signed, but is only expected to contain values between 0 and 4 sizeof returns an unsigned value. result must be cast to unsigned Signed-off-by: Mike Glorioso <mike.glorioso@gmail.com> * JuliaStrings#169 fix sign-conversion warnings for test/* fix sign-conversion warnings for test/tests.c encode change type for d to match return value of utf8proc_encode_char fix sign-conversion warnings for test/graphemetest.c checkline si, i, and j are unsigned size types, utf8proc_map and utf8proc_iterate accept and return signed size types utf8proc_map treats negative strlen values as 0. 
the strlen used by the test must be similarly limited utf8proc_iterate treats negative strlen values as 4 which will be less than the unsigned size fix unused-but-set-variable warning by checking the glen value fix sign-conversion warnings for test/case.c main the if block ensures that tested codepoint fits in wint_t, but needs to include u and l as well c, u, and l can be safely cast to wint_t fix sign-conversion warnings for test/iterate.c all values used for len are below 8, so an explicit cast is safe updated types for more portable test code fix sign-conversion warnings for test/printproperty.c main change type of c to signed to resolve all sign-conversion warnings. replace sscanf(... &c) with sscanf(... &x) followed by explicit sign conversion Signed-off-by: Mike Glorioso <mike.glorioso@gmail.com>
This commit is contained in:
parent
0520d6f724
commit
610730f231
2
Makefile
2
Makefile
@ -11,7 +11,7 @@ PERL=perl
|
|||||||
CFLAGS ?= -O2
|
CFLAGS ?= -O2
|
||||||
PICFLAG = -fPIC
|
PICFLAG = -fPIC
|
||||||
C99FLAG = -std=c99
|
C99FLAG = -std=c99
|
||||||
WCFLAGS = -Wall -Wextra -pedantic
|
WCFLAGS = -Wsign-conversion -Wall -Wextra -pedantic
|
||||||
UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
|
UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
|
||||||
LDFLAG_SHARED = -shared
|
LDFLAG_SHARED = -shared
|
||||||
SOFLAG = -Wl,-soname
|
SOFLAG = -Wl,-soname
|
||||||
|
|||||||
12
test/case.c
12
test/case.c
@ -26,27 +26,27 @@ int main(int argc, char **argv)
|
|||||||
++error;
|
++error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sizeof(wint_t) > 2 || c < (1<<16)) {
|
if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
|
||||||
wint_t l0 = towlower(c), u0 = towupper(c);
|
wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);
|
||||||
|
|
||||||
/* OS unicode tables may be out of date. But if they
|
/* OS unicode tables may be out of date. But if they
|
||||||
do have a lower/uppercase mapping, hopefully it
|
do have a lower/uppercase mapping, hopefully it
|
||||||
is correct? */
|
is correct? */
|
||||||
if (l0 != c && l0 != l) {
|
if (l0 != (wint_t)c && l0 != (wint_t)l) {
|
||||||
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
||||||
l, c, l0);
|
l, c, l0);
|
||||||
++error;
|
++error;
|
||||||
}
|
}
|
||||||
else if (l0 != l) { /* often true for out-of-date OS unicode */
|
else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
|
||||||
++better;
|
++better;
|
||||||
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
||||||
}
|
}
|
||||||
if (u0 != c && u0 != u) {
|
if (u0 != (wint_t)c && u0 != (wint_t)u) {
|
||||||
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
||||||
u, c, u0);
|
u, c, u0);
|
||||||
++error;
|
++error;
|
||||||
}
|
}
|
||||||
else if (u0 != u) { /* often true for out-of-date OS unicode */
|
else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
|
||||||
++better;
|
++better;
|
||||||
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
||||||
}
|
}
|
||||||
|
|||||||
@ -43,7 +43,7 @@ void checkline(const char *_buf, bool verbose) {
|
|||||||
else
|
else
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
|
glen = utf8proc_map(utf8, (utf8proc_ssize_t)j, &g, UTF8PROC_CHARBOUND);
|
||||||
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
||||||
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
||||||
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
||||||
@ -66,7 +66,7 @@ void checkline(const char *_buf, bool verbose) {
|
|||||||
utf8proc_bool expectbreak = false;
|
utf8proc_bool expectbreak = false;
|
||||||
do {
|
do {
|
||||||
utf8proc_int32_t codepoint;
|
utf8proc_int32_t codepoint;
|
||||||
i += utf8proc_iterate(src + i, si - i, &codepoint);
|
i += (size_t)utf8proc_iterate(src + i, (utf8proc_ssize_t)(si - i), &codepoint);
|
||||||
check(codepoint >= 0, "invalid UTF-8 data");
|
check(codepoint >= 0, "invalid UTF-8 data");
|
||||||
if (codepoint == 0x002F)
|
if (codepoint == 0x002F)
|
||||||
expectbreak = true;
|
expectbreak = true;
|
||||||
@ -110,6 +110,7 @@ int main(int argc, char **argv)
|
|||||||
utf8proc_uint8_t *g;
|
utf8proc_uint8_t *g;
|
||||||
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
|
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
|
||||||
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
|
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
|
||||||
|
check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
|
||||||
free(g);
|
free(g);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -8,7 +8,7 @@ static int error;
|
|||||||
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
||||||
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
||||||
|
|
||||||
static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
static void testbytes(utf8proc_uint8_t *buf, utf8proc_ssize_t len, utf8proc_ssize_t retval, int line)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t out[16];
|
utf8proc_int32_t out[16];
|
||||||
utf8proc_ssize_t ret;
|
utf8proc_ssize_t ret;
|
||||||
@ -16,13 +16,13 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
|
|||||||
/* Make a copy to ensure that memory is left uninitialized after "len"
|
/* Make a copy to ensure that memory is left uninitialized after "len"
|
||||||
* bytes. This way, Valgrind can detect overreads.
|
* bytes. This way, Valgrind can detect overreads.
|
||||||
*/
|
*/
|
||||||
unsigned char tmp[16];
|
utf8proc_uint8_t tmp[16];
|
||||||
memcpy(tmp, buf, len);
|
memcpy(tmp, buf, (unsigned long int)len);
|
||||||
|
|
||||||
tests++;
|
tests++;
|
||||||
if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
|
if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
|
||||||
fprintf(stderr, "Failed (%d):", line);
|
fprintf(stderr, "Failed (%d):", line);
|
||||||
for (int i = 0; i < len ; i++) {
|
for (utf8proc_ssize_t i = 0; i < len ; i++) {
|
||||||
fprintf(stderr, " 0x%02x", tmp[i]);
|
fprintf(stderr, " 0x%02x", tmp[i]);
|
||||||
}
|
}
|
||||||
fprintf(stderr, " -> %zd\n", ret);
|
fprintf(stderr, " -> %zd\n", ret);
|
||||||
@ -32,8 +32,8 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
uint32_t byt;
|
utf8proc_int32_t byt;
|
||||||
unsigned char buf[16];
|
utf8proc_uint8_t buf[16];
|
||||||
|
|
||||||
(void) argc; (void) argv; /* unused */
|
(void) argc; (void) argv; /* unused */
|
||||||
|
|
||||||
|
|||||||
@ -8,12 +8,14 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
for (i = 1; i < argc; ++i) {
|
for (i = 1; i < argc; ++i) {
|
||||||
utf8proc_uint8_t cstr[16], *map;
|
utf8proc_uint8_t cstr[16], *map;
|
||||||
unsigned int c;
|
utf8proc_uint32_t x;
|
||||||
|
utf8proc_int32_t c;
|
||||||
if (!strcmp(argv[i], "-V")) {
|
if (!strcmp(argv[i], "-V")) {
|
||||||
printf("utf8proc version %s\n", utf8proc_version());
|
printf("utf8proc version %s\n", utf8proc_version());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
|
check(sscanf(argv[i],"%x", &x) == 1, "invalid hex input %s", argv[i]);
|
||||||
|
c = (utf8proc_int32_t)x;
|
||||||
const utf8proc_property_t *p = utf8proc_get_property(c);
|
const utf8proc_property_t *p = utf8proc_get_property(c);
|
||||||
|
|
||||||
if (utf8proc_codepoint_valid(c))
|
if (utf8proc_codepoint_valid(c))
|
||||||
|
|||||||
@ -29,7 +29,8 @@ size_t skipspaces(const unsigned char *buf, size_t i)
|
|||||||
in dest, returning the number of bytes read from buf */
|
in dest, returning the number of bytes read from buf */
|
||||||
size_t encode(unsigned char *dest, const unsigned char *buf)
|
size_t encode(unsigned char *dest, const unsigned char *buf)
|
||||||
{
|
{
|
||||||
size_t i = 0, j, d = 0;
|
size_t i = 0, j;
|
||||||
|
utf8proc_ssize_t d = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int c;
|
int c;
|
||||||
i = skipspaces(buf, i);
|
i = skipspaces(buf, i);
|
||||||
|
|||||||
14
utf8proc.c
14
utf8proc.c
@ -125,7 +125,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
|||||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
||||||
) {
|
) {
|
||||||
utf8proc_uint32_t uc;
|
utf8proc_int32_t uc;
|
||||||
const utf8proc_uint8_t *end;
|
const utf8proc_uint8_t *end;
|
||||||
|
|
||||||
*dst = -1;
|
*dst = -1;
|
||||||
@ -137,7 +137,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
||||||
if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
if (uc < 0xe0) { // 2-byte sequence
|
if (uc < 0xe0) { // 2-byte sequence
|
||||||
// Must have valid continuation character
|
// Must have valid continuation character
|
||||||
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
@ -376,19 +376,19 @@ static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde
|
|||||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
|
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
|
||||||
return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
|
return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
|
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
|
||||||
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
|
return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
|
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
|
||||||
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
|
return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
|
||||||
@ -420,7 +420,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
|||||||
|
|
||||||
#define utf8proc_decompose_lump(replacement_uc) \
|
#define utf8proc_decompose_lump(replacement_uc) \
|
||||||
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||||
const utf8proc_property_t *property;
|
const utf8proc_property_t *property;
|
||||||
@ -735,7 +735,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
|
|||||||
*dstptr = NULL;
|
*dstptr = NULL;
|
||||||
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
|
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
|
||||||
if (result < 0) return result;
|
if (result < 0) return result;
|
||||||
buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
|
buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
|
||||||
if (!buffer) return UTF8PROC_ERROR_NOMEM;
|
if (!buffer) return UTF8PROC_ERROR_NOMEM;
|
||||||
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
|
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
|
||||||
if (result < 0) {
|
if (result < 0) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user