add toupper/tolower functions (for JuliaLang/julia#11471)
This commit is contained in:
parent
35ec8e32e7
commit
a8fb4b1772
1
.gitignore
vendored
1
.gitignore
vendored
@ -23,3 +23,4 @@ printproperty
|
|||||||
charwidth
|
charwidth
|
||||||
valid
|
valid
|
||||||
iterate
|
iterate
|
||||||
|
case
|
||||||
|
|||||||
6
Makefile
6
Makefile
@ -111,10 +111,14 @@ test/valid: test/valid.c utf8proc.o utf8proc.h test/tests.h
|
|||||||
test/iterate: test/iterate.c utf8proc.o utf8proc.h test/tests.h
|
test/iterate: test/iterate.c utf8proc.o utf8proc.h test/tests.h
|
||||||
$(cc) test/iterate.c utf8proc.o -o $@
|
$(cc) test/iterate.c utf8proc.o -o $@
|
||||||
|
|
||||||
check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
test/case: test/case.c utf8proc.o utf8proc.h test/tests.h
|
||||||
|
$(cc) test/case.c utf8proc.o -o $@
|
||||||
|
|
||||||
|
check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
||||||
$(MAKE) -C bench
|
$(MAKE) -C bench
|
||||||
test/normtest data/NormalizationTest.txt
|
test/normtest data/NormalizationTest.txt
|
||||||
test/graphemetest data/GraphemeBreakTest.txt
|
test/graphemetest data/GraphemeBreakTest.txt
|
||||||
test/charwidth
|
test/charwidth
|
||||||
test/valid
|
test/valid
|
||||||
test/iterate
|
test/iterate
|
||||||
|
test/case
|
||||||
|
|||||||
50
test/case.c
Normal file
50
test/case.c
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#include "tests.h"
|
||||||
|
#include <wctype.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
int error = 0, better = 0;
|
||||||
|
utf8proc_int32_t c;
|
||||||
|
|
||||||
|
(void) argc; /* unused */
|
||||||
|
(void) argv; /* unused */
|
||||||
|
|
||||||
|
/* some simple sanity tests of the character widths */
|
||||||
|
for (c = 0; c <= 0x110000; ++c) {
|
||||||
|
utf8proc_int32_t l = utf8proc_tolower(c);
|
||||||
|
utf8proc_int32_t u = utf8proc_toupper(c);
|
||||||
|
|
||||||
|
check(l == c || utf8proc_codepoint_valid(l), "invalid tolower");
|
||||||
|
check(u == c || utf8proc_codepoint_valid(u), "invalid toupper");
|
||||||
|
|
||||||
|
if (sizeof(wint_t) > 2 || c < (1<<16)) {
|
||||||
|
wint_t l0 = towlower(c), u0 = towupper(c);
|
||||||
|
|
||||||
|
/* OS unicode tables may be out of date. But if they
|
||||||
|
do have a lower/uppercase mapping, hopefully it
|
||||||
|
is correct? */
|
||||||
|
if (l0 != c && l0 != l) {
|
||||||
|
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
||||||
|
l, c, l0);
|
||||||
|
++error;
|
||||||
|
}
|
||||||
|
else if (l0 != l) { /* often true for out-of-date OS unicode */
|
||||||
|
++better;
|
||||||
|
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
||||||
|
}
|
||||||
|
if (u0 != c && u0 != u) {
|
||||||
|
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
||||||
|
u, c, u0);
|
||||||
|
++error;
|
||||||
|
}
|
||||||
|
else if (u0 != u) { /* often true for out-of-date OS unicode */
|
||||||
|
++better;
|
||||||
|
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
check(!error, "utf8proc case conversion FAILED %d tests.", error);
|
||||||
|
printf("More up-to-date than OS unicode tables for %d tests.\n", better);
|
||||||
|
printf("utf8proc case conversion tests SUCCEEDED.\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
12
utf8proc.c
12
utf8proc.c
@ -264,6 +264,18 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, ut
|
|||||||
utf8proc_get_property(c2)->boundclass);
|
utf8proc_get_property(c2)->boundclass);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return the lowercase_mapping stored in the property record that
   utf8proc_get_property yields for `c`.  A negative stored value is
   treated as "no mapping", in which case `c` itself is returned. */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
{
  const utf8proc_int32_t lower = utf8proc_get_property(c)->lowercase_mapping;

  if (lower < 0) {
    return c; /* nothing to map to: hand the input back unchanged */
  }
  return lower;
}
|
||||||
|
|
||||||
|
/* Return the uppercase_mapping stored in the property record that
   utf8proc_get_property yields for `c`.  A negative stored value is
   treated as "no mapping", in which case `c` itself is returned. */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
{
  const utf8proc_int32_t upper = utf8proc_get_property(c)->uppercase_mapping;

  if (upper < 0) {
    return c; /* nothing to map to: hand the input back unchanged */
  }
  return upper;
}
|
||||||
|
|
||||||
/* return a character width analogous to wcwidth (except portable and
|
/* return a character width analogous to wcwidth (except portable and
|
||||||
hopefully less buggy than most system wcwidth functions). */
|
hopefully less buggy than most system wcwidth functions). */
|
||||||
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
|
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
|
||||||
|
|||||||
15
utf8proc.h
15
utf8proc.h
@ -511,6 +511,21 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
|
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a codepoint `c`, return the codepoint of the corresponding
|
||||||
|
* lower-case character, if any; otherwise (if there is no lower-case
|
||||||
|
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||||
|
*/
|
||||||
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a codepoint `c`, return the codepoint of the corresponding
|
||||||
|
* upper-case character, if any; otherwise (if there is no upper-case
|
||||||
|
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||||
|
*/
|
||||||
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
|
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
|
||||||
* except that a width of 0 is returned for non-printable codepoints
|
* except that a width of 0 is returned for non-printable codepoints
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user