directory cleanup: move tests and data into subdirectories
This commit is contained in:
72
test/graphemetest.c
Normal file
72
test/graphemetest.c
Normal file
@@ -0,0 +1,72 @@
|
||||
#include "tests.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char *buf = NULL;
|
||||
size_t bufsize = 0;
|
||||
FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
|
||||
uint8_t src[1024];
|
||||
|
||||
check(f != NULL, "error opening GraphemeBreakTest.txt");
|
||||
while (getline(&buf, &bufsize, f) > 0) {
|
||||
size_t bi = 0, si = 0;
|
||||
lineno += 1;
|
||||
|
||||
if (lineno % 100 == 0)
|
||||
printf("checking line %zd...\n", lineno);
|
||||
|
||||
if (buf[0] == '#') continue;
|
||||
|
||||
while (buf[bi]) {
|
||||
bi = skipspaces(buf, bi);
|
||||
if (buf[bi] == '/') { /* grapheme break */
|
||||
src[si++] = '/';
|
||||
bi++;
|
||||
}
|
||||
else if (buf[bi] == '+') { /* no break */
|
||||
bi++;
|
||||
}
|
||||
else if (buf[bi] == '#') { /* start of comments */
|
||||
break;
|
||||
}
|
||||
else { /* hex-encoded codepoint */
|
||||
bi += encode((char*) (src + si), buf + bi) - 1;
|
||||
while (src[si]) ++si; /* advance to NUL termination */
|
||||
}
|
||||
}
|
||||
if (si && src[si-1] == '/')
|
||||
--si; /* no break after final grapheme */
|
||||
src[si] = 0; /* NUL-terminate */
|
||||
|
||||
if (si) {
|
||||
uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
|
||||
size_t i = 0, j = 0;
|
||||
ssize_t glen;
|
||||
uint8_t *g; /* utf8proc_map grapheme results */
|
||||
while (i < si) {
|
||||
if (src[i] != '/')
|
||||
utf8[j++] = src[i++];
|
||||
else
|
||||
i++;
|
||||
}
|
||||
glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
|
||||
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
||||
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
||||
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
||||
}
|
||||
else {
|
||||
check(glen >= 0, "utf8proc_map error = %s",
|
||||
utf8proc_errmsg(glen));
|
||||
for (i = 0; i <= glen; ++i)
|
||||
if (g[i] == 0xff)
|
||||
g[i] = '/'; /* easier-to-read output (/ is not in test strings) */
|
||||
check(!strcmp((char*)g, (char*)src),
|
||||
"grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
|
||||
}
|
||||
free(g);
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
printf("Passed tests after %zd lines!\n", lineno);
|
||||
return 0;
|
||||
}
|
||||
64
test/normtest.c
Normal file
64
test/normtest.c
Normal file
@@ -0,0 +1,64 @@
|
||||
#include "tests.h"
|
||||
|
||||
/* CHECK_NORM(NRM, norm, src): normalize the string `src` with
   utf8proc_<NRM>() and fail through check() unless the result equals the
   string `norm`.  The buffer returned by utf8proc_<NRM>() is freed here.

   Wrapped in do { } while (0) so that `CHECK_NORM(...);` is a single
   statement and is safe inside an unbraced if/else.  The result is tested
   for NULL before strcmp() (NOTE(review): assumes the normalization
   functions signal failure by returning NULL -- confirm in utf8proc.h). */
#define CHECK_NORM(NRM, norm, src) do { \
    char *src_norm = (char*) utf8proc_ ## NRM((uint8_t*) (src)); \
    check(src_norm != NULL, \
          "normalization returned NULL for %s", (src)); \
    check(!strcmp((norm), src_norm), \
          "normalization failed for %s -> %s", (src), (norm)); \
    free(src_norm); \
} while (0)
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char *buf = NULL;
|
||||
size_t bufsize = 0;
|
||||
FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
|
||||
char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024];
|
||||
|
||||
check(f != NULL, "error opening NormalizationTest.txt");
|
||||
while (getline(&buf, &bufsize, f) > 0) {
|
||||
size_t offset;
|
||||
lineno += 1;
|
||||
|
||||
if (buf[0] == '@') {
|
||||
printf("line %zd: %s", lineno, buf + 1);
|
||||
continue;
|
||||
}
|
||||
else if (lineno % 1000 == 0)
|
||||
printf("checking line %zd...\n", lineno);
|
||||
|
||||
if (buf[0] == '#') continue;
|
||||
|
||||
offset = encode(source, buf);
|
||||
offset += encode(NFC, buf + offset);
|
||||
offset += encode(NFD, buf + offset);
|
||||
offset += encode(NFKC, buf + offset);
|
||||
offset += encode(NFKD, buf + offset);
|
||||
|
||||
CHECK_NORM(NFC, NFC, source);
|
||||
CHECK_NORM(NFC, NFC, NFC);
|
||||
CHECK_NORM(NFC, NFC, NFD);
|
||||
CHECK_NORM(NFC, NFKC, NFKC);
|
||||
CHECK_NORM(NFC, NFKC, NFKD);
|
||||
|
||||
CHECK_NORM(NFD, NFD, source);
|
||||
CHECK_NORM(NFD, NFD, NFC);
|
||||
CHECK_NORM(NFD, NFD, NFD);
|
||||
CHECK_NORM(NFD, NFKD, NFKC);
|
||||
CHECK_NORM(NFD, NFKD, NFKD);
|
||||
|
||||
CHECK_NORM(NFKC, NFKC, source);
|
||||
CHECK_NORM(NFKC, NFKC, NFC);
|
||||
CHECK_NORM(NFKC, NFKC, NFD);
|
||||
CHECK_NORM(NFKC, NFKC, NFKC);
|
||||
CHECK_NORM(NFKC, NFKC, NFKD);
|
||||
|
||||
CHECK_NORM(NFKD, NFKD, source);
|
||||
CHECK_NORM(NFKD, NFKD, NFC);
|
||||
CHECK_NORM(NFKD, NFKD, NFD);
|
||||
CHECK_NORM(NFKD, NFKD, NFKC);
|
||||
CHECK_NORM(NFKD, NFKD, NFKD);
|
||||
}
|
||||
fclose(f);
|
||||
printf("Passed tests after %zd lines!\n", lineno);
|
||||
return 0;
|
||||
}
|
||||
45
test/printproperty.c
Normal file
45
test/printproperty.c
Normal file
@@ -0,0 +1,45 @@
|
||||
/* simple test program to print out the utf8proc properties for a codepoint */
|
||||
|
||||
#include "tests.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < argc; ++i) {
|
||||
int c;
|
||||
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
|
||||
const utf8proc_property_t *p = utf8proc_get_property(c);
|
||||
printf("U+%s:\n"
|
||||
" category = %d\n"
|
||||
" combining_class = %d\n"
|
||||
" bidi_class = %d\n"
|
||||
" decomp_type = %d\n"
|
||||
" uppercase_mapping = %x\n"
|
||||
" lowercase_mapping = %x\n"
|
||||
" titlecase_mapping = %x\n"
|
||||
" comb1st_index = %d\n"
|
||||
" comb2nd_index = %d\n"
|
||||
" bidi_mirrored = %d\n"
|
||||
" comp_exclusion = %d\n"
|
||||
" ignorable = %d\n"
|
||||
" control_boundary = %d\n"
|
||||
" boundclass = %d\n",
|
||||
argv[i],
|
||||
p->category,
|
||||
p->combining_class,
|
||||
p->bidi_class,
|
||||
p->decomp_type,
|
||||
p->uppercase_mapping,
|
||||
p->lowercase_mapping,
|
||||
p->titlecase_mapping,
|
||||
p->comb1st_index,
|
||||
p->comb2nd_index,
|
||||
p->bidi_mirrored,
|
||||
p->comp_exclusion,
|
||||
p->ignorable,
|
||||
p->control_boundary,
|
||||
p->boundclass);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
53
test/tests.h
Normal file
53
test/tests.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/* Common functions and includes for our test programs. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "../utf8proc.h"
|
||||
|
||||
/* Number of the input line currently being processed; the test drivers
   increment it and check() prefixes its diagnostics with it. */
size_t lineno = 0;

/* Assert-style helper for the test programs.
 *
 * cond:   condition that is expected to hold (non-zero).
 * format: printf-style format for the failure message, followed by its
 *         arguments.
 *
 * When cond is zero, prints "line <lineno>: <message>\n" to stderr and
 * terminates the program with exit status 1.  When cond is non-zero the
 * call does nothing and returns.
 */
void check(int cond, const char *format, ...)
{
    if (!cond) {
        va_list args;
        fprintf(stderr, "line %zu: ", lineno); /* lineno is size_t: %zu */
        va_start(args, format);
        vfprintf(stderr, format, args);
        va_end(args);
        fprintf(stderr, "\n");
        exit(1);
    }
}
|
||||
|
||||
/* Return the index of the first character at or after buf[i] that is not
   whitespace according to isspace().  The terminating NUL is not
   whitespace, so the scan never runs past the end of a NUL-terminated
   string.

   The character is converted to unsigned char before the isspace() call:
   passing a negative plain char (any byte >= 0x80 where char is signed) to
   a <ctype.h> function is undefined behaviour. */
size_t skipspaces(const char *buf, size_t i)
{
    while (isspace((unsigned char) buf[i])) ++i;
    return i;
}
|
||||
|
||||
/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
   separated by whitespace, and terminated by any character not in
   [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
   in dest (NUL-terminated), returning the number of bytes read from buf.

   The return value is the index of the terminating character plus one,
   i.e. the terminator counts as read.  This holds even when the terminator
   is the end-of-string NUL, so a caller must make sure the terminator was
   not NUL before reading buf at the returned offset.

   dest must be large enough for the encoded string plus the NUL; no size
   is passed in and no bounds check is made here. */
size_t encode(char *dest, const char *buf)
{
    size_t i = 0, j, d = 0;
    for (;;) {
        unsigned int c; /* %x stores through an unsigned int pointer */
        i = skipspaces(buf, i);
        /* find end of hex input; the cast keeps tolower() defined for
           bytes >= 0x80 where plain char is signed */
        for (j = i; buf[j] && strchr("0123456789abcdef", tolower((unsigned char) buf[j])); ++j)
            ;
        if (j == i) { /* no codepoint found */
            dest[d] = 0; /* NUL-terminate destination string */
            return i + 1;
        }
        check(sscanf(buf + i, "%x", &c) == 1, "invalid hex input %s", buf+i);
        i = j; /* skip to char after hex input */
        d += utf8proc_encode_char((int) c, (uint8_t *) (dest + d));
    }
}
|
||||
|
||||
Reference in New Issue
Block a user