Merge branch 'master' of https://github.com/JuliaLang/utf8proc
This commit is contained in:
commit
94395db282
@ -70,5 +70,17 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
fclose(f);
|
fclose(f);
|
||||||
printf("Passed tests after %zd lines!\n", lineno);
|
printf("Passed tests after %zd lines!\n", lineno);
|
||||||
|
|
||||||
|
/* issue 144 */
|
||||||
|
{
|
||||||
|
utf8proc_uint8_t input[] = {0xef,0xbf,0xbf,0xef,0xbf,0xbe,0x00}; /* "\uffff\ufffe" */
|
||||||
|
utf8proc_uint8_t output[] = {0xff,0xef,0xbf,0xbf,0xff,0xef,0xbf,0xbe,0x00}; /* with 0xff grapheme markers */
|
||||||
|
utf8proc_ssize_t glen;
|
||||||
|
utf8proc_uint8_t *g;
|
||||||
|
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
|
||||||
|
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
|
||||||
|
free(g);
|
||||||
|
};
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
18
utf8proc.c
18
utf8proc.c
@ -196,9 +196,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
|
|||||||
} else return 0;
|
} else return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* internal "unsafe" version that does not check whether uc is in range */
|
/* internal version used for inserting 0xff bytes between graphemes */
|
||||||
static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
||||||
if (uc < 0x00) {
|
if (uc < 0x00) {
|
||||||
|
if (uc == -1) { /* internal value used for grapheme breaks */
|
||||||
|
dst[0] = (utf8proc_uint8_t)0xFF;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
} else if (uc < 0x80) {
|
} else if (uc < 0x80) {
|
||||||
dst[0] = (utf8proc_uint8_t)uc;
|
dst[0] = (utf8proc_uint8_t)uc;
|
||||||
@ -207,12 +211,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
|
|||||||
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
||||||
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||||
return 2;
|
return 2;
|
||||||
} else if (uc == 0xFFFF) {
|
|
||||||
dst[0] = (utf8proc_uint8_t)0xFF;
|
|
||||||
return 1;
|
|
||||||
} else if (uc == 0xFFFE) {
|
|
||||||
dst[0] = (utf8proc_uint8_t)0xFE;
|
|
||||||
return 1;
|
|
||||||
} else if (uc < 0x10000) {
|
} else if (uc < 0x10000) {
|
||||||
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
||||||
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||||
@ -480,7 +478,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
|||||||
int tbc = property->boundclass;
|
int tbc = property->boundclass;
|
||||||
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
||||||
if (boundary) {
|
if (boundary) {
|
||||||
if (bufsize >= 1) dst[0] = 0xFFFF;
|
if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
|
||||||
if (bufsize >= 2) dst[1] = uc;
|
if (bufsize >= 2) dst[1] = uc;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
@ -686,7 +684,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|||||||
if (options & UTF8PROC_CHARBOUND) {
|
if (options & UTF8PROC_CHARBOUND) {
|
||||||
for (rpos = 0; rpos < length; rpos++) {
|
for (rpos = 0; rpos < length; rpos++) {
|
||||||
uc = buffer[rpos];
|
uc = buffer[rpos];
|
||||||
wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (rpos = 0; rpos < length; rpos++) {
|
for (rpos = 0; rpos < length; rpos++) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user