remove requirement that get_property and decompose_char argument be in range 0x0 to 0x10ffff
This commit is contained in:
parent
128c04e3d0
commit
3822984606
24
utf8proc.c
24
utf8proc.c
@ -182,7 +182,8 @@ DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
|
|||||||
} else return 0;
|
} else return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
|
/* internal "unsafe" version that does not check whether uc is in range */
|
||||||
|
static const utf8proc_property_t *get_property(int32_t uc) {
|
||||||
/* ASSERT: uc >= 0 && uc < 0x110000 */
|
/* ASSERT: uc >= 0 && uc < 0x110000 */
|
||||||
return utf8proc_properties + (
|
return utf8proc_properties + (
|
||||||
utf8proc_stage2table[
|
utf8proc_stage2table[
|
||||||
@ -191,6 +192,10 @@ DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
|
||||||
|
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : get_property(uc);
|
||||||
|
}
|
||||||
|
|
||||||
/* return whether there is a grapheme break between boundclasses lbc and tbc */
|
/* return whether there is a grapheme break between boundclasses lbc and tbc */
|
||||||
static bool grapheme_break(int lbc, int tbc) {
|
static bool grapheme_break(int lbc, int tbc) {
|
||||||
return
|
return
|
||||||
@ -242,13 +247,12 @@ DLLEXPORT const char *utf8proc_category_string(int32_t c) {
|
|||||||
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
options & ~UTF8PROC_LUMP, last_boundclass)
|
||||||
|
|
||||||
DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize,
|
DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize, int options, int *last_boundclass) {
|
||||||
int options, int *last_boundclass) {
|
|
||||||
/* ASSERT: uc >= 0 && uc < 0x110000 */
|
|
||||||
const utf8proc_property_t *property;
|
const utf8proc_property_t *property;
|
||||||
utf8proc_propval_t category;
|
utf8proc_propval_t category;
|
||||||
int32_t hangul_sindex;
|
int32_t hangul_sindex;
|
||||||
property = utf8proc_get_property(uc);
|
if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
|
||||||
|
property = get_property(uc);
|
||||||
category = property->category;
|
category = property->category;
|
||||||
hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
|
hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
|
||||||
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
|
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
|
||||||
@ -394,8 +398,8 @@ DLLEXPORT ssize_t utf8proc_decompose(
|
|||||||
const utf8proc_property_t *property1, *property2;
|
const utf8proc_property_t *property1, *property2;
|
||||||
uc1 = buffer[pos];
|
uc1 = buffer[pos];
|
||||||
uc2 = buffer[pos+1];
|
uc2 = buffer[pos+1];
|
||||||
property1 = utf8proc_get_property(uc1);
|
property1 = get_property(uc1);
|
||||||
property2 = utf8proc_get_property(uc2);
|
property2 = get_property(uc2);
|
||||||
if (property1->combining_class > property2->combining_class &&
|
if (property1->combining_class > property2->combining_class &&
|
||||||
property2->combining_class > 0) {
|
property2->combining_class > 0) {
|
||||||
buffer[pos] = uc2;
|
buffer[pos] = uc2;
|
||||||
@ -453,7 +457,7 @@ DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options
|
|||||||
int32_t composition;
|
int32_t composition;
|
||||||
for (rpos = 0; rpos < length; rpos++) {
|
for (rpos = 0; rpos < length; rpos++) {
|
||||||
current_char = buffer[rpos];
|
current_char = buffer[rpos];
|
||||||
current_property = utf8proc_get_property(current_char);
|
current_property = get_property(current_char);
|
||||||
if (starter && current_property->combining_class > max_combining_class) {
|
if (starter && current_property->combining_class > max_combining_class) {
|
||||||
/* combination perhaps possible */
|
/* combination perhaps possible */
|
||||||
int32_t hangul_lindex;
|
int32_t hangul_lindex;
|
||||||
@ -482,7 +486,7 @@ DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!starter_property) {
|
if (!starter_property) {
|
||||||
starter_property = utf8proc_get_property(*starter);
|
starter_property = get_property(*starter);
|
||||||
}
|
}
|
||||||
if (starter_property->comb1st_index >= 0 &&
|
if (starter_property->comb1st_index >= 0 &&
|
||||||
current_property->comb2nd_index >= 0) {
|
current_property->comb2nd_index >= 0) {
|
||||||
@ -491,7 +495,7 @@ DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options
|
|||||||
current_property->comb2nd_index
|
current_property->comb2nd_index
|
||||||
];
|
];
|
||||||
if (composition >= 0 && (!(options & UTF8PROC_STABLE) ||
|
if (composition >= 0 && (!(options & UTF8PROC_STABLE) ||
|
||||||
!(utf8proc_get_property(composition)->comp_exclusion))) {
|
!(get_property(composition)->comp_exclusion))) {
|
||||||
*starter = composition;
|
*starter = composition;
|
||||||
starter_property = NULL;
|
starter_property = NULL;
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@ -310,8 +310,6 @@ DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc);
|
|||||||
* the unicode char with the given code point 'uc'.
|
* the unicode char with the given code point 'uc'.
|
||||||
* If the character does not exist, a pointer to a special struct is
|
* If the character does not exist, a pointer to a special struct is
|
||||||
* returned, where 'category' is 0 (UTF8PROC_CATEGORY_CN).
|
* returned, where 'category' is 0 (UTF8PROC_CATEGORY_CN).
|
||||||
* WARNING: The parameter 'uc' has to be in the range of 0x0000 to
|
|
||||||
* 0x10FFFF, otherwise the program might crash!
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
DLLEXPORT ssize_t utf8proc_decompose_char(
|
DLLEXPORT ssize_t utf8proc_decompose_char(
|
||||||
@ -338,8 +336,6 @@ DLLEXPORT ssize_t utf8proc_decompose_char(
|
|||||||
* If the number of written chars would be bigger than 'bufsize',
|
* If the number of written chars would be bigger than 'bufsize',
|
||||||
* the buffer (up to 'bufsize') has unpredictable data, and the needed
|
* the buffer (up to 'bufsize') has unpredictable data, and the needed
|
||||||
* buffer size is returned.
|
* buffer size is returned.
|
||||||
* WARNING: The parameter 'uc' has to be in the range of 0x0000 to
|
|
||||||
* 0x10FFFF, otherwise the program might crash!
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
DLLEXPORT ssize_t utf8proc_decompose(
|
DLLEXPORT ssize_t utf8proc_decompose(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user