From 8239639e3fe1192c8b0c3f45ed7eb5be02853476 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson"
Date: Tue, 15 Dec 2020 15:26:56 -0500
Subject: [PATCH] fix NULL args in grapheme_break_stateful

---
 NEWS.md             |  7 +++++++
 test/graphemetest.c |  3 +++
 utf8proc.c          | 18 +++++++++++-------
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 0f92be5..6428b8c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,12 @@
 # utf8proc release history #
 
+## Version 2.6.1 ##
+
+2020-12-15
+
+ - Bugfix in `utf8proc_grapheme_break_stateful` for `NULL` state argument, which
+   also broke `utf8proc_grapheme_break`.
+
 ## Version 2.6 ##
 
 2020-11-23
diff --git a/test/graphemetest.c b/test/graphemetest.c
index 95e7dc0..22880fc 100644
--- a/test/graphemetest.c
+++ b/test/graphemetest.c
@@ -118,6 +118,9 @@ int main(int argc, char **argv)
     checkline("/ 1f926 1f3fc 200d 2642 fe0f /", true); /* facepalm + pale skin + zwj + male sign + FE0F */
     checkline("/ 1f468 1f3fb 200d 1f91d 200d 1f468 1f3fd /", true); /* man face + pale skin + zwj + hand holding + zwj + man face + dark skin */
 
+    check(utf8proc_grapheme_break(0x03b1, 0x03b2), "failed 03b1 / 03b2 test");
+    check(!utf8proc_grapheme_break(0x03b1, 0x0302), "failed 03b1 0302 test");
+
     printf("Passed regression tests!\n");
 
     return 0;
diff --git a/utf8proc.c b/utf8proc.c
index 5a9fbf3..1af3456 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -290,13 +290,14 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
 
 static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
 {
-  int lbc_override;
-  if (*state == UTF8PROC_BOUNDCLASS_START)
-    *state = lbc_override = lbc;
-  else
-    lbc_override = *state;
-  utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
   if (state) {
+    int lbc_override;
+    if (*state == UTF8PROC_BOUNDCLASS_START)
+      *state = lbc_override = lbc;
+    else
+      lbc_override = *state;
+    utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
+
     // Special support for GB 12/13 made possible by GB999. After two RI
     // class codepoints we want to force a break. Do this by resetting the
     // second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
@@ -315,8 +316,11 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
     }
     else
       *state = tbc;
+
+    return break_permitted;
   }
-  return break_permitted;
+  else
+    return grapheme_break_simple(lbc, tbc);
 }
 
 UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(