Prefix other C99 typedefs with utf8proc_

2015-04-06 22:36:33 -07:00
parent ad27722923
commit 0a818c7003
7 changed files with 102 additions and 94 deletions
--- a/data/data_generator.rb
+++ b/data/data_generator.rb
@@ -268,7 +268,7 @@ for code in 0...0x110000
  end
 end

-$stdout << "const int32_t utf8proc_sequences[] = {\n  "
+$stdout << "const utf8proc_int32_t utf8proc_sequences[] = {\n  "
 i = 0
 $int_array.each do |entry|
  i += 1
@@ -280,7 +280,7 @@ $int_array.each do |entry|
 end
 $stdout << "};\n\n"

-$stdout << "const uint16_t utf8proc_stage1table[] = {\n  "
+$stdout << "const utf8proc_uint16_t utf8proc_stage1table[] = {\n  "
 i = 0
 stage1.each do |entry|
  i += 1
@@ -292,7 +292,7 @@ stage1.each do |entry|
 end
 $stdout << "};\n\n"

-$stdout << "const uint16_t utf8proc_stage2table[] = {\n  "
+$stdout << "const utf8proc_uint16_t utf8proc_stage2table[] = {\n  "
 i = 0
 stage2.flatten.each do |entry|
  i += 1
@@ -311,7 +311,7 @@ properties.each { |line|
 }
 $stdout << "};\n\n"

-$stdout << "const int32_t utf8proc_combinations[] = {\n  "
+$stdout << "const utf8proc_int32_t utf8proc_combinations[] = {\n  "
 i = 0
 comb1st_indicies.keys.each_index do |a|
  comb2nd_indicies.keys.each_index do |b|
--- a/test/graphemetest.c
+++ b/test/graphemetest.c
@@ -5,7 +5,7 @@ int main(int argc, char **argv)
    char *buf = NULL;
    size_t bufsize = 0;
    FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
-    uint8_t src[1024];
+    utf8proc_uint8_t src[1024];
    
    check(f != NULL, "error opening GraphemeBreakTest.txt");
    while (getline(&buf, &bufsize, f) > 0) {
@@ -39,10 +39,10 @@ int main(int argc, char **argv)
        src[si] = 0; /* NUL-terminate */
        
        if (si) {
-            uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
+            utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
            size_t i = 0, j = 0;
            utf8proc_ssize_t glen;
-            uint8_t *g; /* utf8proc_map grapheme results */
+            utf8proc_uint8_t *g; /* utf8proc_map grapheme results */
            while (i < si) {
                if (src[i] != '/')
                    utf8[j++] = src[i++];
--- a/test/normtest.c
+++ b/test/normtest.c
@@ -1,7 +1,7 @@
 #include "tests.h"

 #define CHECK_NORM(NRM, norm, src) {                                 \
-    char *src_norm = (char*) utf8proc_ ## NRM((uint8_t*) src);      \
+    char *src_norm = (char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src);      \
    check(!strcmp(norm, src_norm),                                  \
          "normalization failed for %s -> %s", src, norm);          \
    free(src_norm);                                                 \
--- a/test/tests.h
+++ b/test/tests.h
@@ -47,7 +47,7 @@ size_t encode(char *dest, const char *buf)
          }
          check(sscanf(buf + i, "%x", &c) == 1, "invalid hex input %s", buf+i);
          i = j; /* skip to char after hex input */
-          d += utf8proc_encode_char(c, (uint8_t *) (dest + d));
+          d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
     } while (1);
 }

--- a/utf8proc.c
+++ b/utf8proc.c
@@ -44,7 +44,7 @@
 #include "utf8proc_data.c"


-UTF8PROC_DLLEXPORT const int8_t utf8proc_utf8class[256] = {
+UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -109,11 +109,11 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
 }

 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
-  const uint8_t *str, utf8proc_ssize_t strlen, int32_t *dst
+  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
 ) {
  int length;
  int i;
-  int32_t uc = -1;
+  utf8proc_int32_t uc = -1;
  *dst = -1;
  if (!strlen) return 0;
  length = utf8proc_utf8class[str[0]];
@@ -148,14 +148,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
  return length;
 }

-UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc) {
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
  if (uc < 0 || uc >= 0x110000 ||
    ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
    (uc >= 0xFDD0 && uc < 0xFDF0)) return false;
  else return true;
 }

-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
  if (uc < 0x00) {
    return 0;
  } else if (uc < 0x80) {
@@ -186,7 +186,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t uc, uint8_t *ds
 }

 /* internal "unsafe" version that does not check whether uc is in range */
-static const utf8proc_property_t *get_property(int32_t uc) {
+static const utf8proc_property_t *get_property(utf8proc_int32_t uc) {
  /* ASSERT: uc >= 0 && uc < 0x110000 */
  return utf8proc_properties + (
    utf8proc_stage2table[
@@ -195,12 +195,12 @@ static const utf8proc_property_t *get_property(int32_t uc) {
  );
 }

-UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
+UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
  return uc < 0 || uc >= 0x110000 ? utf8proc_properties : get_property(uc);
 }

 /* return whether there is a grapheme break between boundclasses lbc and tbc */
-static bool grapheme_break(int lbc, int tbc) {
+static utf8proc_bool grapheme_break(int lbc, int tbc) {
  return 
    (lbc == UTF8PROC_BOUNDCLASS_START) ? true :
    (lbc == UTF8PROC_BOUNDCLASS_CR &&
@@ -226,22 +226,22 @@ static bool grapheme_break(int lbc, int tbc) {
 }

 /* return whether there is a grapheme break between codepoints c1 and c2 */
-UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2) {
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) {
  return grapheme_break(utf8proc_get_property(c1)->boundclass,
                        utf8proc_get_property(c2)->boundclass);
 }

 /* return a character width analogous to wcwidth (except portable and
   hopefully less buggy than most system wcwidth functions). */
-UTF8PROC_DLLEXPORT int utf8proc_charwidth(int32_t c) {
+UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
  return utf8proc_get_property(c)->charwidth;
 }

-UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(int32_t c) {
+UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
  return utf8proc_get_property(c)->category;
 }

-UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) {
+UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
  static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
  return s[utf8proc_category(c)];
 }
@@ -250,17 +250,17 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) {
  return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
  options & ~UTF8PROC_LUMP, last_boundclass)

-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
  const utf8proc_property_t *property;
  utf8proc_propval_t category;
-  int32_t hangul_sindex;
+  utf8proc_int32_t hangul_sindex;
  if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
  property = get_property(uc);
  category = property->category;
  hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
  if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
    if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
-      int32_t hangul_tindex;
+      utf8proc_int32_t hangul_tindex;
      if (bufsize >= 1) {
        dst[0] = UTF8PROC_HANGUL_LBASE +
          hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
@@ -312,7 +312,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t
  }
  if (options & UTF8PROC_CASEFOLD) {
    if (property->casefold_mapping) {
-      const int32_t *casefold_entry;
+      const utf8proc_int32_t *casefold_entry;
      utf8proc_ssize_t written = 0;
      for (casefold_entry = property->casefold_mapping;
          *casefold_entry >= 0; casefold_entry++) {
@@ -327,7 +327,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t
  if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
    if (property->decomp_mapping &&
        (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
-      const int32_t *decomp_entry;
+      const utf8proc_int32_t *decomp_entry;
      utf8proc_ssize_t written = 0;
      for (decomp_entry = property->decomp_mapping;
          *decomp_entry >= 0; decomp_entry++) {
@@ -340,7 +340,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t
    }
  }
  if (options & UTF8PROC_CHARBOUND) {
-    bool boundary;
+    utf8proc_bool boundary;
    int tbc = property->boundclass;
    boundary = grapheme_break(*last_boundclass, tbc);
    *last_boundclass = tbc;
@@ -355,8 +355,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t
 }

 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
-  const uint8_t *str, utf8proc_ssize_t strlen,
-  int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
+  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
+  utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
 ) {
  /* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
  utf8proc_ssize_t wpos = 0;
@@ -366,7 +366,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
      !(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
    return UTF8PROC_ERROR_INVALIDOPTS;
  {
-    int32_t uc;
+    utf8proc_int32_t uc;
    utf8proc_ssize_t rpos = 0;
    utf8proc_ssize_t decomp_result;
    int boundclass = UTF8PROC_BOUNDCLASS_START;
@@ -390,14 +390,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
      if (decomp_result < 0) return decomp_result;
      wpos += decomp_result;
      /* prohibiting integer overflows due to too long strings: */
-      if (wpos < 0 || wpos > SSIZE_MAX/sizeof(int32_t)/2)
+      if (wpos < 0 || wpos > SSIZE_MAX/sizeof(utf8proc_int32_t)/2)
        return UTF8PROC_ERROR_OVERFLOW;
    }
  }
  if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
    utf8proc_ssize_t pos = 0;
    while (pos < wpos-1) {
-      int32_t uc1, uc2;
+      utf8proc_int32_t uc1, uc2;
      const utf8proc_property_t *property1, *property2;
      uc1 = buffer[pos];
      uc2 = buffer[pos+1];
@@ -416,13 +416,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
  return wpos;
 }

-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
  /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
     ASSERT: 'buffer' has one spare byte of free space at the end! */
  if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
    utf8proc_ssize_t rpos;
    utf8proc_ssize_t wpos = 0;
-    int32_t uc;
+    utf8proc_int32_t uc;
    for (rpos = 0; rpos < length; rpos++) {
      uc = buffer[rpos];
      if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
@@ -451,23 +451,23 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_
    length = wpos;
  }
  if (options & UTF8PROC_COMPOSE) {
-    int32_t *starter = NULL;
-    int32_t current_char;
+    utf8proc_int32_t *starter = NULL;
+    utf8proc_int32_t current_char;
    const utf8proc_property_t *starter_property = NULL, *current_property;
    utf8proc_propval_t max_combining_class = -1;
    utf8proc_ssize_t rpos;
    utf8proc_ssize_t wpos = 0;
-    int32_t composition;
+    utf8proc_int32_t composition;
    for (rpos = 0; rpos < length; rpos++) {
      current_char = buffer[rpos];
      current_property = get_property(current_char);
      if (starter && current_property->combining_class > max_combining_class) {
        /* combination perhaps possible */
-        int32_t hangul_lindex;
-        int32_t hangul_sindex;
+        utf8proc_int32_t hangul_lindex;
+        utf8proc_int32_t hangul_sindex;
        hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
        if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
-          int32_t hangul_vindex;
+          utf8proc_int32_t hangul_vindex;
          hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
          if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
            *starter = UTF8PROC_HANGUL_SBASE +
@@ -480,7 +480,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_
        hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
        if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
            (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
-          int32_t hangul_tindex;
+          utf8proc_int32_t hangul_tindex;
          hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
          if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
            *starter += hangul_tindex;
@@ -521,25 +521,25 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_
  }
  {
    utf8proc_ssize_t rpos, wpos = 0;
-    int32_t uc;
+    utf8proc_int32_t uc;
    for (rpos = 0; rpos < length; rpos++) {
      uc = buffer[rpos];
-      wpos += utf8proc_encode_char(uc, ((uint8_t *)buffer) + wpos);
+      wpos += utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
    }
-    ((uint8_t *)buffer)[wpos] = 0;
+    ((utf8proc_uint8_t *)buffer)[wpos] = 0;
    return wpos;
  }
 }

 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
-  const uint8_t *str, utf8proc_ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
+  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
 ) {
-  int32_t *buffer;
+  utf8proc_int32_t *buffer;
  utf8proc_ssize_t result;
  *dstptr = NULL;
  result = utf8proc_decompose(str, strlen, NULL, 0, options);
  if (result < 0) return result;
-  buffer = (int32_t *) malloc(result * sizeof(int32_t) + 1);
+  buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
  if (!buffer) return UTF8PROC_ERROR_NOMEM;
  result = utf8proc_decompose(str, strlen, buffer, result, options);
  if (result < 0) {
@@ -552,37 +552,37 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
    return result;
  }
  {
-    int32_t *newptr;
-    newptr = (int32_t *) realloc(buffer, (size_t)result+1);
+    utf8proc_int32_t *newptr;
+    newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
    if (newptr) buffer = newptr;
  }
-  *dstptr = (uint8_t *)buffer;
+  *dstptr = (utf8proc_uint8_t *)buffer;
  return result;
 }

-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str) {
-  uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
+  utf8proc_uint8_t *retval;
  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
    UTF8PROC_DECOMPOSE);
  return retval;
 }

-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str) {
-  uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
+  utf8proc_uint8_t *retval;
  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
    UTF8PROC_COMPOSE);
  return retval;
 }

-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str) {
-  uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
+  utf8proc_uint8_t *retval;
  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
    UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
  return retval;
 }

-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str) {
-  uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
+  utf8proc_uint8_t *retval;
  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
    UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
  return retval;
--- a/utf8proc.h
+++ b/utf8proc.h
@@ -77,24 +77,32 @@
 #include <stdlib.h>
 #include <sys/types.h>
 #ifdef _MSC_VER
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef short int16_t;
-typedef unsigned short uint16_t;
-typedef int int32_t;
+typedef signed char utf8proc_int8_t;
+typedef unsigned char utf8proc_uint8_t;
+typedef short utf8proc_int16_t;
+typedef unsigned short utf8proc_uint16_t;
+typedef int utf8proc_int32_t;
 #  ifdef _WIN64
 typedef __int64 utf8proc_ssize_t;
 #  else
 typedef int utf8proc_ssize_t;
 #  endif
 #  ifndef __cplusplus
-typedef unsigned char bool;
+typedef unsigned char utf8proc_bool;
 enum {false, true};
+#  else
+typedef bool utf8proc_bool;
 #  endif
 #else
 #  include <stdbool.h>
 #  include <inttypes.h>
+typedef int8_t utf8proc_int8_t;
+typedef uint8_t utf8proc_uint8_t;
+typedef int16_t utf8proc_int16_t;
+typedef uint16_t utf8proc_uint16_t;
+typedef int32_t utf8proc_int32_t;
 typedef ssize_t utf8proc_ssize_t;
+typedef bool utf8proc_bool;
 #endif
 #include <limits.h>

@@ -204,7 +212,7 @@ typedef enum {
 /* @name Types */

 /** Holds the value of a property. */
-typedef int16_t utf8proc_propval_t;
+typedef utf8proc_int16_t utf8proc_propval_t;

 /** Struct containing information about a codepoint. */
 typedef struct utf8proc_property_struct {
@@ -224,13 +232,13 @@ typedef struct utf8proc_property_struct {
   * @see utf8proc_decomp_type_t.
   */
  utf8proc_propval_t decomp_type;
-  const int32_t *decomp_mapping;
-  const int32_t *casefold_mapping;
-  int32_t uppercase_mapping;
-  int32_t lowercase_mapping;
-  int32_t titlecase_mapping;
-  int32_t comb1st_index;
-  int32_t comb2nd_index;
+  const utf8proc_int32_t *decomp_mapping;
+  const utf8proc_int32_t *casefold_mapping;
+  utf8proc_int32_t uppercase_mapping;
+  utf8proc_int32_t lowercase_mapping;
+  utf8proc_int32_t titlecase_mapping;
+  utf8proc_int32_t comb1st_index;
+  utf8proc_int32_t comb2nd_index;
  unsigned bidi_mirrored:1;
  unsigned comp_exclusion:1;
  /**
@@ -352,7 +360,7 @@ typedef enum {
 * Array containing the byte lengths of a UTF-8 encoded codepoint based
 * on the first byte.
 */
-UTF8PROC_DLLEXPORT extern const int8_t utf8proc_utf8class[256];
+UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];

 /**
 * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
@@ -377,7 +385,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
 * In case of success, the number of bytes read is returned; otherwise, a
 * negative error code is returned.
 */
-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const uint8_t *str, utf8proc_ssize_t strlen, int32_t *codepoint_ref);
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);

 /**
 * Check if a codepoint is valid (regardless of whether it has been
@@ -385,7 +393,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const uint8_t *str, utf8pro
 *
 * @return 1 if the given `codepoint` is valid and otherwise return 0.
 */
-UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint);
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);

 /**
 * Encodes the codepoint as a UTF-8 string in the byte array pointed
@@ -396,7 +404,7 @@ UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint);
 *
 * This function does not check whether `codepoint` is valid Unicode.
 */
-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst);
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);

 /**
 * Look up the properties for a given codepoint.
@@ -410,7 +418,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t codepoint, uint
 * If the codepoint is unassigned or invalid, a pointer to a special struct is
 * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
 */
-UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t codepoint);
+UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint);

 /** Decompose a codepoint into an array of codepoints.
 *
@@ -440,7 +448,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t code
 * undefined data.
 */
 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
-  int32_t codepoint, int32_t *dst, utf8proc_ssize_t bufsize,
+  utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
  utf8proc_option_t options, int *last_boundclass
 );

@@ -461,8 +469,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
 * undefined data.
 */
 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
-  const uint8_t *str, utf8proc_ssize_t strlen,
-  int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
+  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
+  utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
 );

 /**
@@ -490,13 +498,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
 *          entries of the array pointed to by `str` have to be in the
 *          range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
 */
-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);

 /**
 * Given a pair of consecutive codepoints, return whether a grapheme break is
 * permitted between them (as defined by the extended grapheme clusters in UAX#29).
 */
-UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t codepoint2);
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);

 /**
 * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
@@ -506,19 +514,19 @@ UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t code
 * @note
 * If you want to check for particular types of non-printable characters,
 * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */
-UTF8PROC_DLLEXPORT int utf8proc_charwidth(int32_t codepoint);
+UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint);

 /**
 * Return the Unicode category for the codepoint (one of the
 * @ref utf8proc_category_t constants.)
 */
-UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(int32_t codepoint);
+UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);

 /**
 * Return the two-letter (nul-terminated) Unicode category string for
 * the codepoint (e.g. `"Lu"` or `"Co"`).
 */
-UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint);
+UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint);

 /**
 * Maps the given UTF-8 string pointed to by `str` to a new UTF-8
@@ -539,7 +547,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint);
 * with `malloc`, and should therefore be deallocated with `free`.
 */
 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
-  const uint8_t *str, utf8proc_ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
+  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
 );

 /** @name Unicode normalization
@@ -551,13 +559,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
 */
 /** @{ */
 /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
 /** NFC normalization (@ref UTF8PROC_COMPOSE). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
 /** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
 /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
 /** @} */

 #ifdef __cplusplus
--- a/utf8proc_data.c
+++ b/utf8proc_data.c
@@ -1,4 +1,4 @@
-const int32_t utf8proc_sequences[] = {
+const utf8proc_int32_t utf8proc_sequences[] = {
  97, -1, 98, -1, 99, -1, 100, 
  -1, 101, -1, 102, -1, 103, -1, 104, 
  -1, 105, -1, 106, -1, 107, -1, 108, 
@@ -1523,7 +1523,7 @@ const int32_t utf8proc_sequences[] = {
  172689, -1, 19798, -1, 40702, -1, 40709, -1, 
  40719, -1, 40726, -1, 173568, -1, };

-const uint16_t utf8proc_stage1table[] = {
+const utf8proc_uint16_t utf8proc_stage1table[] = {
  0, 256, 512, 768, 1024, 1280, 1536, 
  1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, 
  3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 
@@ -2070,7 +2070,7 @@ const uint16_t utf8proc_stage1table[] = {
  18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, 
  35584, };

-const uint16_t utf8proc_stage2table[] = {
+const utf8proc_uint16_t utf8proc_stage2table[] = {
  1, 2, 2, 2, 2, 2, 2, 
  2, 2, 3, 4, 3, 5, 6, 2, 
  2, 2, 2, 2, 2, 2, 2, 2, 
@@ -13003,7 +13003,7 @@ const utf8proc_property_t utf8proc_properties[] = {
  {UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, 0, utf8proc_sequences + 12179, NULL, -1, -1, -1, -1, -1, false, false, false, false, UTF8PROC_BOUNDCLASS_OTHER, 2},
 };

-const int32_t utf8proc_combinations[] = {
+const utf8proc_int32_t utf8proc_combinations[] = {
  192, 193, 194, 195, 196, 197, -1, 
  256, 258, 260, 550, 461, -1, -1, 512, 
  514, -1, -1, -1, -1, -1, -1, -1,