Use a new typedef utf8proc_ssize_t to avoid define collisions with MSVC
This commit is contained in:
parent
498ecbddd8
commit
ad27722923
@ -41,7 +41,7 @@ int main(int argc, char **argv)
|
|||||||
if (si) {
|
if (si) {
|
||||||
uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
|
uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
|
||||||
size_t i = 0, j = 0;
|
size_t i = 0, j = 0;
|
||||||
ssize_t glen;
|
utf8proc_ssize_t glen;
|
||||||
uint8_t *g; /* utf8proc_map grapheme results */
|
uint8_t *g; /* utf8proc_map grapheme results */
|
||||||
while (i < si) {
|
while (i < si) {
|
||||||
if (src[i] != '/')
|
if (src[i] != '/')
|
||||||
|
|||||||
46
utf8proc.c
46
utf8proc.c
@ -91,7 +91,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
|
|||||||
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
|
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) {
|
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||||
switch (errcode) {
|
switch (errcode) {
|
||||||
case UTF8PROC_ERROR_NOMEM:
|
case UTF8PROC_ERROR_NOMEM:
|
||||||
return "Memory for processing UTF-8 data could not be allocated.";
|
return "Memory for processing UTF-8 data could not be allocated.";
|
||||||
@ -108,8 +108,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||||
const uint8_t *str, ssize_t strlen, int32_t *dst
|
const uint8_t *str, utf8proc_ssize_t strlen, int32_t *dst
|
||||||
) {
|
) {
|
||||||
int length;
|
int length;
|
||||||
int i;
|
int i;
|
||||||
@ -155,7 +155,7 @@ UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc) {
|
|||||||
else return true;
|
else return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
|
||||||
if (uc < 0x00) {
|
if (uc < 0x00) {
|
||||||
return 0;
|
return 0;
|
||||||
} else if (uc < 0x80) {
|
} else if (uc < 0x80) {
|
||||||
@ -250,7 +250,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) {
|
|||||||
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
options & ~UTF8PROC_LUMP, last_boundclass)
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||||
const utf8proc_property_t *property;
|
const utf8proc_property_t *property;
|
||||||
utf8proc_propval_t category;
|
utf8proc_propval_t category;
|
||||||
int32_t hangul_sindex;
|
int32_t hangul_sindex;
|
||||||
@ -313,7 +313,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
|
|||||||
if (options & UTF8PROC_CASEFOLD) {
|
if (options & UTF8PROC_CASEFOLD) {
|
||||||
if (property->casefold_mapping) {
|
if (property->casefold_mapping) {
|
||||||
const int32_t *casefold_entry;
|
const int32_t *casefold_entry;
|
||||||
ssize_t written = 0;
|
utf8proc_ssize_t written = 0;
|
||||||
for (casefold_entry = property->casefold_mapping;
|
for (casefold_entry = property->casefold_mapping;
|
||||||
*casefold_entry >= 0; casefold_entry++) {
|
*casefold_entry >= 0; casefold_entry++) {
|
||||||
written += utf8proc_decompose_char(*casefold_entry, dst+written,
|
written += utf8proc_decompose_char(*casefold_entry, dst+written,
|
||||||
@ -328,7 +328,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
|
|||||||
if (property->decomp_mapping &&
|
if (property->decomp_mapping &&
|
||||||
(!property->decomp_type || (options & UTF8PROC_COMPAT))) {
|
(!property->decomp_type || (options & UTF8PROC_COMPAT))) {
|
||||||
const int32_t *decomp_entry;
|
const int32_t *decomp_entry;
|
||||||
ssize_t written = 0;
|
utf8proc_ssize_t written = 0;
|
||||||
for (decomp_entry = property->decomp_mapping;
|
for (decomp_entry = property->decomp_mapping;
|
||||||
*decomp_entry >= 0; decomp_entry++) {
|
*decomp_entry >= 0; decomp_entry++) {
|
||||||
written += utf8proc_decompose_char(*decomp_entry, dst+written,
|
written += utf8proc_decompose_char(*decomp_entry, dst+written,
|
||||||
@ -354,12 +354,12 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
|
||||||
const uint8_t *str, ssize_t strlen,
|
const uint8_t *str, utf8proc_ssize_t strlen,
|
||||||
int32_t *buffer, ssize_t bufsize, utf8proc_option_t options
|
int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
|
||||||
) {
|
) {
|
||||||
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
|
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
|
||||||
ssize_t wpos = 0;
|
utf8proc_ssize_t wpos = 0;
|
||||||
if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
|
if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
|
||||||
return UTF8PROC_ERROR_INVALIDOPTS;
|
return UTF8PROC_ERROR_INVALIDOPTS;
|
||||||
if ((options & UTF8PROC_STRIPMARK) &&
|
if ((options & UTF8PROC_STRIPMARK) &&
|
||||||
@ -367,8 +367,8 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
|
|||||||
return UTF8PROC_ERROR_INVALIDOPTS;
|
return UTF8PROC_ERROR_INVALIDOPTS;
|
||||||
{
|
{
|
||||||
int32_t uc;
|
int32_t uc;
|
||||||
ssize_t rpos = 0;
|
utf8proc_ssize_t rpos = 0;
|
||||||
ssize_t decomp_result;
|
utf8proc_ssize_t decomp_result;
|
||||||
int boundclass = UTF8PROC_BOUNDCLASS_START;
|
int boundclass = UTF8PROC_BOUNDCLASS_START;
|
||||||
while (1) {
|
while (1) {
|
||||||
if (options & UTF8PROC_NULLTERM) {
|
if (options & UTF8PROC_NULLTERM) {
|
||||||
@ -395,7 +395,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
|
if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
|
||||||
ssize_t pos = 0;
|
utf8proc_ssize_t pos = 0;
|
||||||
while (pos < wpos-1) {
|
while (pos < wpos-1) {
|
||||||
int32_t uc1, uc2;
|
int32_t uc1, uc2;
|
||||||
const utf8proc_property_t *property1, *property2;
|
const utf8proc_property_t *property1, *property2;
|
||||||
@ -416,12 +416,12 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
|
|||||||
return wpos;
|
return wpos;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options) {
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
|
||||||
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
|
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
|
||||||
ASSERT: 'buffer' has one spare byte of free space at the end! */
|
ASSERT: 'buffer' has one spare byte of free space at the end! */
|
||||||
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
|
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
|
||||||
ssize_t rpos;
|
utf8proc_ssize_t rpos;
|
||||||
ssize_t wpos = 0;
|
utf8proc_ssize_t wpos = 0;
|
||||||
int32_t uc;
|
int32_t uc;
|
||||||
for (rpos = 0; rpos < length; rpos++) {
|
for (rpos = 0; rpos < length; rpos++) {
|
||||||
uc = buffer[rpos];
|
uc = buffer[rpos];
|
||||||
@ -455,8 +455,8 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut
|
|||||||
int32_t current_char;
|
int32_t current_char;
|
||||||
const utf8proc_property_t *starter_property = NULL, *current_property;
|
const utf8proc_property_t *starter_property = NULL, *current_property;
|
||||||
utf8proc_propval_t max_combining_class = -1;
|
utf8proc_propval_t max_combining_class = -1;
|
||||||
ssize_t rpos;
|
utf8proc_ssize_t rpos;
|
||||||
ssize_t wpos = 0;
|
utf8proc_ssize_t wpos = 0;
|
||||||
int32_t composition;
|
int32_t composition;
|
||||||
for (rpos = 0; rpos < length; rpos++) {
|
for (rpos = 0; rpos < length; rpos++) {
|
||||||
current_char = buffer[rpos];
|
current_char = buffer[rpos];
|
||||||
@ -520,7 +520,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut
|
|||||||
length = wpos;
|
length = wpos;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
ssize_t rpos, wpos = 0;
|
utf8proc_ssize_t rpos, wpos = 0;
|
||||||
int32_t uc;
|
int32_t uc;
|
||||||
for (rpos = 0; rpos < length; rpos++) {
|
for (rpos = 0; rpos < length; rpos++) {
|
||||||
uc = buffer[rpos];
|
uc = buffer[rpos];
|
||||||
@ -531,11 +531,11 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_map(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
|
||||||
const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
|
const uint8_t *str, utf8proc_ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
|
||||||
) {
|
) {
|
||||||
int32_t *buffer;
|
int32_t *buffer;
|
||||||
ssize_t result;
|
utf8proc_ssize_t result;
|
||||||
*dstptr = NULL;
|
*dstptr = NULL;
|
||||||
result = utf8proc_decompose(str, strlen, NULL, 0, options);
|
result = utf8proc_decompose(str, strlen, NULL, 0, options);
|
||||||
if (result < 0) return result;
|
if (result < 0) return result;
|
||||||
|
|||||||
27
utf8proc.h
27
utf8proc.h
@ -83,9 +83,9 @@ typedef short int16_t;
|
|||||||
typedef unsigned short uint16_t;
|
typedef unsigned short uint16_t;
|
||||||
typedef int int32_t;
|
typedef int int32_t;
|
||||||
# ifdef _WIN64
|
# ifdef _WIN64
|
||||||
# define ssize_t __int64
|
typedef __int64 utf8proc_ssize_t;
|
||||||
# else
|
# else
|
||||||
# define ssize_t int
|
typedef int utf8proc_ssize_t;
|
||||||
# endif
|
# endif
|
||||||
# ifndef __cplusplus
|
# ifndef __cplusplus
|
||||||
typedef unsigned char bool;
|
typedef unsigned char bool;
|
||||||
@ -94,6 +94,7 @@ enum {false, true};
|
|||||||
#else
|
#else
|
||||||
# include <stdbool.h>
|
# include <stdbool.h>
|
||||||
# include <inttypes.h>
|
# include <inttypes.h>
|
||||||
|
typedef ssize_t utf8proc_ssize_t;
|
||||||
#endif
|
#endif
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
@ -364,7 +365,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
|
|||||||
* Returns an informative error string for the given utf8proc error code
|
* Returns an informative error string for the given utf8proc error code
|
||||||
* (e.g. the error codes returned by @ref utf8proc_map).
|
* (e.g. the error codes returned by @ref utf8proc_map).
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode);
|
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
|
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
|
||||||
@ -376,7 +377,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode);
|
|||||||
* In case of success, the number of bytes read is returned; otherwise, a
|
* In case of success, the number of bytes read is returned; otherwise, a
|
||||||
* negative error code is returned.
|
* negative error code is returned.
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *codepoint_ref);
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const uint8_t *str, utf8proc_ssize_t strlen, int32_t *codepoint_ref);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a codepoint is valid (regardless of whether it has been
|
* Check if a codepoint is valid (regardless of whether it has been
|
||||||
@ -395,7 +396,7 @@ UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint);
|
|||||||
*
|
*
|
||||||
* This function does not check whether `codepoint` is valid Unicode.
|
* This function does not check whether `codepoint` is valid Unicode.
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst);
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Look up the properties for a given codepoint.
|
* Look up the properties for a given codepoint.
|
||||||
@ -438,8 +439,8 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t code
|
|||||||
* required buffer size is returned, while the buffer will be overwritten with
|
* required buffer size is returned, while the buffer will be overwritten with
|
||||||
* undefined data.
|
* undefined data.
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
|
||||||
int32_t codepoint, int32_t *dst, ssize_t bufsize,
|
int32_t codepoint, int32_t *dst, utf8proc_ssize_t bufsize,
|
||||||
utf8proc_option_t options, int *last_boundclass
|
utf8proc_option_t options, int *last_boundclass
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -459,9 +460,9 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(
|
|||||||
* required buffer size is returned, while the buffer will be overwritten with
|
* required buffer size is returned, while the buffer will be overwritten with
|
||||||
* undefined data.
|
* undefined data.
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
|
||||||
const uint8_t *str, ssize_t strlen,
|
const uint8_t *str, utf8proc_ssize_t strlen,
|
||||||
int32_t *buffer, ssize_t bufsize, utf8proc_option_t options
|
int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -489,7 +490,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
|
|||||||
* entries of the array pointed to by `str` have to be in the
|
* entries of the array pointed to by `str` have to be in the
|
||||||
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
|
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options);
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a pair of consecutive codepoints, return whether a grapheme break is
|
* Given a pair of consecutive codepoints, return whether a grapheme break is
|
||||||
@ -537,8 +538,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint);
|
|||||||
* @note The memory of the new UTF-8 string will have been allocated
|
* @note The memory of the new UTF-8 string will have been allocated
|
||||||
* with `malloc`, and should therefore be deallocated with `free`.
|
* with `malloc`, and should therefore be deallocated with `free`.
|
||||||
*/
|
*/
|
||||||
UTF8PROC_DLLEXPORT ssize_t utf8proc_map(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
|
||||||
const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
|
const uint8_t *str, utf8proc_ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
|
||||||
);
|
);
|
||||||
|
|
||||||
/** @name Unicode normalization
|
/** @name Unicode normalization
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user