commit
a5c9de2047
3
.gitignore
vendored
3
.gitignore
vendored
@ -9,6 +9,7 @@
|
|||||||
*.dylib
|
*.dylib
|
||||||
*.dSYM
|
*.dSYM
|
||||||
*.out
|
*.out
|
||||||
|
*.new
|
||||||
data/*.txt
|
data/*.txt
|
||||||
data/*.ttf
|
data/*.ttf
|
||||||
data/*.sfd
|
data/*.sfd
|
||||||
@ -18,9 +19,9 @@ bench/icu
|
|||||||
bench/unistring
|
bench/unistring
|
||||||
normtest
|
normtest
|
||||||
graphemetest
|
graphemetest
|
||||||
utf8proc_data.c.new
|
|
||||||
printproperty
|
printproperty
|
||||||
charwidth
|
charwidth
|
||||||
valid
|
valid
|
||||||
iterate
|
iterate
|
||||||
case
|
case
|
||||||
|
/tmp/
|
||||||
|
|||||||
@ -10,9 +10,10 @@ before_install:
|
|||||||
- sudo apt-get update -qq -y
|
- sudo apt-get update -qq -y
|
||||||
- sudo apt-get install libpcre3-dev julia fontforge -y
|
- sudo apt-get install libpcre3-dev julia fontforge -y
|
||||||
script:
|
script:
|
||||||
- make prefix=`pwd`/local install
|
- make manifest && diff MANIFEST.new MANIFEST
|
||||||
- make check
|
- make check
|
||||||
- make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
- make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
||||||
|
- make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
|
||||||
- (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
|
- (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
|
||||||
- (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
|
- (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
|
||||||
env:
|
env:
|
||||||
|
|||||||
7
MANIFEST
Normal file
7
MANIFEST
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
include/
|
||||||
|
include/utf8proc.h
|
||||||
|
lib/
|
||||||
|
lib/libutf8proc.a
|
||||||
|
lib/libutf8proc.so -> libutf8proc.so.1.3.0
|
||||||
|
lib/libutf8proc.so.1 -> libutf8proc.so.1.3.0
|
||||||
|
lib/libutf8proc.so.1.3.0
|
||||||
56
Makefile
56
Makefile
@ -5,6 +5,7 @@ MAKE=make
|
|||||||
AR?=ar
|
AR?=ar
|
||||||
CC?=gcc
|
CC?=gcc
|
||||||
INSTALL=install
|
INSTALL=install
|
||||||
|
FIND=find
|
||||||
|
|
||||||
# compiler settings
|
# compiler settings
|
||||||
CFLAGS ?= -O2
|
CFLAGS ?= -O2
|
||||||
@ -17,7 +18,7 @@ UCFLAGS = $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS
|
|||||||
# from the utf8proc version number because it indicates ABI compatibility,
|
# from the utf8proc version number because it indicates ABI compatibility,
|
||||||
# not API compatibility: MAJOR should be incremented whenever *binary*
|
# not API compatibility: MAJOR should be incremented whenever *binary*
|
||||||
# compatibility is broken, even if the API is backward-compatible
|
# compatibility is broken, even if the API is backward-compatible
|
||||||
# Be sure to also update these in CMakeLists.txt!
|
# Be sure to also update these in MANIFEST and CMakeLists.txt!
|
||||||
MAJOR=1
|
MAJOR=1
|
||||||
MINOR=3
|
MINOR=3
|
||||||
PATCH=0
|
PATCH=0
|
||||||
@ -38,12 +39,17 @@ includedir=$(prefix)/include
|
|||||||
|
|
||||||
# meta targets
|
# meta targets
|
||||||
|
|
||||||
.PHONY: all, clean, update, data
|
.PHONY: all clean data update manifest install
|
||||||
|
|
||||||
all: libutf8proc.a libutf8proc.$(SHLIB_EXT)
|
all: libutf8proc.a libutf8proc.$(SHLIB_EXT)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT) test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate
|
rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT)
|
||||||
|
ifneq ($(OS),Darwin)
|
||||||
|
rm -f libutf8proc.so.$(MAJOR)
|
||||||
|
endif
|
||||||
|
rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case
|
||||||
|
rm -rf MANIFEST.new tmp
|
||||||
$(MAKE) -C bench clean
|
$(MAKE) -C bench clean
|
||||||
$(MAKE) -C data clean
|
$(MAKE) -C data clean
|
||||||
|
|
||||||
@ -52,6 +58,8 @@ data: data/utf8proc_data.c.new
|
|||||||
update: data/utf8proc_data.c.new
|
update: data/utf8proc_data.c.new
|
||||||
cp -f data/utf8proc_data.c.new utf8proc_data.c
|
cp -f data/utf8proc_data.c.new utf8proc_data.c
|
||||||
|
|
||||||
|
manifest: MANIFEST.new
|
||||||
|
|
||||||
# real targets
|
# real targets
|
||||||
|
|
||||||
data/utf8proc_data.c.new: libutf8proc.$(SHLIB_EXT) data/data_generator.rb data/charwidths.jl
|
data/utf8proc_data.c.new: libutf8proc.$(SHLIB_EXT) data/data_generator.rb data/charwidths.jl
|
||||||
@ -84,12 +92,17 @@ install: libutf8proc.a libutf8proc.$(SHLIB_EXT) libutf8proc.$(SHLIB_VERS_EXT)
|
|||||||
mkdir -m 755 -p $(DESTDIR)$(libdir)
|
mkdir -m 755 -p $(DESTDIR)$(libdir)
|
||||||
$(INSTALL) -m 644 libutf8proc.a $(DESTDIR)$(libdir)
|
$(INSTALL) -m 644 libutf8proc.a $(DESTDIR)$(libdir)
|
||||||
$(INSTALL) -m 755 libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)
|
$(INSTALL) -m 755 libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)
|
||||||
ln -f -s $(libdir)/libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT)
|
ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT)
|
||||||
ifneq ($(OS),Darwin)
|
ifneq ($(OS),Darwin)
|
||||||
ln -f -s $(libdir)/libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR)
|
ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR)
|
||||||
ln -f -s $(libdir)/libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR).$(MINOR)
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
MANIFEST.new:
|
||||||
|
rm -rf tmp
|
||||||
|
$(MAKE) install prefix=/usr DESTDIR=$(PWD)/tmp
|
||||||
|
$(FIND) tmp/usr -mindepth 1 -type l -printf "%P -> %l\n" -or -type f -printf "%P\n" -or -type d -printf "%P/\n" | LC_ALL=C sort > $@
|
||||||
|
rm -rf tmp
|
||||||
|
|
||||||
# Test programs
|
# Test programs
|
||||||
|
|
||||||
data/NormalizationTest.txt:
|
data/NormalizationTest.txt:
|
||||||
@ -98,26 +111,29 @@ data/NormalizationTest.txt:
|
|||||||
data/GraphemeBreakTest.txt:
|
data/GraphemeBreakTest.txt:
|
||||||
$(MAKE) -C data GraphemeBreakTest.txt
|
$(MAKE) -C data GraphemeBreakTest.txt
|
||||||
|
|
||||||
test/normtest: test/normtest.c utf8proc.o utf8proc.h test/tests.h
|
test/tests.o: test/tests.c test/tests.h utf8proc.h
|
||||||
$(CC) $(UCFLAGS) test/normtest.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) -c -o test/tests.o test/tests.c
|
||||||
|
|
||||||
test/graphemetest: test/graphemetest.c utf8proc.o utf8proc.h test/tests.h
|
test/normtest: test/normtest.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) test/graphemetest.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) test/normtest.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
test/printproperty: test/printproperty.c utf8proc.o utf8proc.h test/tests.h
|
test/graphemetest: test/graphemetest.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) test/printproperty.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) test/graphemetest.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
test/charwidth: test/charwidth.c utf8proc.o utf8proc.h test/tests.h
|
test/printproperty: test/printproperty.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) test/charwidth.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) test/printproperty.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
test/valid: test/valid.c utf8proc.o utf8proc.h test/tests.h
|
test/charwidth: test/charwidth.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) test/valid.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) test/charwidth.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
test/iterate: test/iterate.c utf8proc.o utf8proc.h test/tests.h
|
test/valid: test/valid.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) test/iterate.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) test/valid.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
test/case: test/case.c utf8proc.o utf8proc.h test/tests.h
|
test/iterate: test/iterate.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) test/case.c utf8proc.o -o $@
|
$(CC) $(UCFLAGS) test/iterate.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
|
test/case: test/case.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
|
$(CC) $(UCFLAGS) test/case.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
||||||
$(MAKE) -C bench
|
$(MAKE) -C bench
|
||||||
|
|||||||
@ -59,4 +59,5 @@ GraphemeBreakTest.txt:
|
|||||||
$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
|
$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f UnicodeData.txt EastAsianWidth.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd
|
rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd
|
||||||
|
rm -f utf8proc_data.c.new
|
||||||
|
|||||||
@ -8,8 +8,14 @@
|
|||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
# Julia 0.3/0.4 compatibility (taken from Compat package)
|
# Julia 0.3/0.4 compatibility (taken from Compat package)
|
||||||
|
if VERSION < v"0.4.0-dev+1387"
|
||||||
|
typealias AbstractString String
|
||||||
|
end
|
||||||
if VERSION < v"0.4.0-dev+1419"
|
if VERSION < v"0.4.0-dev+1419"
|
||||||
const UInt16 = Uint16
|
const UInt32 = Uint32
|
||||||
|
end
|
||||||
|
if VERSION < v"0.4.0-dev+3874"
|
||||||
|
Base.parse{T<:Integer}(::Type{T}, s::AbstractString) = parseint(T, s)
|
||||||
end
|
end
|
||||||
|
|
||||||
CharWidths = Dict{Int,Int}()
|
CharWidths = Dict{Int,Int}()
|
||||||
@ -52,7 +58,7 @@ end
|
|||||||
# Widths from GNU Unifont
|
# Widths from GNU Unifont
|
||||||
|
|
||||||
#Read sfdfile for character widths
|
#Read sfdfile for character widths
|
||||||
function parsesfd(filename::String, CharWidths::Dict{Int,Int}=Dict{Int,Int}())
|
function parsesfd(filename::AbstractString, CharWidths::Dict{Int,Int}=Dict{Int,Int}())
|
||||||
state=:seekchar
|
state=:seekchar
|
||||||
lineno = 0
|
lineno = 0
|
||||||
codepoint = width = nothing
|
codepoint = width = nothing
|
||||||
@ -65,8 +71,8 @@ function parsesfd(filename::String, CharWidths::Dict{Int,Int}=Dict{Int,Int}())
|
|||||||
state = :readdata
|
state = :readdata
|
||||||
end
|
end
|
||||||
elseif state==:readdata #Encoding: 65538 -1 2, Width: 1024
|
elseif state==:readdata #Encoding: 65538 -1 2, Width: 1024
|
||||||
contains(line, "Encoding:") && (codepoint = int(split(line)[3]))
|
contains(line, "Encoding:") && (codepoint = parse(Int, split(line)[3]))
|
||||||
contains(line, "Width:") && (width = int(split(line)[2]))
|
contains(line, "Width:") && (width = parse(Int, split(line)[2]))
|
||||||
if codepoint!=nothing && width!=nothing && codepoint >= 0
|
if codepoint!=nothing && width!=nothing && codepoint >= 0
|
||||||
w=div(width, 512) # 512 units to the en
|
w=div(width, 512) # 512 units to the en
|
||||||
if w > 0
|
if w > 0
|
||||||
@ -100,8 +106,8 @@ for line in readlines(open("EastAsianWidth.txt"))
|
|||||||
width = strip(tokens[2])
|
width = strip(tokens[2])
|
||||||
#Parse code point range into Julia UnitRange
|
#Parse code point range into Julia UnitRange
|
||||||
rangetokens = split(charrange, "..")
|
rangetokens = split(charrange, "..")
|
||||||
charstart = uint32("0x"*rangetokens[1])
|
charstart = parse(UInt32, "0x"*rangetokens[1])
|
||||||
charend = uint32("0x"*rangetokens[length(rangetokens)>1 ? 2 : 1])
|
charend = parse(UInt32, "0x"*rangetokens[length(rangetokens)>1 ? 2 : 1])
|
||||||
|
|
||||||
#Assign widths
|
#Assign widths
|
||||||
for c in charstart:charend
|
for c in charstart:charend
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
|
|
||||||
int my_isprint(int c) {
|
static int my_isprint(int c) {
|
||||||
int cat = utf8proc_get_property(c)->category;
|
int cat = utf8proc_get_property(c)->category;
|
||||||
return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) ||
|
return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) ||
|
||||||
(c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd);
|
(c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd);
|
||||||
|
|||||||
@ -8,7 +8,7 @@ static int error;
|
|||||||
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
||||||
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
||||||
|
|
||||||
void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t out[16];
|
utf8proc_int32_t out[16];
|
||||||
utf8proc_ssize_t ret;
|
utf8proc_ssize_t ret;
|
||||||
|
|||||||
@ -7,7 +7,7 @@ int main(int argc, char **argv)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 1; i < argc; ++i) {
|
for (i = 1; i < argc; ++i) {
|
||||||
int c;
|
unsigned int c;
|
||||||
if (!strcmp(argv[i], "-V")) {
|
if (!strcmp(argv[i], "-V")) {
|
||||||
printf("utf8proc version %s\n", utf8proc_version());
|
printf("utf8proc version %s\n", utf8proc_version());
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
46
test/tests.c
Normal file
46
test/tests.c
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/* Common functions for our test programs. */
|
||||||
|
|
||||||
|
#include "tests.h"
|
||||||
|
|
||||||
|
size_t lineno = 0;
|
||||||
|
|
||||||
|
void check(int cond, const char *format, ...)
|
||||||
|
{
|
||||||
|
if (!cond) {
|
||||||
|
va_list args;
|
||||||
|
fprintf(stderr, "line %zd: ", lineno);
|
||||||
|
va_start(args, format);
|
||||||
|
vfprintf(stderr, format, args);
|
||||||
|
va_end(args);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t skipspaces(const char *buf, size_t i)
|
||||||
|
{
|
||||||
|
while (isspace(buf[i])) ++i;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
|
||||||
|
separated by whitespace, and terminated by any character not in
|
||||||
|
[0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
|
||||||
|
in dest, returning the number of bytes read from buf */
|
||||||
|
size_t encode(char *dest, const char *buf)
|
||||||
|
{
|
||||||
|
size_t i = 0, j, d = 0;
|
||||||
|
for (;;) {
|
||||||
|
int c;
|
||||||
|
i = skipspaces(buf, i);
|
||||||
|
for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
|
||||||
|
; /* find end of hex input */
|
||||||
|
if (j == i) { /* no codepoint found */
|
||||||
|
dest[d] = 0; /* NUL-terminate destination string */
|
||||||
|
return i + 1;
|
||||||
|
}
|
||||||
|
check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
|
||||||
|
i = j; /* skip to char after hex input */
|
||||||
|
d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
|
||||||
|
}
|
||||||
|
}
|
||||||
54
test/tests.h
54
test/tests.h
@ -1,5 +1,13 @@
|
|||||||
/* Common functions and includes for our test programs. */
|
/* Common functions and includes for our test programs. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set feature macro to enable getline() and wcwidth().
|
||||||
|
*
|
||||||
|
* Please refer to section 2.2.1 of POSIX.1-2008:
|
||||||
|
* http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_02_01_02
|
||||||
|
*/
|
||||||
|
#define _XOPEN_SOURCE 700
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@ -8,46 +16,8 @@
|
|||||||
|
|
||||||
#include "../utf8proc.h"
|
#include "../utf8proc.h"
|
||||||
|
|
||||||
size_t lineno = 0;
|
extern size_t lineno;
|
||||||
|
|
||||||
void check(int cond, const char *format, ...)
|
|
||||||
{
|
|
||||||
if (!cond) {
|
|
||||||
va_list args;
|
|
||||||
fprintf(stderr, "line %zd: ", lineno);
|
|
||||||
va_start(args, format);
|
|
||||||
vfprintf(stderr, format, args);
|
|
||||||
va_end(args);
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t skipspaces(const char *buf, size_t i)
|
|
||||||
{
|
|
||||||
while (isspace(buf[i])) ++i;
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
|
|
||||||
separated by whitespace, and terminated by any character not in
|
|
||||||
[0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
|
|
||||||
in dest, returning the number of bytes read from buf */
|
|
||||||
size_t encode(char *dest, const char *buf)
|
|
||||||
{
|
|
||||||
size_t i = 0, j, d = 0;
|
|
||||||
for (;;) {
|
|
||||||
int c;
|
|
||||||
i = skipspaces(buf, i);
|
|
||||||
for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
|
|
||||||
; /* find end of hex input */
|
|
||||||
if (j == i) { /* no codepoint found */
|
|
||||||
dest[d] = 0; /* NUL-terminate destination string */
|
|
||||||
return i + 1;
|
|
||||||
}
|
|
||||||
check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
|
|
||||||
i = j; /* skip to char after hex input */
|
|
||||||
d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
void check(int cond, const char *format, ...);
|
||||||
|
size_t skipspaces(const char *buf, size_t i);
|
||||||
|
size_t encode(char *dest, const char *buf);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user