commit
a5c9de2047
3
.gitignore
vendored
3
.gitignore
vendored
@ -9,6 +9,7 @@
|
||||
*.dylib
|
||||
*.dSYM
|
||||
*.out
|
||||
*.new
|
||||
data/*.txt
|
||||
data/*.ttf
|
||||
data/*.sfd
|
||||
@ -18,9 +19,9 @@ bench/icu
|
||||
bench/unistring
|
||||
normtest
|
||||
graphemetest
|
||||
utf8proc_data.c.new
|
||||
printproperty
|
||||
charwidth
|
||||
valid
|
||||
iterate
|
||||
case
|
||||
/tmp/
|
||||
|
||||
@ -10,9 +10,10 @@ before_install:
|
||||
- sudo apt-get update -qq -y
|
||||
- sudo apt-get install libpcre3-dev julia fontforge -y
|
||||
script:
|
||||
- make prefix=`pwd`/local install
|
||||
- make manifest && diff MANIFEST.new MANIFEST
|
||||
- make check
|
||||
- make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
||||
- make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
|
||||
- (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
|
||||
- (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
|
||||
env:
|
||||
|
||||
7
MANIFEST
Normal file
7
MANIFEST
Normal file
@ -0,0 +1,7 @@
|
||||
include/
|
||||
include/utf8proc.h
|
||||
lib/
|
||||
lib/libutf8proc.a
|
||||
lib/libutf8proc.so -> libutf8proc.so.1.3.0
|
||||
lib/libutf8proc.so.1 -> libutf8proc.so.1.3.0
|
||||
lib/libutf8proc.so.1.3.0
|
||||
56
Makefile
56
Makefile
@ -5,6 +5,7 @@ MAKE=make
|
||||
AR?=ar
|
||||
CC?=gcc
|
||||
INSTALL=install
|
||||
FIND=find
|
||||
|
||||
# compiler settings
|
||||
CFLAGS ?= -O2
|
||||
@ -17,7 +18,7 @@ UCFLAGS = $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS
|
||||
# from the utf8proc version number because it indicates ABI compatibility,
|
||||
# not API compatibility: MAJOR should be incremented whenever *binary*
|
||||
# compatibility is broken, even if the API is backward-compatible
|
||||
# Be sure to also update these in CMakeLists.txt!
|
||||
# Be sure to also update these in MANIFEST and CMakeLists.txt!
|
||||
MAJOR=1
|
||||
MINOR=3
|
||||
PATCH=0
|
||||
@ -38,12 +39,17 @@ includedir=$(prefix)/include
|
||||
|
||||
# meta targets
|
||||
|
||||
.PHONY: all, clean, update, data
|
||||
.PHONY: all clean data update manifest install
|
||||
|
||||
all: libutf8proc.a libutf8proc.$(SHLIB_EXT)
|
||||
|
||||
clean:
|
||||
rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT) test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate
|
||||
rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT)
|
||||
ifneq ($(OS),Darwin)
|
||||
rm -f libutf8proc.so.$(MAJOR)
|
||||
endif
|
||||
rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case
|
||||
rm -rf MANIFEST.new tmp
|
||||
$(MAKE) -C bench clean
|
||||
$(MAKE) -C data clean
|
||||
|
||||
@ -52,6 +58,8 @@ data: data/utf8proc_data.c.new
|
||||
update: data/utf8proc_data.c.new
|
||||
cp -f data/utf8proc_data.c.new utf8proc_data.c
|
||||
|
||||
manifest: MANIFEST.new
|
||||
|
||||
# real targets
|
||||
|
||||
data/utf8proc_data.c.new: libutf8proc.$(SHLIB_EXT) data/data_generator.rb data/charwidths.jl
|
||||
@ -84,12 +92,17 @@ install: libutf8proc.a libutf8proc.$(SHLIB_EXT) libutf8proc.$(SHLIB_VERS_EXT)
|
||||
mkdir -m 755 -p $(DESTDIR)$(libdir)
|
||||
$(INSTALL) -m 644 libutf8proc.a $(DESTDIR)$(libdir)
|
||||
$(INSTALL) -m 755 libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)
|
||||
ln -f -s $(libdir)/libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT)
|
||||
ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT)
|
||||
ifneq ($(OS),Darwin)
|
||||
ln -f -s $(libdir)/libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR)
|
||||
ln -f -s $(libdir)/libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR).$(MINOR)
|
||||
ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR)
|
||||
endif
|
||||
|
||||
# Generate MANIFEST.new: a sorted listing of what `make install` places under
# the prefix. The install is staged into a scratch ./tmp tree, which is removed
# both before and after the listing is produced.
# Output format: symlinks as "path -> target", regular files as "path",
# directories as "path/", all relative to tmp/usr.
MANIFEST.new:
	rm -rf tmp
	# Use $(CURDIR), which make sets itself, rather than $(PWD), which is only
	# inherited from the environment and can be stale or unset (e.g. make -C).
	$(MAKE) install prefix=/usr DESTDIR=$(CURDIR)/tmp
	# LC_ALL=C keeps the sort order independent of the user's locale.
	$(FIND) tmp/usr -mindepth 1 -type l -printf "%P -> %l\n" -or -type f -printf "%P\n" -or -type d -printf "%P/\n" | LC_ALL=C sort > $@
	rm -rf tmp
|
||||
|
||||
# Test programs
|
||||
|
||||
data/NormalizationTest.txt:
|
||||
@ -98,26 +111,29 @@ data/NormalizationTest.txt:
|
||||
data/GraphemeBreakTest.txt:
|
||||
$(MAKE) -C data GraphemeBreakTest.txt
|
||||
|
||||
test/normtest: test/normtest.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/normtest.c utf8proc.o -o $@
|
||||
# Object file holding the helper functions shared by the test programs.
# $< is the first prerequisite (test/tests.c); $@ is the target (test/tests.o).
test/tests.o: test/tests.c test/tests.h utf8proc.h
	$(CC) $(UCFLAGS) -c -o $@ $<
|
||||
|
||||
test/graphemetest: test/graphemetest.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/graphemetest.c utf8proc.o -o $@
|
||||
test/normtest: test/normtest.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/normtest.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
test/printproperty: test/printproperty.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/printproperty.c utf8proc.o -o $@
|
||||
test/graphemetest: test/graphemetest.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/graphemetest.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
test/charwidth: test/charwidth.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/charwidth.c utf8proc.o -o $@
|
||||
test/printproperty: test/printproperty.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/printproperty.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
test/valid: test/valid.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/valid.c utf8proc.o -o $@
|
||||
test/charwidth: test/charwidth.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/charwidth.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
test/iterate: test/iterate.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/iterate.c utf8proc.o -o $@
|
||||
test/valid: test/valid.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/valid.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
test/case: test/case.c utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/case.c utf8proc.o -o $@
|
||||
test/iterate: test/iterate.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/iterate.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
test/case: test/case.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) test/case.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
||||
$(MAKE) -C bench
|
||||
|
||||
@ -59,4 +59,5 @@ GraphemeBreakTest.txt:
|
||||
$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
|
||||
|
||||
clean:
|
||||
rm -f UnicodeData.txt EastAsianWidth.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd
|
||||
rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd
|
||||
rm -f utf8proc_data.c.new
|
||||
|
||||
@ -8,8 +8,14 @@
|
||||
|
||||
#############################################################################
|
||||
# Julia 0.3/0.4 compatibility (taken from Compat package)
|
||||
if VERSION < v"0.4.0-dev+1387"
|
||||
typealias AbstractString String
|
||||
end
|
||||
if VERSION < v"0.4.0-dev+1419"
|
||||
const UInt16 = Uint16
|
||||
const UInt32 = Uint32
|
||||
end
|
||||
if VERSION < v"0.4.0-dev+3874"
|
||||
Base.parse{T<:Integer}(::Type{T}, s::AbstractString) = parseint(T, s)
|
||||
end
|
||||
|
||||
CharWidths = Dict{Int,Int}()
|
||||
@ -52,7 +58,7 @@ end
|
||||
# Widths from GNU Unifont
|
||||
|
||||
#Read sfdfile for character widths
|
||||
function parsesfd(filename::String, CharWidths::Dict{Int,Int}=Dict{Int,Int}())
|
||||
function parsesfd(filename::AbstractString, CharWidths::Dict{Int,Int}=Dict{Int,Int}())
|
||||
state=:seekchar
|
||||
lineno = 0
|
||||
codepoint = width = nothing
|
||||
@ -65,8 +71,8 @@ function parsesfd(filename::String, CharWidths::Dict{Int,Int}=Dict{Int,Int}())
|
||||
state = :readdata
|
||||
end
|
||||
elseif state==:readdata #Encoding: 65538 -1 2, Width: 1024
|
||||
contains(line, "Encoding:") && (codepoint = int(split(line)[3]))
|
||||
contains(line, "Width:") && (width = int(split(line)[2]))
|
||||
contains(line, "Encoding:") && (codepoint = parse(Int, split(line)[3]))
|
||||
contains(line, "Width:") && (width = parse(Int, split(line)[2]))
|
||||
if codepoint!=nothing && width!=nothing && codepoint >= 0
|
||||
w=div(width, 512) # 512 units to the en
|
||||
if w > 0
|
||||
@ -100,8 +106,8 @@ for line in readlines(open("EastAsianWidth.txt"))
|
||||
width = strip(tokens[2])
|
||||
#Parse code point range into Julia UnitRange
|
||||
rangetokens = split(charrange, "..")
|
||||
charstart = uint32("0x"*rangetokens[1])
|
||||
charend = uint32("0x"*rangetokens[length(rangetokens)>1 ? 2 : 1])
|
||||
charstart = parse(UInt32, "0x"*rangetokens[1])
|
||||
charend = parse(UInt32, "0x"*rangetokens[length(rangetokens)>1 ? 2 : 1])
|
||||
|
||||
#Assign widths
|
||||
for c in charstart:charend
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
#include <ctype.h>
|
||||
#include <wchar.h>
|
||||
|
||||
int my_isprint(int c) {
|
||||
static int my_isprint(int c) {
|
||||
int cat = utf8proc_get_property(c)->category;
|
||||
return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) ||
|
||||
(c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd);
|
||||
|
||||
@ -8,7 +8,7 @@ static int error;
|
||||
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
||||
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
||||
|
||||
void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
||||
static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
||||
{
|
||||
utf8proc_int32_t out[16];
|
||||
utf8proc_ssize_t ret;
|
||||
|
||||
@ -7,7 +7,7 @@ int main(int argc, char **argv)
|
||||
int i;
|
||||
|
||||
for (i = 1; i < argc; ++i) {
|
||||
int c;
|
||||
unsigned int c;
|
||||
if (!strcmp(argv[i], "-V")) {
|
||||
printf("utf8proc version %s\n", utf8proc_version());
|
||||
continue;
|
||||
|
||||
46
test/tests.c
Normal file
46
test/tests.c
Normal file
@ -0,0 +1,46 @@
|
||||
/* Common functions for our test programs. */
|
||||
|
||||
#include "tests.h"
|
||||
|
||||
size_t lineno = 0;
|
||||
|
||||
void check(int cond, const char *format, ...)
|
||||
{
|
||||
if (!cond) {
|
||||
va_list args;
|
||||
fprintf(stderr, "line %zd: ", lineno);
|
||||
va_start(args, format);
|
||||
vfprintf(stderr, format, args);
|
||||
va_end(args);
|
||||
fprintf(stderr, "\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the index of the first character at or after buf[i] that is not
 * whitespace as classified by isspace(). The scan cannot run past the
 * terminating NUL, since NUL is not whitespace.
 */
size_t skipspaces(const char *buf, size_t i)
{
     /* isspace() is only defined for values representable as unsigned char
        (or EOF); plain char may be negative for bytes >= 0x80, so cast first */
     while (isspace((unsigned char) buf[i])) ++i;
     return i;
}
|
||||
|
||||
/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
|
||||
separated by whitespace, and terminated by any character not in
|
||||
[0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
|
||||
in dest, returning the number of bytes read from buf */
|
||||
size_t encode(char *dest, const char *buf)
|
||||
{
|
||||
size_t i = 0, j, d = 0;
|
||||
for (;;) {
|
||||
int c;
|
||||
i = skipspaces(buf, i);
|
||||
for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
|
||||
; /* find end of hex input */
|
||||
if (j == i) { /* no codepoint found */
|
||||
dest[d] = 0; /* NUL-terminate destination string */
|
||||
return i + 1;
|
||||
}
|
||||
check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
|
||||
i = j; /* skip to char after hex input */
|
||||
d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
|
||||
}
|
||||
}
|
||||
54
test/tests.h
54
test/tests.h
@ -1,5 +1,13 @@
|
||||
/* Common functions and includes for our test programs. */
|
||||
|
||||
/*
|
||||
* Set feature macro to enable getline() and wcwidth().
|
||||
*
|
||||
* Please refer to section 2.2.1 of POSIX.1-2008:
|
||||
* http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_02_01_02
|
||||
*/
|
||||
#define _XOPEN_SOURCE 700
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
@ -8,46 +16,8 @@
|
||||
|
||||
#include "../utf8proc.h"
|
||||
|
||||
size_t lineno = 0;
|
||||
|
||||
void check(int cond, const char *format, ...)
|
||||
{
|
||||
if (!cond) {
|
||||
va_list args;
|
||||
fprintf(stderr, "line %zd: ", lineno);
|
||||
va_start(args, format);
|
||||
vfprintf(stderr, format, args);
|
||||
va_end(args);
|
||||
fprintf(stderr, "\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
size_t skipspaces(const char *buf, size_t i)
|
||||
{
|
||||
while (isspace(buf[i])) ++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
|
||||
separated by whitespace, and terminated by any character not in
|
||||
[0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
|
||||
in dest, returning the number of bytes read from buf */
|
||||
size_t encode(char *dest, const char *buf)
|
||||
{
|
||||
size_t i = 0, j, d = 0;
|
||||
for (;;) {
|
||||
int c;
|
||||
i = skipspaces(buf, i);
|
||||
for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
|
||||
; /* find end of hex input */
|
||||
if (j == i) { /* no codepoint found */
|
||||
dest[d] = 0; /* NUL-terminate destination string */
|
||||
return i + 1;
|
||||
}
|
||||
check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
|
||||
i = j; /* skip to char after hex input */
|
||||
d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
|
||||
}
|
||||
}
|
||||
extern size_t lineno;
|
||||
|
||||
void check(int cond, const char *format, ...);
|
||||
size_t skipspaces(const char *buf, size_t i);
|
||||
size_t encode(char *dest, const char *buf);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user