diff --git a/.gitignore b/.gitignore
index 58d7ad6..c516642 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,8 +8,8 @@
 *.dll
 *.dylib
 *.dSYM
-*.txt
 *.out
+data/*.txt
 bench/bench
 bench/icu
 bench/unistring
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..4d0c1c3
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,28 @@
+# CMake build description for the utf8proc C library.
+# 2.8.9 is required because the POSITION_INDEPENDENT_CODE target property
+# set at the bottom of this file is not available in earlier releases.
+cmake_minimum_required (VERSION 2.8.9)
+
+# Provides disallow_intree_builds().
+include (utils.cmake)
+
+# Abort early when the build directory is the source directory.
+disallow_intree_builds()
+
+project (utf8proc C)
+
+add_definitions (
+  -DUTF8PROC_EXPORTS
+)
+
+# Optimisation, language-standard and warning flags for non-MSVC compilers.
+if (NOT MSVC)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -std=c99 -pedantic -Wall")
+endif ()
+
+add_library (utf8proc
+  utf8proc.c
+  utf8proc.h
+)
+
+set_property (TARGET utf8proc PROPERTY POSITION_INDEPENDENT_CODE ON)
diff --git a/NEWS.md b/NEWS.md
index ead7f6b..989aba5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -105,7 +105,7 @@ Release of version 1.0.1
 
 2006-09-17:
 
-- added the `LUMP` option, which lumps certain characters together (see `lump.txt`) (also used for the PostgreSQL `unifold` function)
+- added the `LUMP` option, which lumps certain characters together (see `lump.md`) (also used for the PostgreSQL `unifold` function)
 - added the `STRIPMARK` option, which strips marking characters (or marks of composed characters)
 - deprecated ruby method `String#char_ary` in favour of `String#utf8chars`
 
diff --git a/lump.txt b/lump.md
similarity index 99%
rename from lump.txt
rename to lump.md
index 442cafb..39186fb 100644
--- a/lump.txt
+++ b/lump.md
@@ -1,3 +1,4 @@
+```
 U+0020 <-- all space characters (general category Zs)
 U+0027 ' <-- left/right single quotation mark U+2018..2019,
  modifier letter apostrophe U+02BC,
@@ -23,4 +24,4 @@ U+005F _ <-- all connector characters (general category Pc),
 U+0060 ` <-- modifier letter grave accent U+02CB
 U+007C | <-- divides U+2223
 U+007E ~ <-- tilde operator U+223C
-
+```
diff --git a/utf8proc.h b/utf8proc.h
index 06346c8..0d647db 100644
--- a/utf8proc.h
+++ b/utf8proc.h
@@ -140,7 +140,7 @@ extern "C" {
  * is representing a single grapheme cluster (see UAX#29).
  * LUMP: Lumps certain characters together
  * (e.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-").
- * (See lump.txt for details.)
+ * (See lump.md for details.)
  * If NLF2LF is set, this includes a transformation of
  * paragraph and line separators to ASCII line-feed (LF).
  * STRIPMARK: Strips all character markings
diff --git a/utils.cmake b/utils.cmake
new file mode 100644
index 0000000..63fc426
--- /dev/null
+++ b/utils.cmake
@@ -0,0 +1,24 @@
+
+# disallow_intree_builds()
+#
+# Stops configuration with a FATAL_ERROR when the top-level source and
+# binary directories are the same, unless MSVC_IDE is set.
+function (disallow_intree_builds)
+  # Adapted from LLVM's toplevel CMakeLists.txt file
+  if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE )
+    message(FATAL_ERROR "
+      In-source builds are not allowed. CMake would overwrite the
+      makefiles distributed with utf8proc. Please create a directory
+      and run cmake from there. Building in a subdirectory is
+      fine, e.g.:
+
+        mkdir build
+        cd build
+        cmake ..
+
+      This process created the file `CMakeCache.txt' and the
+      directory `CMakeFiles'. Please delete them.
+
+      ")
+  endif()
+endfunction()