import of utf8proc-v1.1.6
This commit is contained in:
2
ruby/extconf.rb
Normal file
2
ruby/extconf.rb
Normal file
@@ -0,0 +1,2 @@
|
||||
# Build script for the native extension: loads mkmf and generates the
# Makefile for the "utf8proc_native" target.
require "mkmf"

create_makefile 'utf8proc_native'
|
||||
64
ruby/gem/LICENSE
Normal file
64
ruby/gem/LICENSE
Normal file
@@ -0,0 +1,64 @@
|
||||
|
||||
Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
This software distribution contains derived data from a modified version of
|
||||
the Unicode data files. The following license applies to that data:
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
|
||||
under the Terms of Use in http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of the Unicode data files and any associated documentation (the "Data
|
||||
Files") or Unicode software and any associated documentation (the
|
||||
"Software") to deal in the Data Files or Software without restriction,
|
||||
including without limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, and/or sell copies of the Data Files or Software, and
|
||||
to permit persons to whom the Data Files or Software are furnished to do
|
||||
so, provided that (a) the above copyright notice(s) and this permission
|
||||
notice appear with all copies of the Data Files or Software, (b) both the
|
||||
above copyright notice(s) and this permission notice appear in associated
|
||||
documentation, and (c) there is clear notice in each modified Data File or
|
||||
in the Software as well as in the documentation associated with the Data
|
||||
File(s) or Software that the data or software has been modified.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
|
||||
THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
|
||||
CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
|
||||
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder shall
|
||||
not be used in advertising or otherwise to promote the sale, use or other
|
||||
dealings in these Data Files or Software without prior written
|
||||
authorization of the copyright holder.
|
||||
|
||||
|
||||
Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be
|
||||
registered in some jurisdictions. All other trademarks and registered
|
||||
trademarks mentioned herein are the property of their respective owners.
|
||||
|
||||
12
ruby/gem/utf8proc.gemspec
Normal file
12
ruby/gem/utf8proc.gemspec
Normal file
@@ -0,0 +1,12 @@
|
||||
require 'rubygems'

# Gem specification for the utf8proc Ruby bindings: the Ruby wrapper in lib/
# plus the native extension that is built through ext/extconf.rb.
SPEC = Gem::Specification.new do |s|
  s.name         = 'utf8proc'
  s.version      = '1.1.6'
  s.author       = 'Public Software Group e. V., Berlin, Germany'
  s.homepage     = 'http://www.public-software-group.org/utf8proc'
  s.summary      = 'UTF-8 Unicode string processing'
  s.files        = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c']
  s.require_path = 'lib/'
  s.extensions   = ['ext/extconf.rb']
  # has_rdoc= is deprecated in RubyGems and may be missing from newer
  # releases; only call it where it still exists so the gemspec keeps loading.
  s.has_rdoc = false if s.respond_to?(:has_rdoc=)
end
|
||||
98
ruby/utf8proc.rb
Normal file
98
ruby/utf8proc.rb
Normal file
@@ -0,0 +1,98 @@
|
||||
# Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
#
|
||||
# File name: ruby/utf8proc.rb
|
||||
#
|
||||
# Description:
|
||||
# The part of the Ruby wrapper for libutf8proc that is written in Ruby.
|
||||
#
|
||||
|
||||
|
||||
require 'utf8proc_native'
|
||||
|
||||
|
||||
# Ruby-level part of the utf8proc bindings. The methods below rely on
# Utf8Proc.utf8map, Utf8Proc.utf8char and Utf8Proc::Options, which are not
# defined in this module body (they come from the required native extension).
module Utf8Proc

  # Control and separator characters keyed by their usual abbreviation.
  # LS and PS are spelled out as three-byte UTF-8 sequences.
  # Frozen so the shared table cannot be modified by accident.
  SpecialChars = {
    :HT => "\x09",
    :LF => "\x0A",
    :VT => "\x0B",
    :FF => "\x0C",
    :CR => "\x0D",
    :FS => "\x1C",
    :GS => "\x1D",
    :RS => "\x1E",
    :US => "\x1F",
    :LS => "\xE2\x80\xA8",
    :PS => "\xE2\x80\xA9",
  }.freeze

  # Methods mixed into String.
  module StringExtensions

    # Maps the string through Utf8Proc.utf8map.
    #
    # option_array:: symbols that must be keys of Utf8Proc::Options; their
    #                flag values are OR-ed together.
    # Returns whatever Utf8Proc.utf8map returns for the combined flags.
    # Raises ArgumentError when a symbol is not a known option.
    def utf8map(*option_array)
      options = option_array.inject(0) do |bits, option|
        flag = Utf8Proc::Options[option]
        raise ArgumentError, "Unknown argument given to String#utf8map." unless flag
        bits | flag
      end
      Utf8Proc.utf8map(self, options)
    end

    # In-place variant of #utf8map: replaces the receiver's content with the
    # mapped string.
    def utf8map!(*option_array)
      replace(utf8map(*option_array))
    end

    # Shorthands for fixed option sets; the bang variants modify the receiver.
    def utf8nfd;   utf8map( :stable, :decompose); end
    def utf8nfd!;  utf8map!(:stable, :decompose); end
    def utf8nfc;   utf8map( :stable, :compose); end
    def utf8nfc!;  utf8map!(:stable, :compose); end
    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end

    # Returns an array of the pieces obtained by splitting the :charbound
    # mapping of the string on 0xFF bytes (the marker this method has always
    # split on). A leading empty piece is dropped.
    #
    # On encoding-aware Rubies the split is done on binary copies, because a
    # lone 0xFF byte is not valid UTF-8 and String#split refuses such a
    # separator; the resulting pieces are then tagged as UTF-8.
    def utf8chars
      marked = utf8map(:charbound)
      boundary = [0xFF].pack('C')
      encoding_aware = marked.respond_to?(:force_encoding)
      marked = marked.dup.force_encoding('BINARY') if encoding_aware
      result = marked.split(boundary)
      result.shift if result.first == ""
      result.each { |char| char.force_encoding('UTF-8') } if encoding_aware
      result
    end

    # Deprecated, use String#utf8chars instead.
    def char_ary
      utf8chars
    end

  end

  # Methods mixed into Integer.
  module IntegerExtensions

    # Returns the result of Utf8Proc.utf8char for this integer (treated as a
    # code point by the caller's contract).
    def utf8
      Utf8Proc.utf8char(self)
    end

  end

end
|
||||
|
||||
|
||||
# Make the utf8proc string helpers available on every String.
String.send(:include, Utf8Proc::StringExtensions)
|
||||
|
||||
# Make the utf8proc integer helper available on every Integer.
Integer.send(:include, Utf8Proc::IntegerExtensions)
|
||||
|
||||
160
ruby/utf8proc_native.c
Normal file
160
ruby/utf8proc_native.c
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* File name: ruby/utf8proc_native.c
|
||||
*
|
||||
* Description:
|
||||
* Native part of the ruby wrapper for libutf8proc.
|
||||
*/
|
||||
|
||||
|
||||
#include "../utf8proc.c"
|
||||
#include "ruby.h"
|
||||
|
||||
/*
 * Fallback definitions for Ruby headers that do not provide the
 * RSTRING_PTR / RSTRING_LEN accessors: read the RString fields directly.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
|
||||
|
||||
/*
 * Environment wrapped in a Ruby data object by utf8proc_ruby_map; it owns
 * the temporary buffer so the registered free callback can release it.
 */
struct utf8proc_ruby_mapenv_struct {
  int32_t *buffer;
};

typedef struct utf8proc_ruby_mapenv_struct utf8proc_ruby_mapenv_t;
|
||||
|
||||
/*
 * Free callback for the map environment created in utf8proc_ruby_map.
 *
 * The struct is allocated by Data_Make_Struct and the buffer by ALLOC_N,
 * i.e. both come from Ruby's allocator, so they are released with xfree()
 * instead of the C library's free(). A NULL buffer is fine here.
 */
void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
  xfree(env->buffer);
  xfree(env);
}
|
||||
|
||||
/* Ruby objects created once in Init_utf8proc_native. */

/* The Utf8Proc module. */
VALUE utf8proc_ruby_module;

/* Hash mapping option symbols to flag values (exposed as Utf8Proc::Options). */
VALUE utf8proc_ruby_options;

/* Exception classes: UnicodeError and its two subclasses. */
VALUE utf8proc_ruby_eUnicodeError;
VALUE utf8proc_ruby_eInvalidUtf8Error;
VALUE utf8proc_ruby_eCodeNotAssignedError;
|
||||
|
||||
/*
 * Raises the Ruby exception that corresponds to a negative utf8proc result
 * code, using utf8proc_errmsg() for the message. Codes without a dedicated
 * mapping raise RuntimeError. The trailing return only satisfies the
 * signature; rb_raise does not come back.
 */
VALUE utf8proc_ruby_map_error(ssize_t result) {
  VALUE error_class = rb_eRuntimeError;

  if (result == UTF8PROC_ERROR_NOMEM) {
    error_class = rb_eNoMemError;
  } else if (result == UTF8PROC_ERROR_OVERFLOW ||
             result == UTF8PROC_ERROR_INVALIDOPTS) {
    error_class = rb_eArgError;
  } else if (result == UTF8PROC_ERROR_INVALIDUTF8) {
    error_class = utf8proc_ruby_eInvalidUtf8Error;
  } else if (result == UTF8PROC_ERROR_NOTASSIGNED) {
    error_class = utf8proc_ruby_eCodeNotAssignedError;
  }

  rb_raise(error_class, "%s", utf8proc_errmsg(result));
  return Qnil;
}
|
||||
|
||||
/*
 * Utf8Proc.utf8map(str, options): runs utf8proc_decompose followed by
 * utf8proc_reencode over the bytes of str and returns the result as a new
 * Ruby string. UTF8PROC_NULLTERM is masked out of the options because the
 * length is always taken from the Ruby string.
 *
 * Negative results from utf8proc are turned into Ruby exceptions by
 * utf8proc_ruby_map_error.
 *
 * The working buffer lives in a data object (env_obj) so that it is still
 * released by the free callback if an exception unwinds this function.
 */
VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
  VALUE str;
  int options;
  /* volatile: env_obj is never read again below, so without it the compiler
     could drop the reference and the GC might reclaim env while its buffer
     is still in use. */
  volatile VALUE env_obj;
  utf8proc_ruby_mapenv_t *env;
  ssize_t result;
  VALUE retval;

  str = StringValue(str_param);
  options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
  env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
    (RUBY_DATA_FUNC)utf8proc_ruby_mapenv_free, env);

  /* First pass with a NULL buffer only computes the required size. */
  result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
    NULL, 0, options);
  if (result < 0) {
    utf8proc_ruby_map_error(result);
    return Qnil;  /* needed to prevent problems with optimization */
  }

  env->buffer = ALLOC_N(int32_t, result+1);
  result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
    env->buffer, result, options);
  if (result < 0) {
    /* Buffer came from ALLOC_N, so it is released with xfree(). */
    xfree(env->buffer);
    env->buffer = 0;
    utf8proc_ruby_map_error(result);
    return Qnil;  /* needed to prevent problems with optimization */
  }

  /* Re-encodes in place; afterwards the buffer holds `result` bytes. */
  result = utf8proc_reencode(env->buffer, result, options);
  if (result < 0) {
    xfree(env->buffer);
    env->buffer = 0;
    utf8proc_ruby_map_error(result);
    return Qnil;  /* needed to prevent problems with optimization */
  }

  retval = rb_str_new((char *)env->buffer, result);
  xfree(env->buffer);
  env->buffer = 0;
#ifdef RB_GC_GUARD
  RB_GC_GUARD(env_obj);
#endif
  return retval;
}
|
||||
|
||||
/*
 * Utf8Proc.utf8char(code): returns a new string holding the bytes that
 * utf8proc_encode_char produces for the given code point. Raises
 * ArgumentError when utf8proc_codepoint_valid rejects the value.
 */
static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
  char encoded[4];
  ssize_t length;
  int codepoint = NUM2INT(code_param);

  if (!utf8proc_codepoint_valid(codepoint)) {
    rb_raise(rb_eArgError, "Invalid Unicode code point");
  }

  length = utf8proc_encode_char(codepoint, encoded);
  return rb_str_new(encoded, length);
}
|
||||
|
||||
/* Stores `field` (as a Fixnum) in the options hash under the symbol `sym`. */
#define register_utf8proc_option(sym, field) \
  rb_hash_aset(utf8proc_ruby_options, \
               ID2SYM(rb_intern(sym)), \
               INT2FIX(field))
|
||||
|
||||
void Init_utf8proc_native() {
|
||||
utf8proc_ruby_module = rb_define_module("Utf8Proc");
|
||||
rb_define_module_function(utf8proc_ruby_module, "utf8map",
|
||||
utf8proc_ruby_map, 2);
|
||||
rb_define_module_function(utf8proc_ruby_module, "utf8char",
|
||||
utf8proc_ruby_char, 1);
|
||||
utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
|
||||
"UnicodeError", rb_eStandardError);
|
||||
utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
|
||||
utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
|
||||
utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
|
||||
utf8proc_ruby_module, "CodeNotAssignedError",
|
||||
utf8proc_ruby_eUnicodeError);
|
||||
utf8proc_ruby_options = rb_hash_new();
|
||||
register_utf8proc_option("stable", UTF8PROC_STABLE);
|
||||
register_utf8proc_option("compat", UTF8PROC_COMPAT);
|
||||
register_utf8proc_option("compose", UTF8PROC_COMPOSE);
|
||||
register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
|
||||
register_utf8proc_option("ignore", UTF8PROC_IGNORE);
|
||||
register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
|
||||
register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS);
|
||||
register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS);
|
||||
register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF);
|
||||
register_utf8proc_option("stripcc", UTF8PROC_STRIPCC);
|
||||
register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
|
||||
register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
|
||||
register_utf8proc_option("lump", UTF8PROC_LUMP);
|
||||
register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
|
||||
OBJ_FREEZE(utf8proc_ruby_options);
|
||||
rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user