Tasuku SUENAGA a.k.a. gunyarakun
null+****@clear*****
Thu Feb 5 22:18:44 JST 2015
Tasuku SUENAGA a.k.a. gunyarakun 2015-02-05 22:18:44 +0900 (Thu, 05 Feb 2015) New Revision: 6b4e5e1c2bbd9d5f46b918e4e710d61da972a106 https://github.com/ranguba/rroonga/commit/6b4e5e1c2bbd9d5f46b918e4e710d61da972a106 Merged 8634043: Merge pull request #44 from ranguba/feature/normalize-flag Message: Add flags for Groonga::Normalizer#normalize Modified files: doc/po/ja.po ext/groonga/rb-grn-normalizer.c test/test-normalizer.rb Modified: doc/po/ja.po (+4 -2) =================================================================== --- doc/po/ja.po 2015-01-31 14:08:07 +0900 (f2265ae) +++ doc/po/ja.po 2015-02-05 22:18:44 +0900 (bc593bc) @@ -5779,8 +5779,10 @@ msgstr "文字列を正規化します。" # @example msgid "" -"# Normalizes \"ABC\" with the default normalizer\n" -"Groonga::Normalizer.normalize(\"AbC\") # => \"abc\"" +"# Normalizes \"AbC DeF\" with the default normalizer (removing spaces)\n" +"Groonga::Normalizer.normalize(\"AbC DeF\") # => \"abcdef\"" +"# Normalizes \"AbC DeF\" without removing spaces\n" +"Groonga::Normalizer.normalize(\"AbC DeF\", 0) # => \"abc def\"" msgstr "" # @overload Modified: ext/groonga/rb-grn-normalizer.c (+21 -6) =================================================================== --- ext/groonga/rb-grn-normalizer.c 2015-01-31 14:08:07 +0900 (47fc02f) +++ ext/groonga/rb-grn-normalizer.c 2015-02-05 22:18:44 +0900 (58a68a2) @@ -40,7 +40,7 @@ VALUE rb_cGrnNormalizer; * @param [String] string The original string */ static VALUE -rb_grn_normalizer_s_normalize (VALUE klass, VALUE rb_string) +rb_grn_normalizer_s_normalize (int argc, VALUE *argv, VALUE klass) { VALUE rb_context = Qnil; VALUE rb_encoded_string; @@ -48,19 +48,30 @@ rb_grn_normalizer_s_normalize (VALUE klass, VALUE rb_string) grn_ctx *context = NULL; grn_obj *grn_string; grn_obj *normalizer = GRN_NORMALIZER_AUTO; - /* TODO: make customizable */ - int flags = GRN_STRING_REMOVE_BLANK; + int flags = 0; const char *normalized_string; unsigned int normalized_string_length; + if (argc != 1 && argc != 2) { + 
rb_raise(rb_eArgError, "wrong number of arguments"); + } else if (TYPE(argv[0]) != T_STRING) { + rb_raise(rb_eArgError, "argument 0 should be a string to be normalized"); + } else if (argc == 1) { + flags = GRN_STRING_REMOVE_BLANK; + } else if (TYPE(argv[1]) == T_FIXNUM) { + flags = FIX2INT(argv[1]); + } else { + rb_raise(rb_eArgError, "argument 1 should be a flag defined in Groonga::Normalizer class"); + } + context = rb_grn_context_ensure(&rb_context); - rb_encoded_string = rb_grn_context_rb_string_encode(context, rb_string); + rb_encoded_string = rb_grn_context_rb_string_encode(context, argv[0]); grn_string = grn_string_open(context, RSTRING_PTR(rb_encoded_string), RSTRING_LEN(rb_encoded_string), normalizer, flags); - rb_grn_context_check(context, rb_string); + rb_grn_context_check(context, argv[0]); grn_string_get_normalized(context, grn_string, &normalized_string, &normalized_string_length, NULL); @@ -79,5 +90,9 @@ rb_grn_init_normalizer (VALUE mGrn) rb_cGrnNormalizer = rb_define_class_under(mGrn, "Normalizer", rb_cObject); rb_define_singleton_method(rb_cGrnNormalizer, "normalize", - rb_grn_normalizer_s_normalize, 1); + rb_grn_normalizer_s_normalize, -1); + rb_define_const(rb_cGrnNormalizer, "REMOVE_BLANK", + INT2FIX(GRN_STRING_REMOVE_BLANK)); + rb_define_const(rb_cGrnNormalizer, "REMOVE_TOKENIZED_DELIMITER", + INT2FIX(GRN_STRING_REMOVE_TOKENIZED_DELIMITER)); } Modified: test/test-normalizer.rb (+24 -0) =================================================================== --- test/test-normalizer.rb 2015-01-31 14:08:07 +0900 (74224b4) +++ test/test-normalizer.rb 2015-02-05 22:18:44 +0900 (7333d68) @@ -21,4 +21,28 @@ class NormalizerTest < Test::Unit::TestCase def test_normalize assert_equal("abc", Groonga::Normalizer.normalize("AbC")) end + + def test_normalize_with_space + assert_equal("abcdefgh", Groonga::Normalizer.normalize("AbC Def gh")) + end + + def test_normalize_with_space_explicitly + assert_equal("abcdefgh", + Groonga::Normalizer.normalize("AbC Def 
gh", Groonga::Normalizer::REMOVE_BLANK)) + end + + def test_normalize_group_text + assert_equal("キロメートルキロメートルキロメートルキロメートル", + Groonga::Normalizer.normalize("㌖㌖㌖㌖")); + end + + def test_normalize_keep_space + # full width space => half width space + assert_equal("abc def gh", + Groonga::Normalizer.normalize("AbC Def gh", 0)) + end + + def test_normalize_tilda + assert_equal("~~~", Groonga::Normalizer.normalize("~~〜")) + end end -------------- next part -------------- HTMLの添付ファイルを保管しました... Download