[Groonga-commit] ranguba/rroonga at 6b4e5e1 [master] Add flags for Groonga::Normalizer#normalize

Back to archive index

Tasuku SUENAGA a.k.a. gunyarakun null+****@clear*****
Thu Feb 5 22:18:44 JST 2015


Tasuku SUENAGA a.k.a. gunyarakun	2015-02-05 22:18:44 +0900 (Thu, 05 Feb 2015)

  New Revision: 6b4e5e1c2bbd9d5f46b918e4e710d61da972a106
  https://github.com/ranguba/rroonga/commit/6b4e5e1c2bbd9d5f46b918e4e710d61da972a106

  Merged 8634043: Merge pull request #44 from ranguba/feature/normalize-flag

  Message:
    Add flags for Groonga::Normalizer#normalize

  Modified files:
    doc/po/ja.po
    ext/groonga/rb-grn-normalizer.c
    test/test-normalizer.rb

  Modified: doc/po/ja.po (+4 -2)
===================================================================
--- doc/po/ja.po    2015-01-31 14:08:07 +0900 (f2265ae)
+++ doc/po/ja.po    2015-02-05 22:18:44 +0900 (bc593bc)
@@ -5779,8 +5779,10 @@ msgstr "文字列を正規化します。"
 
 # @example
 msgid ""
-"# Normalizes \"ABC\" with the default normalizer\n"
-"Groonga::Normalizer.normalize(\"AbC\") # => \"abc\""
+"# Normalizes \"AbC DeF\" with the default normalizer (removing spaces)\n"
+"Groonga::Normalizer.normalize(\"AbC DeF\") # => \"abcdef\""
+"# Normalizes \"AbC DeF\" without removing spaces\n"
+"Groonga::Normalizer.normalize(\"AbC DeF\", 0) # => \"abc def\""
 msgstr ""
 
 # @overload

  Modified: ext/groonga/rb-grn-normalizer.c (+21 -6)
===================================================================
--- ext/groonga/rb-grn-normalizer.c    2015-01-31 14:08:07 +0900 (47fc02f)
+++ ext/groonga/rb-grn-normalizer.c    2015-02-05 22:18:44 +0900 (58a68a2)
@@ -40,7 +40,7 @@ VALUE rb_cGrnNormalizer;
  *   @param [String] string The original string
  */
 static VALUE
-rb_grn_normalizer_s_normalize (VALUE klass, VALUE rb_string)
+rb_grn_normalizer_s_normalize (int argc, VALUE *argv, VALUE klass)
 {
     VALUE rb_context = Qnil;
     VALUE rb_encoded_string;
@@ -48,19 +48,30 @@ rb_grn_normalizer_s_normalize (VALUE klass, VALUE rb_string)
     grn_ctx *context = NULL;
     grn_obj *grn_string;
     grn_obj *normalizer = GRN_NORMALIZER_AUTO;
-    /* TODO: make customizable */
-    int flags = GRN_STRING_REMOVE_BLANK;
+    int flags = 0;
     const char *normalized_string;
     unsigned int normalized_string_length;
 
+    if (argc != 1 && argc != 2) {
+        rb_raise(rb_eArgError, "wrong number of arguments");
+    } else if (TYPE(argv[0]) != T_STRING) {
+        rb_raise(rb_eArgError, "argument 0 should be a string to be normalized");
+    } else if (argc == 1) {
+        flags = GRN_STRING_REMOVE_BLANK;
+    } else if (TYPE(argv[1]) == T_FIXNUM) {
+        flags = FIX2INT(argv[1]);
+    } else {
+        rb_raise(rb_eArgError, "argument 1 should be a flag defined in Groonga::Normalizer class");
+    }
+
     context = rb_grn_context_ensure(&rb_context);
-    rb_encoded_string = rb_grn_context_rb_string_encode(context, rb_string);
+    rb_encoded_string = rb_grn_context_rb_string_encode(context, argv[0]);
     grn_string = grn_string_open(context,
                                  RSTRING_PTR(rb_encoded_string),
                                  RSTRING_LEN(rb_encoded_string),
                                  normalizer,
                                  flags);
-    rb_grn_context_check(context, rb_string);
+    rb_grn_context_check(context, argv[0]);
     grn_string_get_normalized(context, grn_string,
                               &normalized_string, &normalized_string_length,
                               NULL);
@@ -79,5 +90,9 @@ rb_grn_init_normalizer (VALUE mGrn)
     rb_cGrnNormalizer = rb_define_class_under(mGrn, "Normalizer", rb_cObject);
 
     rb_define_singleton_method(rb_cGrnNormalizer, "normalize",
-                               rb_grn_normalizer_s_normalize, 1);
+                               rb_grn_normalizer_s_normalize, -1);
+    rb_define_const(rb_cGrnNormalizer, "REMOVE_BLANK",
+                    INT2FIX(GRN_STRING_REMOVE_BLANK));
+    rb_define_const(rb_cGrnNormalizer, "REMOVE_TOKENIZED_DELIMITER",
+                    INT2FIX(GRN_STRING_REMOVE_TOKENIZED_DELIMITER));
 }

  Modified: test/test-normalizer.rb (+24 -0)
===================================================================
--- test/test-normalizer.rb    2015-01-31 14:08:07 +0900 (74224b4)
+++ test/test-normalizer.rb    2015-02-05 22:18:44 +0900 (7333d68)
@@ -21,4 +21,28 @@ class NormalizerTest < Test::Unit::TestCase
   def test_normalize
     assert_equal("abc", Groonga::Normalizer.normalize("AbC"))
   end
+
+  def test_normalize_with_space
+    assert_equal("abcdefgh", Groonga::Normalizer.normalize("AbC Def gh"))
+  end
+
+  def test_normalize_with_space_explicitly
+    assert_equal("abcdefgh",
+                 Groonga::Normalizer.normalize("AbC Def gh", Groonga::Normalizer::REMOVE_BLANK))
+  end
+
+  def test_normalize_group_text
+    assert_equal("キロメートルキロメートルキロメートルキロメートル",
+                 Groonga::Normalizer.normalize("㌖㌖㌖㌖"));
+  end
+
+  def test_normalize_keep_space
+    # full width space => half width space
+    assert_equal("abc def gh",
+                 Groonga::Normalizer.normalize("AbC Def gh", 0))
+  end
+
+  def test_normalize_tilda
+    assert_equal("~~~", Groonga::Normalizer.normalize("~~〜"))
+  end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index