[Groonga-commit] groonga/groonga [master] Fix a bug that UTF-8 normalization computes wrong offset

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Oct 29 18:12:32 JST 2012


Kouhei Sutou	2012-10-29 18:12:32 +0900 (Mon, 29 Oct 2012)

  New Revision: 69ad62eff16203799cce57ef66f8b89db355306b
  https://github.com/groonga/groonga/commit/69ad62eff16203799cce57ef66f8b89db355306b

  Log:
    Fix a bug that UTF-8 normalization computes wrong offset
    
    For example, it causes a problem for snippet. Snippet shows wrong
    location as detected location.
    
    fixes #1531
    
    Reported by Tomoatsu Shimada. Thanks!!!

  Modified files:
    lib/string.c
    test/unit/util/test-snip.c

  Modified: lib/string.c (+3 -1)
===================================================================
--- lib/string.c    2012-10-29 17:27:51 +0900 (6b0cfec)
+++ lib/string.c    2012-10-29 18:12:32 +0900 (0f203bb)
@@ -602,7 +602,9 @@ utf8_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
       if (cp) { cp--; }
       if (ch) {
         ch -= (d - d_);
-        s_ = s__;
+        if (ch[0] >= 0) {
+          s_ = s__;
+        }
       }
       d = d_;
       length--;

  Modified: test/unit/util/test-snip.c (+30 -0)
===================================================================
--- test/unit/util/test-snip.c    2012-10-29 17:27:51 +0900 (d0748a1)
+++ test/unit/util/test-snip.c    2012-10-29 18:12:32 +0900 (fdd97b5)
@@ -31,6 +31,7 @@ void test_exec_with_empty_string(void);
 void test_exec_with_invalid_argument(void);
 void data_proper_tag_insertion(void);
 void test_proper_tag_insertion(gconstpointer data);
+void test_exec_composed_decomposed_normalize_utf8(void);
 void test_exec_with_normalize(void);
 void test_exec_with_many_results(void);
 void test_customized_tag(void);
@@ -442,6 +443,35 @@ test_exec_with_invalid_argument(void)
 }
 
 void
+test_exec_composed_decomposed_normalize_utf8(void)
+{
+  unsigned int n_results;
+  unsigned int max_tagged_len;
+  unsigned int result_len;
+  const gchar text[] = "Ⅶ¨abcde";
+  const gchar keyword[] = "ab";
+
+  default_encoding = GRN_ENC_UTF8;
+  default_flags = GRN_SNIP_NORMALIZE;
+
+  cut_assert_open_snip();
+  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
+                                    NULL, 0, NULL, 0));
+
+  grn_test_assert(grn_snip_exec(&context, snip,
+                                text, strlen(text),
+                                &n_results, &max_tagged_len));
+  cut_assert_equal_uint(1, n_results);
+  cut_assert_equal_uint(15, max_tagged_len);
+  result = g_new(gchar, max_tagged_len);
+
+  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
+  cut_assert_equal_string("Ⅶ¨[[ab]]cde",
+                          result);
+  cut_assert_equal_uint(14, result_len);
+}
+
+void
 test_exec_with_normalize(void)
 {
   unsigned int n_results;
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index