[Groonga-commit] groonga/groonga at 27eee0a [master] highlighter: fix wrong character length detection

Back to archive index

Kouhei Sutou null+****@clear*****
Tue May 22 16:39:29 JST 2018


Kouhei Sutou	2018-05-22 16:39:29 +0900 (Tue, 22 May 2018)

  New Revision: 27eee0a827203a927fb42873ea161b2806a410b6
  https://github.com/groonga/groonga/commit/27eee0a827203a927fb42873ea161b2806a410b6

  Message:
    highlighter: fix wrong character length detection

  Added files:
    test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.expected
    test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.test
  Modified files:
    lib/highlighter.c

  Modified: lib/highlighter.c (+2 -9)
===================================================================
--- lib/highlighter.c    2018-05-22 16:38:48 +0900 (d9e730c3c)
+++ lib/highlighter.c    2018-05-22 16:39:29 +0900 (9c27b6b60)
@@ -455,7 +455,6 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx,
                                   grn_obj *output)
 {
   grn_token_cursor *cursor;
-  grn_encoding encoding = highlighter->lexicon.encoding;
   grn_obj *token_ids = &(highlighter->lexicon.token_ids);
   grn_obj *token_locations = &(highlighter->lexicon.token_locations);
   grn_obj *candidates = &(highlighter->lexicon.candidates);
@@ -487,15 +486,9 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx,
     GRN_RECORD_PUT(ctx, token_ids, token_id);
     location.offset = grn_token_get_source_offset(ctx, token);
     location.length = grn_token_get_source_length(ctx, token);
+    location.first_character_length =
+      grn_token_get_source_first_character_length(ctx, token);
     location.have_overlap = grn_token_have_overlap(ctx, token);
-    {
-      const char *data;
-      size_t data_length;
-
-      data = grn_token_get_data_raw(ctx, token, &data_length);
-      location.first_character_length =
-        grn_charlen_(ctx, data, data + data_length, encoding);
-    }
     GRN_TEXT_PUT(ctx, token_locations, &location, sizeof(location));
   }
   grn_token_cursor_close(ctx, cursor);

  Added: test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.expected (+37 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.expected    2018-05-22 16:39:29 +0900 (962e6f444)
@@ -0,0 +1,37 @@
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer 'TokenNgram("loose_symbol", true,                                   "report_source_location", true)'   --normalizer 'NormalizerNFKC100("unify_hyphen_and_prolonged_sound_mark", true)'
+[[0,0.0,0.0],true]
+column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+load --table Entries
+[
+{"body": "[クリアコード]"}
+]
+[[0,0.0,0.0],1]
+select Entries   --match_columns body   --query 'クリアコード'   --output_columns 'highlight_html(body, Terms)'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        1
+      ],
+      [
+        [
+          "highlight_html",
+          null
+        ]
+      ],
+      [
+        "<span class=\"keyword\">[クリアコード</span>]"
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.test (+19 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.test    2018-05-22 16:39:29 +0900 (fb4556c79)
@@ -0,0 +1,19 @@
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer 'TokenNgram("loose_symbol", true, \
+                                  "report_source_location", true)' \
+  --normalizer 'NormalizerNFKC100("unify_hyphen_and_prolonged_sound_mark", true)'
+column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body
+
+load --table Entries
+[
+{"body": "[クリアコード]"}
+]
+
+select Entries \
+  --match_columns body \
+  --query 'クリアコード' \
+  --output_columns 'highlight_html(body, Terms)'
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180522/3567b240/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index