[Groonga-commit] groonga/groonga at 21c578c [support-index-section-in-match-columns] Support index section in match_columns

Back to archive index

Kouhei Sutou null+****@clear*****
Wed May 15 11:32:53 JST 2013


Kouhei Sutou	2013-05-15 11:32:53 +0900 (Wed, 15 May 2013)

  New Revision: 21c578cf6bfd7fbc9d6b71036e51b1c8162a7fb2
  https://github.com/groonga/groonga/commit/21c578cf6bfd7fbc9d6b71036e51b1c8162a7fb2

  Message:
    Support index section in match_columns
    
    It supports:
    
        select Documents \
          --match_columns "Lexicon.index.section1 * 10 || Lexicon.index.section2" \
          --query "QUERY" \
          --output_columns "content, _score"
        # -> _score = (# of matches against section1) * 10 + (# of matches against section2)

  Added files:
    test/command/suite/select/match_columns/section.test
  Modified files:
    lib/db.c
    lib/expr.c

  Modified: lib/db.c (+69 -1)
===================================================================
--- lib/db.c    2013-05-15 10:47:03 +0900 (9f7bc63)
+++ lib/db.c    2013-05-15 11:32:53 +0900 (7bbd7b5)
@@ -2557,12 +2557,17 @@ grn_obj_search_accessor(grn_ctx *ctx, grn_obj *obj, grn_obj *query,
   grn_obj *last_obj = NULL;
   int n_accessors;
 
-  n_accessors = 0;
   for (a = (grn_accessor *)obj; a; a = a->next) {
     if (!a->next) {
       last_obj = a->obj;
     }
+  }
+  n_accessors = 0;
+  for (a = (grn_accessor *)obj; a; a = a->next) {
     n_accessors++;
+    if (a->obj->header.type == GRN_COLUMN_INDEX) {
+      break;
+    }
   }
 
   {
@@ -8393,6 +8398,63 @@ is_valid_index(grn_ctx *ctx, grn_obj *index_column, grn_operator op)
   }
 }
 
+static int
+find_section(grn_ctx *ctx, grn_obj *index_column, grn_obj *indexed_column)
+{
+  int section = 0;
+  grn_id indexed_column_id;
+  grn_id *source_ids;
+  int i, n_source_ids;
+
+  indexed_column_id = DB_OBJ(indexed_column)->id;
+
+  source_ids = DB_OBJ(index_column)->source;
+  n_source_ids = DB_OBJ(index_column)->source_size / sizeof(grn_id);
+  for (i = 0; i < n_source_ids; i++) {
+    grn_id source_id = source_ids[i];
+    if (source_id == indexed_column_id) {
+      section = i + 1;
+      break;
+    }
+  }
+
+  return section;
+}
+
+static int
+grn_column_index_accessor_index_column(grn_ctx *ctx, grn_accessor *a,
+                                       grn_operator op,
+                                       grn_obj **indexbuf, int buf_size,
+                                       int *section)
+{
+  grn_obj *index_column = a->obj;
+
+  if (!is_valid_index(ctx, index_column, op)) {
+    return 0;
+  }
+
+  if (a->next) {
+    int specified_section;
+    grn_bool is_invalid_section;
+    if (a->next->next) {
+      return 0;
+    }
+    specified_section = find_section(ctx, index_column, a->next->obj);
+    is_invalid_section = (specified_section == 0);
+    if (is_invalid_section) {
+      return 0;
+    }
+    if (section) {
+      *section = specified_section;
+    }
+  }
+  if (buf_size > 0) {
+    *indexbuf = index_column;
+  }
+
+  return 1;
+}
+
 static inline int
 grn_column_index_accessor(grn_ctx *ctx, grn_obj *obj, grn_operator op,
                           grn_obj **indexbuf, int buf_size, int *section)
@@ -8406,6 +8468,12 @@ grn_column_index_accessor(grn_ctx *ctx, grn_obj *obj, grn_operator op,
     grn_bool found = GRN_FALSE;
     grn_hook_entry entry = -1;
 
+    if (a->action == GRN_ACCESSOR_GET_COLUMN_VALUE &&
+        a->obj->header.type == GRN_COLUMN_INDEX) {
+      return grn_column_index_accessor_index_column(ctx, a, op, indexbuf,
+                                                    buf_size, section);
+    }
+
     switch (a->action) {
     case GRN_ACCESSOR_GET_KEY :
       entry = GRN_HOOK_INSERT;

  Modified: lib/expr.c (+36 -1)
===================================================================
--- lib/expr.c    2013-05-15 10:47:03 +0900 (67fa332)
+++ lib/expr.c    2013-05-15 11:32:53 +0900 (5657407)
@@ -5679,6 +5679,41 @@ get_string(grn_ctx *ctx, efs_info *q)
   return rc;
 }
 
+static grn_obj *
+resolve_top_level_name(grn_ctx *ctx, const char *name, unsigned int name_size)
+{
+  unsigned int i;
+  unsigned int first_delimiter_position = 0;
+  unsigned int n_delimiters = 0;
+  grn_obj *top_level_object;
+  grn_obj *object;
+
+  for (i = 0; i < name_size; i++) {
+    if (name[i] != GRN_DB_DELIMITER) {
+      continue;
+    }
+
+    if (n_delimiters == 0) {
+      first_delimiter_position = i;
+    }
+    n_delimiters++;
+  }
+
+  if (n_delimiters < 2) {
+    return grn_ctx_get(ctx, name, name_size);
+  }
+
+  top_level_object = grn_ctx_get(ctx, name, first_delimiter_position);
+  if (!top_level_object) {
+    return NULL;
+  }
+  object = grn_obj_column(ctx, top_level_object,
+                          name + first_delimiter_position + 1,
+                          name_size - first_delimiter_position - 1);
+  grn_obj_unlink(ctx, top_level_object);
+  return object;
+}
+
 static grn_rc
 get_identifier(grn_ctx *ctx, efs_info *q)
 {
@@ -5767,7 +5802,7 @@ done :
       grn_expr_append_obj(ctx, q->e, obj, GRN_OP_GET_VALUE, 1);
       goto exit;
     }
-    if ((obj = grn_ctx_get(ctx, name, name_size))) {
+    if ((obj = resolve_top_level_name(ctx, name, name_size))) {
       PARSE(GRN_EXPR_TOKEN_IDENTIFIER);
       grn_expr_append_obj(ctx, q->e, obj, GRN_OP_PUSH, 1);
       goto exit;

  Added: test/command/suite/select/match_columns/section.test (+22 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/match_columns/section.test    2013-05-15 11:32:53 +0900 (b796634)
@@ -0,0 +1,22 @@
+table_create Memos TABLE_NO_KEY
+column_create Memos title COLUMN_SCALAR ShortText
+column_create Memos content COLUMN_SCALAR ShortText
+
+table_create Lexicon TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+column_create Lexicon memo_index COLUMN_INDEX|WITH_POSITION|WITH_SECTION \
+  Memos title,content
+
+load --table Memos
+[
+["title", "content"],
+["groonga", "Start groonga!"],
+["mroonga", "Start mroonga!"],
+["rroonga", "Start rroonga!"],
+["Ruby", "Start Ruby!"],
+["learn", "Learning Ruby and groonga..."]
+]
+
+select Memos \
+  --match_columns "Lexicon.memo_index.title * 10 || Lexicon.memo_index.content" \
+  --query rroonga \
+  --output_columns "title, content, _score"
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index