[Groonga-commit] groonga/groonga [master] suggest: use configuration.weight in learning

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Apr 1 12:52:42 JST 2013


Kouhei Sutou	2013-04-01 12:52:42 +0900 (Mon, 01 Apr 2013)

  New Revision: 4976167a0e5f1e8623a63f720f123af48a060724
  https://github.com/groonga/groonga/commit/4976167a0e5f1e8623a63f720f123af48a060724

  Message:
    suggest: use configuration.weight in learning
    
    Before this change, score of a learned item is always increment 1.
    After this change, incremented value can be customized by
    configuration.weight value. If configuration.weight is zero, the
    learning is ignored. If configuration.weight is 2 or more, the
    learning is more important learning.

  Added files:
    test/command/suite/suggest/suggest/learn-weight-0.expected
    test/command/suite/suggest/suggest/learn-weight-0.test
    test/command/suite/suggest/suggest/learn-weight.expected
    test/command/suite/suggest/suggest/learn-weight.test
  Modified files:
    plugins/suggest/suggest.c

  Modified: plugins/suggest/suggest.c (+81 -6)
===================================================================
--- plugins/suggest/suggest.c    2013-03-29 18:52:18 +0900 (b47174c)
+++ plugins/suggest/suggest.c    2013-04-01 12:52:42 +0900 (a4613a0)
@@ -78,7 +78,11 @@ typedef struct {
   grn_obj *pairs_freq1;
   grn_obj *pairs_freq2;
 
-  grn_obj v1;
+  grn_obj dataset_name;
+
+  grn_obj *configuration;
+
+  grn_obj weight;
   grn_obj pre_events;
 
   uint64_t key_prefix;
@@ -698,18 +702,85 @@ learner_fin_columns(grn_ctx *ctx, grn_suggest_learner *learner)
 }
 
 static void
-learner_init_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
+learner_init_weight(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  grn_obj *weight_column = NULL;
+  unsigned int weight = 1;
+
+  if (learner->configuration) {
+    weight_column = grn_obj_column(ctx,
+                                   learner->configuration,
+                                   CONST_STR_LEN("weight"));
+  }
+  if (weight_column) {
+    grn_id id;
+    id = grn_table_get(ctx, learner->configuration,
+                       GRN_TEXT_VALUE(&(learner->dataset_name)),
+                       GRN_TEXT_LEN(&(learner->dataset_name)));
+    if (id != GRN_ID_NIL) {
+      grn_obj weight_value;
+      GRN_UINT32_INIT(&weight_value, 0);
+      grn_obj_get_value(ctx, weight_column, id, &weight_value);
+      weight = GRN_UINT32_VALUE(&weight_value);
+      GRN_OBJ_FIN(ctx, &weight_value);
+    }
+    grn_obj_unlink(ctx, weight_column);
+  }
+
+  GRN_UINT32_INIT(&(learner->weight), 0);
+  GRN_UINT32_SET(ctx, &(learner->weight), weight);
+}
+
+static void
+learner_init_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  char events_name[GRN_TABLE_MAX_KEY_SIZE];
+  unsigned int events_name_size;
+  unsigned int events_name_prefix_size;
+
+  events_name_size = grn_obj_name(ctx, learner->events,
+                                  events_name, GRN_TABLE_MAX_KEY_SIZE);
+  GRN_TEXT_INIT(&(learner->dataset_name), 0);
+  events_name_prefix_size = strlen("event_");
+  if (events_name_size > events_name_prefix_size) {
+    GRN_TEXT_PUT(ctx,
+                 &(learner->dataset_name),
+                 events_name + events_name_prefix_size,
+                 events_name_size - events_name_prefix_size);
+  }
+}
+
+static void
+learner_fin_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  GRN_OBJ_FIN(ctx, &(learner->dataset_name));
+}
+
+static void
+learner_init_configuration(grn_ctx *ctx, grn_suggest_learner *learner)
 {
-  GRN_UINT32_INIT(&(learner->v1), 0);
-  GRN_UINT32_SET(ctx, &(learner->v1), 1);
+  learner->configuration = grn_ctx_get(ctx, "configuration", -1);
+}
 
+static void
+learner_fin_configuration(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  if (learner->configuration) {
+    grn_obj_unlink(ctx, learner->configuration);
+  }
+}
+
+static void
+learner_init_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  learner_init_weight(ctx, learner);
   GRN_RECORD_INIT(&(learner->pre_events), 0, grn_obj_id(ctx, learner->events));
 }
 
 static void
 learner_fin_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
 {
-  grn_obj_unlink(ctx, &(learner->v1));
+  grn_obj_unlink(ctx, &(learner->weight));
   grn_obj_unlink(ctx, &(learner->pre_events));
 }
 
@@ -744,7 +815,7 @@ static void
 learner_increment(grn_ctx *ctx, grn_suggest_learner *learner,
                   grn_obj *column, grn_id record_id)
 {
-  grn_obj_set_value(ctx, column, record_id, &(learner->v1), GRN_OBJ_INCR);
+  grn_obj_set_value(ctx, column, record_id, &(learner->weight), GRN_OBJ_INCR);
 }
 
 static void
@@ -876,6 +947,8 @@ learner_learn(grn_ctx *ctx, grn_suggest_learner *learner)
 {
   if (learner_is_valid_input(ctx, learner)) {
     learner_init_columns(ctx, learner);
+    learner_init_dataset_name(ctx, learner);
+    learner_init_configuration(ctx, learner);
     learner_init_buffers(ctx, learner);
     learner_increment_item_freq(ctx, learner, learner->items_freq);
     learner_set_last_post_time(ctx, learner);
@@ -888,6 +961,8 @@ learner_learn(grn_ctx *ctx, grn_suggest_learner *learner)
     }
     learner_append_post_event(ctx, learner);
     learner_fin_buffers(ctx, learner);
+    learner_fin_configuration(ctx, learner);
+    learner_fin_dataset_name(ctx, learner);
     learner_fin_columns(ctx, learner);
   }
 }

  Added: test/command/suite/suggest/suggest/learn-weight-0.expected (+39 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/suggest/suggest/learn-weight-0.expected    2013-04-01 12:52:42 +0900 (54249e4)
@@ -0,0 +1,39 @@
+load --table configuration
+[
+{"_key": "query", "weight": 0}
+]
+[[0,0.0,0.0],1]
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"}
+]
+[[0,0.0,0.0],1]
+suggest   --table item_query   --column kana   --types suggest   --query engine   --frequency_threshold 0   --conditional_probability_threshold 0
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "suggest": [
+      [
+        1
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "_score",
+          "Int32"
+        ]
+      ],
+      [
+        "engine engine engine",
+        0
+      ]
+    ]
+  }
+]

  Added: test/command/suite/suggest/suggest/learn-weight-0.test (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/suggest/suggest/learn-weight-0.test    2013-04-01 12:52:42 +0900 (27968c2)
@@ -0,0 +1,21 @@
+#@disable-logging
+#@suggest-create-dataset query
+#@enable-logging
+
+load --table configuration
+[
+{"_key": "query", "weight": 0}
+]
+
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"}
+]
+
+suggest \
+  --table item_query \
+  --column kana \
+  --types suggest \
+  --query engine \
+  --frequency_threshold 0 \
+  --conditional_probability_threshold 0

  Added: test/command/suite/suggest/suggest/learn-weight.expected (+39 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/suggest/suggest/learn-weight.expected    2013-04-01 12:52:42 +0900 (5a78e39)
@@ -0,0 +1,39 @@
+load --table configuration
+[
+{"_key": "query", "weight": 10}
+]
+[[0,0.0,0.0],1]
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"}
+]
+[[0,0.0,0.0],1]
+suggest   --table item_query   --column kana   --types suggest   --query engine   --frequency_threshold 0   --conditional_probability_threshold 0
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "suggest": [
+      [
+        1
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "_score",
+          "Int32"
+        ]
+      ],
+      [
+        "engine engine engine",
+        10
+      ]
+    ]
+  }
+]

  Added: test/command/suite/suggest/suggest/learn-weight.test (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/suggest/suggest/learn-weight.test    2013-04-01 12:52:42 +0900 (652171d)
@@ -0,0 +1,21 @@
+#@disable-logging
+#@suggest-create-dataset query
+#@enable-logging
+
+load --table configuration
+[
+{"_key": "query", "weight": 10}
+]
+
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"}
+]
+
+suggest \
+  --table item_query \
+  --column kana \
+  --types suggest \
+  --query engine \
+  --frequency_threshold 0 \
+  --conditional_probability_threshold 0
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index