null+****@clear*****
null+****@clear*****
2012年 3月 12日 (月) 15:36:07 JST
Kouhei Sutou 2012-03-12 15:36:07 +0900 (Mon, 12 Mar 2012)
New Revision: 8a333f32a5767edc18d5d3c86682071941bbdfbb
Log:
suggest: don't learn duplicated terms for suggest
Added files:
test/function/suite/suggest/suggest/learn-duplicated.expected
test/function/suite/suggest/suggest/learn-duplicated.test
Modified files:
plugins/suggest/suggest.c
Modified: plugins/suggest/suggest.c (+15 -1)
===================================================================
--- plugins/suggest/suggest.c 2012-03-12 15:24:02 +0900 (8f2fb05)
+++ plugins/suggest/suggest.c 2012-03-12 15:36:07 +0900 (7147ed5)
@@ -787,6 +787,7 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
grn_id tid;
grn_obj *pre_item = &(learner->pre_item);
grn_obj *post_item = learner->post_item;
+ grn_hash *token_ids = NULL;
while ((tid = grn_token_next(ctx, token)) && tid != learner->post_item_id) {
uint64_t key;
int added;
@@ -801,7 +802,20 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
grn_obj_set_value(ctx, learner->pairs_post, pair_id,
post_item, GRN_OBJ_SET);
}
- learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+ if (!token_ids) {
+ token_ids = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+ GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
+ }
+ if (token_ids) {
+ int token_added;
+ grn_hash_add(ctx, token_ids, &tid, sizeof(grn_id), NULL, &token_added);
+ if (token_added) {
+ learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+ }
+ }
+ }
+ if (token_ids) {
+ grn_hash_close(ctx, token_ids);
}
grn_token_close(ctx, token);
}
Added: test/function/suite/suggest/suggest/learn-duplicated.expected (+37 -0) 100644
===================================================================
--- /dev/null
+++ test/function/suite/suggest/suggest/learn-duplicated.expected 2012-03-12 15:36:07 +0900 (d50a54f)
@@ -0,0 +1,37 @@
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"},
+{"sequence": "2", "time": 1312950803.96857, "item": "engine engine engine", "type": "submit"}
+]
+[[0,0.0,0.0],2]
+suggest --table item_query --column kana --types suggest --query engine --frequency_threshold 0 --conditional_probability_threshold 3
+[[0,0.0,0.0],{"suggest":[[0],[["_key","ShortText"],["_score","Int32"]]]}]
+suggest --table item_query --column kana --types suggest --query engine --frequency_threshold 0 --conditional_probability_threshold 1
+[
+ [
+ 0,
+ 0.0,
+ 0.0
+ ],
+ {
+ "suggest": [
+ [
+ 1
+ ],
+ [
+ [
+ "_key",
+ "ShortText"
+ ],
+ [
+ "_score",
+ "Int32"
+ ]
+ ],
+ [
+ "engine engine engine",
+ 2
+ ]
+ ]
+ }
+]
Added: test/function/suite/suggest/suggest/learn-duplicated.test (+25 -0) 100644
===================================================================
--- /dev/null
+++ test/function/suite/suggest/suggest/learn-duplicated.test 2012-03-12 15:36:07 +0900 (3caad5f)
@@ -0,0 +1,25 @@
+# disable-logging
+# suggest-create-dataset query
+# enable-logging
+
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"},
+{"sequence": "2", "time": 1312950803.96857, "item": "engine engine engine", "type": "submit"}
+]
+
+suggest \
+ --table item_query \
+ --column kana \
+ --types suggest \
+ --query engine \
+ --frequency_threshold 0 \
+ --conditional_probability_threshold 3
+
+suggest \
+ --table item_query \
+ --column kana \
+ --types suggest \
+ --query engine \
+ --frequency_threshold 0 \
+ --conditional_probability_threshold 1