[Groonga-mysql-commit] mroonga/mroonga [master] use bigram tokenizer as the fallback tokenizer.

Back to archive index

null+****@clear***** null+****@clear*****
2011年 9月 25日 (日) 10:25:55 JST


Kouhei Sutou	2011-09-25 01:25:55 +0000 (Sun, 25 Sep 2011)

  New Revision: 65d608a5f17efce0a7ada0597b697d543098a85b

  Log:
    use bigram tokenizer as the fallback tokenizer.
    
    fixes #592

  Modified files:
    ha_mroonga.cc
    ha_mroonga.h

  Modified: ha_mroonga.cc (+36 -7)
===================================================================
--- ha_mroonga.cc    2011-09-25 01:25:24 +0000 (75f2f37)
+++ ha_mroonga.cc    2011-09-25 01:25:55 +0000 (d511f13)
@@ -1431,10 +1431,10 @@ int ha_mroonga::wrapper_create_index_table(grn_obj *grn_table,
   index_tables[i] = index_table;
 
   grn_info_type info_type = GRN_INFO_DEFAULT_TOKENIZER;
-  grn_obj *token_type = grn_ctx_get(ctx, tmp_share->key_parser[i],
-                                    tmp_share->key_parser_length[i]);
-  grn_obj_set_info(ctx, index_table, info_type, token_type);
-  grn_obj_unlink(ctx, token_type);
+  grn_obj *tokenizer = find_tokenizer(tmp_share->key_parser[i],
+                                      tmp_share->key_parser_length[i]);
+  grn_obj_set_info(ctx, index_table, info_type, tokenizer);
+  grn_obj_unlink(ctx, tokenizer);
 
   grn_obj *index_column = grn_column_create(ctx, index_table,
                                             index_column_name,
@@ -1789,9 +1789,10 @@ int ha_mroonga::storage_create_index(TABLE *table, const char *grn_table_name,
 
     if (key_alg == HA_KEY_ALG_FULLTEXT) {
       grn_info_type info_type = GRN_INFO_DEFAULT_TOKENIZER;
-      grn_obj *token_type = grn_ctx_get(ctx, tmp_share->key_parser[i],
-                                        tmp_share->key_parser_length[i]);
-      grn_obj_set_info(ctx, index_table, info_type, token_type);
+      grn_obj *tokenizer = find_tokenizer(tmp_share->key_parser[i],
+                                          tmp_share->key_parser_length[i]);
+      grn_obj_set_info(ctx, index_table, info_type, tokenizer);
+      grn_obj_unlink(ctx, tokenizer);
     }
 
     grn_obj_flags index_column_flags =
@@ -5195,6 +5196,34 @@ void ha_mroonga::clear_search_result()
   DBUG_VOID_RETURN;
 }
 
+// Looks up the tokenizer called `name` (`name_length` bytes) in ctx. Pushes a
+// warning and falls back to MRN_TOKENIZER_DEFAULT, then to the object for
+// GRN_DB_BIGRAM, whenever the preceding lookup returned NULL.
+grn_obj *ha_mroonga::find_tokenizer(const char *name, int name_length)
+{
+  MRN_DBUG_ENTER_METHOD();
+  grn_obj *tokenizer = grn_ctx_get(ctx, name, name_length);
+  if (!tokenizer) {
+    char message[MRN_BUFFER_SIZE];
+    // Bounded write: nothing here limits name_length to the buffer size.
+    snprintf(message, sizeof(message),
+             "specified tokenizer <%.*s> doesn't exist. "
+             "default tokenizer <%s> is used instead.",
+             name_length, name, MRN_TOKENIZER_DEFAULT);
+    push_warning(ha_thd(), MYSQL_ERROR::WARN_LEVEL_WARN,
+                 ER_UNSUPPORTED_EXTENSION, message);
+    tokenizer = grn_ctx_get(ctx, MRN_TOKENIZER_DEFAULT,
+                            strlen(MRN_TOKENIZER_DEFAULT));
+  }
+  if (!tokenizer) {
+    push_warning(ha_thd(), MYSQL_ERROR::WARN_LEVEL_WARN,
+                 ER_UNSUPPORTED_EXTENSION,
+                 "couldn't find tokenizer. use bigram tokenizer instead.");
+    tokenizer = grn_ctx_at(ctx, GRN_DB_BIGRAM);
+  }
+  DBUG_RETURN(tokenizer);
+}
+
 int ha_mroonga::storage_get_next_record(uchar *buf)
 {
   MRN_DBUG_ENTER_METHOD();

  Modified: ha_mroonga.h (+1 -0)
===================================================================
--- ha_mroonga.h    2011-09-25 01:25:24 +0000 (d957332)
+++ ha_mroonga.h    2011-09-25 01:25:55 +0000 (cef0959)
@@ -324,6 +324,7 @@ protected:
 private:
   void push_warning_unsupported_spatial_index_search(enum ha_rkey_function flag);
   void clear_search_result();
+  grn_obj *find_tokenizer(const char *name, int name_length);
   int storage_get_next_record(uchar *buf);
   grn_obj *storage_geo_select_in_rectangle(grn_obj *index_column,
                                            const uchar *rectangle);




Groonga-mysql-commit メーリングリストの案内
Back to archive index