Kouhei Sutou
null+****@clear*****
Thu Mar 6 18:47:32 JST 2014
Kouhei Sutou 2014-03-06 18:47:32 +0900 (Thu, 06 Mar 2014) New Revision: 811aa70040a7f87c7aaeb07758ef9f757c9a8d27 https://github.com/ranguba/rroonga/commit/811aa70040a7f87c7aaeb07758ef9f757c9a8d27 Message: Follow forward index -> weight vector change in Groonga Modified files: ext/groonga/rb-grn-column.c ext/groonga/rb-grn-index-column.c ext/groonga/rb-grn-object.c ext/groonga/rb-grn-table.c ext/groonga/rb-grn-variable-size-column.c ext/groonga/rb-grn.h lib/groonga/dumper.rb lib/groonga/schema.rb test/test-index-column.rb test/test-schema-dumper.rb test/test-table-dumper.rb test/test-variable-size-column.rb Modified: ext/groonga/rb-grn-column.c (+42 -2) =================================================================== --- ext/groonga/rb-grn-column.c 2014-03-06 18:42:28 +0900 (3ebdf61) +++ ext/groonga/rb-grn-column.c 2014-03-06 18:47:32 +0900 (9bb10e3) @@ -1,7 +1,7 @@ /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* vim: set sts=4 sw=4 ts=8 noet: */ /* - Copyright (C) 2009-2011 Kouhei Sutou <kou �� clear-code.com> + Copyright (C) 2009-2014 Kouhei Sutou <kou �� clear-code.com> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -71,10 +71,29 @@ rb_grn_column_bind (RbGrnColumn *rb_column, grn_ctx *context, grn_obj *column) { RbGrnObject *rb_grn_object; + int column_type; + unsigned char value_type; rb_grn_object = RB_GRN_OBJECT(rb_column); rb_grn_named_object_bind(RB_GRN_NAMED_OBJECT(rb_column), context, column); - rb_column->value = grn_obj_open(context, GRN_BULK, 0, + + column_type = (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK); + if (column_type == GRN_OBJ_COLUMN_VECTOR) { + switch (rb_grn_object->range->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_NO_KEY: + value_type = GRN_UVECTOR; + break; + default: + value_type = GRN_VECTOR; + break; + } + } else { + value_type = GRN_BULK; + } + rb_column->value = grn_obj_open(context, value_type, 0, rb_grn_object->range_id); } @@ -675,6 +694,24 @@ rb_grn_column_scalar_p (VALUE self) } /* + * @overload with_weight? + * @returns [Boolean] @true@ if the column is vector and created with + * @:with_weight => true@ flag, @false@ otherwise. + * @since 4.0.1 + */ +static VALUE +rb_grn_column_with_weight_p(VALUE self) +{ + grn_obj *column; + + rb_grn_column_deconstruct(SELF(self), &column, NULL, + NULL, NULL, + NULL, NULL, NULL); + + return CBOOL2RVAL(column->header.flags & GRN_OBJ_WITH_WEIGHT); +} + +/* * _operator_ を実行できる _column_ のインデックスを返す。 * @since 1.0.9 * @return [Array<index_column>] _operator_ を実行できる _column_ のインデックス @@ -772,6 +809,9 @@ rb_grn_init_column (VALUE mGrn) rb_define_method(rb_cGrnColumn, "vector?", rb_grn_column_vector_p, 0); rb_define_method(rb_cGrnColumn, "scalar?", rb_grn_column_scalar_p, 0); + rb_define_method(rb_cGrnColumn, "with_weight?", + rb_grn_column_with_weight_p, 0); + rb_define_method(rb_cGrnColumn, "indexes", rb_grn_column_get_indexes, -1); rb_define_method(rb_cGrnColumn, "rename", rb_grn_column_rename, 1); Modified: ext/groonga/rb-grn-index-column.c (+0 -344) =================================================================== --- ext/groonga/rb-grn-index-column.c 2014-03-06 18:42:28 +0900 (914ec82) +++ ext/groonga/rb-grn-index-column.c 2014-03-06 18:47:32 +0900 (b293975) @@ -103,287 +103,6 @@ rb_grn_index_column_deconstruct (RbGrnIndexColumn *rb_grn_index_column, } /* - * It gets a value of forward index for the record that ID is _id_. - * - * This method is for forward index. You can't use this method for - * inverted index. - * - * @example Gets forward index value - * Groonga::Schema.define do |schema| - * schema.create_table("Tags", - * :type => :hash, - * :key_type => "ShortText") do |table| - * end - * - * schema.create_table("Products", - * :type => :patricia_trie, - * :key_type => "ShortText") do |table| - * # This is forward index. - * # ":with_weight => true" is important to store weight value. - * table.index("Tags", - * :name => "tags", - * :with_weight => true) - * end - * end - * - * products = Groonga["Products"] - * rroonga = products.add("Rroonga") - * rroonga.tags = [ - * { - * :value => "ruby", - * :weight => 100, - * }, - * { - * :value => "groonga", - * :weight => 10, - * }, - * ] - * - * p rroonga.tags - * # => [ - * # {:value => "ruby", :weight => 100}, - * # {:value => "groonga", :weight => 10} - * # ] - * - * @overload [](id) - * @param [Integer, Record] id The record ID. - * @return [Array<Hash<Symbol, String>>] An array of values. - * Each value is a Hash like the following form: - * - * <pre> - * { - * :value => [KEY], - * :weight => [WEIGHT], - * } - * </pre> - * - * @[KEY]@ is the key of the table that is specified as range on - * creating the forward index. - * - * @[WEIGHT]@ is a positive integer. - * - * @since 4.0.1. - */ -static VALUE -rb_grn_index_column_array_reference (VALUE self, VALUE rb_id) -{ - grn_ctx *context = NULL; - grn_obj *column, *range; - grn_id id; - grn_obj *set_value; - VALUE rb_value; - unsigned int i, n; - - rb_grn_index_column_deconstruct(SELF(self), &column, &context, - NULL, NULL, - NULL, NULL, &set_value, - NULL, &range, - NULL, NULL); - - id = RVAL2GRNID(rb_id, context, range, self); - - grn_obj_reinit(context, set_value, - set_value->header.domain, - set_value->header.flags | GRN_OBJ_VECTOR); - grn_obj_get_value(context, column, id, set_value); - rb_grn_context_check(context, self); - - n = grn_vector_size(context, set_value); - rb_value = rb_ary_new2(n); - for (i = 0; i < n; i++) { - const char *value; - unsigned int value_length; - unsigned int weight = 0; - grn_id domain; - VALUE rb_element; - - value_length = grn_vector_get_element(context, set_value, i, - &value, &weight, &domain); - rb_element = rb_hash_new(); - rb_hash_aset(rb_element, - ID2SYM(rb_intern("value")), - rb_str_new(value, value_length)); - rb_hash_aset(rb_element, - ID2SYM(rb_intern("weight")), - UINT2NUM(weight)); - - rb_ary_push(rb_value, rb_element); - } - - return rb_value; -} - -/* - * It updates forward index for the record that ID is _id_. - * - * This method is for forward index. You can't use this method for - * inverted index. - * - * Inverted index is updated automatically. You don't need to - * use this method for inverted index. - * - * You can define a forward index by omitting source on defining an - * index column. - * - * @example Use forward index as matrix search result weight - * Groonga::Schema.define do |schema| - * schema.create_table("Tags", - * :type => :hash, - * :key_type => "ShortText") do |table| - * end - * - * schema.create_table("Products", - * :type => :patricia_trie, - * :key_type => "ShortText") do |table| - * # This is forward index. - * # ":with_weight => true" is important for matrix search result weight. - * table.index("Tags", - * :name => "tags", - * :with_weight => true) - * end - * - * schema.change_table("Tags") do |table| - * # This is inverted index. - * # It is just for tag search. It isn't for matrix search result weight. - * table.index("Products.tags") - * end - * end - * - * products = Groonga["Products"] - * groonga = products.add("Groonga") - * groonga.tags = [ - * { - * :value => "groonga", - * :weight => 100, - * }, - * ] - * rroonga = products.add("Rroonga") - * rroonga.tags = [ - * { - * :value => "ruby", - * :weight => 100, - * }, - * { - * :value => "groonga", - * :weight => 10, - * }, - * ] - * - * result = products.select do |record| - * # Search by "groonga" - * record.match("groonga") do |match_target| - * match_target.tags - * end - * end - * - * result.each do |record| - * p [record.key.key, record.score] - * end - * # Matches all records with weight. - * # => ["Groonga", 100] - * # ["Rroonga", 10] - * - * # Increases score for "ruby" 10 times - * products.select(# The previous search result. Required. - * :result => result, - * # It just adds score to existing records in the result. Required. - * :operator => Groonga::Operator::ADJUST) do |record| - * record.match("ruby") do |target| - * target.tags * 10 # 10 times - * end - * end - * - * result.each do |record| - * p [record.key.key, record.score] - * end - * # Weight is used for increasing score. - * # => ["Groonga", 100] <- Not changed. - * # ["Rroonga", 1010] <- 1000 (= 100 * 10) increased. - * - * @overload []=(id, documents) - * @param [Integer, Record] id The record ID. - * @param [Array<Hash<Symbol, String>>] documents An array of values. - * Each value is a Hash like the following form: - * - * <pre> - * { - * :value => [KEY], - * :weight => [WEIGHT], - * } - * </pre> - * - * @[KEY]@ must be the same type of the key of the table that is - * specified as range on creating the forward index. - * - * @[WEIGHT]@ must be an positive integer. - * - * This method was deprecated since 3.0.2. This method behavior was - * changed since 4.0.1. Use {#add}, {#delete} or {#update} instead for - * old behavior. - */ -static VALUE -rb_grn_index_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value) -{ - grn_ctx *context = NULL; - grn_obj *column, *range; - grn_rc rc; - grn_id id; - grn_obj *new_value, *set_value; - int i, n; - int flags = GRN_OBJ_SET; - - rb_grn_index_column_deconstruct(SELF(self), &column, &context, - NULL, NULL, - &new_value, NULL, &set_value, - NULL, &range, - NULL, NULL); - - id = RVAL2GRNID(rb_id, context, range, self); - - if (!RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) { - rb_raise(rb_eArgError, - "<%s>: " - "forward index value must be an array of index value: <%s>", - rb_grn_inspect(self), - rb_grn_inspect(rb_value)); - } - - grn_obj_reinit(context, set_value, - set_value->header.domain, - set_value->header.flags | GRN_OBJ_VECTOR); - n = RARRAY_LEN(rb_value); - for (i = 0; i < n; i++) { - unsigned int weight = 0; - VALUE rb_new_value, rb_weight; - - rb_grn_scan_options(RARRAY_PTR(rb_value)[i], - "value", &rb_new_value, - "weight", &rb_weight, - NULL); - - if (!NIL_P(rb_weight)) { - weight = NUM2UINT(rb_weight); - } - - GRN_BULK_REWIND(new_value); - if (!NIL_P(rb_new_value)) { - RVAL2GRNBULK(rb_new_value, context, new_value); - } - - grn_vector_add_element(context, set_value, - GRN_BULK_HEAD(new_value), - GRN_BULK_VSIZE(new_value), - weight, - new_value->header.domain); - } - rc = grn_obj_set_value(context, column, id, set_value, flags); - rb_grn_context_check(context, self); - rb_grn_rc_check(rc, self); - - return rb_value; -} - -/* * Adds a record that has @value@ content to inverted index for fast * fulltext serach. Normally, this method is not used * explicitly. Inverted index for fulltext search is updated @@ -1047,58 +766,6 @@ rb_grn_index_column_with_position_p (VALUE self) } /* - * @overload inverted? - * @returns [Boolean] @true@ if the index column is inverted index, - * @false@ otherwise. - */ -static VALUE -rb_grn_index_column_inverted_p (VALUE self) -{ - grn_ctx *context; - grn_obj *column; - grn_obj sources; - grn_bool inverted_p; - - rb_grn_index_column_deconstruct(SELF(self), &column, &context, - NULL, NULL, - NULL, NULL, NULL, NULL, NULL, - NULL, NULL); - - GRN_RECORD_INIT(&sources, GRN_OBJ_VECTOR, GRN_ID_NIL); - grn_obj_get_info(context, column, GRN_INFO_SOURCE, &sources); - inverted_p = (GRN_BULK_VSIZE(&sources) > 0); - GRN_OBJ_FIN(context, &sources); - - return CBOOL2RVAL(inverted_p); -} - -/* - * @overload forward? - * @returns [Boolean] @true@ if the index column is forward index, - * @false@ otherwise. - */ -static VALUE -rb_grn_index_column_forward_p (VALUE self) -{ - grn_ctx *context; - grn_obj *column; - grn_obj sources; - grn_bool forward_p; - - rb_grn_index_column_deconstruct(SELF(self), &column, &context, - NULL, NULL, - NULL, NULL, NULL, NULL, NULL, - NULL, NULL); - - GRN_RECORD_INIT(&sources, GRN_OBJ_VECTOR, GRN_ID_NIL); - grn_obj_get_info(context, column, GRN_INFO_SOURCE, &sources); - forward_p = (GRN_BULK_VSIZE(&sources) == 0); - GRN_OBJ_FIN(context, &sources); - - return CBOOL2RVAL(forward_p); -} - -/* * Opens cursor to iterate posting in the index column. * * @example @@ -1187,11 +854,6 @@ rb_grn_init_index_column (VALUE mGrn) rb_cGrnIndexColumn = rb_define_class_under(mGrn, "IndexColumn", rb_cGrnColumn); - rb_define_method(rb_cGrnIndexColumn, "[]", - rb_grn_index_column_array_reference, 1); - rb_define_method(rb_cGrnIndexColumn, "[]=", - rb_grn_index_column_array_set, 2); - rb_define_method(rb_cGrnIndexColumn, "add", rb_grn_index_column_add, -1); rb_define_method(rb_cGrnIndexColumn, "delete", @@ -1216,12 +878,6 @@ rb_grn_init_index_column (VALUE mGrn) rb_define_method(rb_cGrnIndexColumn, "with_position?", rb_grn_index_column_with_position_p, 0); - rb_define_method(rb_cGrnIndexColumn, "inverted?", - rb_grn_index_column_inverted_p, 0); - rb_define_method(rb_cGrnIndexColumn, "forward?", - rb_grn_index_column_forward_p, 0); - - rb_define_method(rb_cGrnIndexColumn, "open_cursor", rb_grn_index_column_open_cursor, -1); } Modified: ext/groonga/rb-grn-object.c (+7 -0) =================================================================== --- ext/groonga/rb-grn-object.c 2014-03-06 18:42:28 +0900 (0362d7f) +++ ext/groonga/rb-grn-object.c 2014-03-06 18:47:32 +0900 (33f08b3) @@ -444,6 +444,13 @@ rb_grn_object_assign (VALUE klass, VALUE self, VALUE rb_context, context, object); rb_grn_index_column_bind(RB_GRN_INDEX_COLUMN(rb_grn_object), context, object); + } else if (RVAL2CBOOL(rb_obj_is_kind_of(self, rb_cGrnVariableSizeColumn))) { + rb_grn_object = ALLOC(RbGrnVariableSizeColumn); + rb_grn_object_bind_common(klass, self, rb_context, rb_grn_object, + context, object); + rb_grn_variable_size_column_bind(RB_GRN_VARIABLE_SIZE_COLUMN(rb_grn_object), + context, + object); } else if (RVAL2CBOOL(rb_obj_is_kind_of(self, rb_cGrnColumn))) { rb_grn_object = ALLOC(RbGrnColumn); rb_grn_object_bind_common(klass, self, rb_context, rb_grn_object, Modified: ext/groonga/rb-grn-table.c (+16 -1) =================================================================== --- ext/groonga/rb-grn-table.c 2014-03-06 18:42:28 +0900 (486f6ed) +++ ext/groonga/rb-grn-table.c 2014-03-06 18:47:32 +0900 (24543f6) @@ -226,6 +226,11 @@ rb_grn_table_inspect (VALUE self) * * - +:scalar+ := スカラ値(単独の値)を格納する。 * - +:vector+ := 値の配列を格納する。 + * @option options [Boolean] :with_weight (false) + * It specifies whether making the column weight vector column or not. + * Weight vector column can store weight for each element. + * + * You can't use this option for scalar column. * @option options :compress * 値の圧縮方法を指定する。省略した場合は、圧縮しない。 * @@ -244,7 +249,7 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self) unsigned name_size = 0; grn_obj_flags flags = 0; VALUE rb_name, rb_value_type; - VALUE options, rb_path, rb_persistent, rb_compress, rb_type; + VALUE options, rb_path, rb_persistent, rb_compress, rb_type, rb_with_weight; VALUE columns; VALUE rb_column; @@ -262,6 +267,7 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self) "path", &rb_path, "persistent", &rb_persistent, "type", &rb_type, + "with_weight", &rb_with_weight, "compress", &rb_compress, NULL); @@ -294,6 +300,15 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self) rb_grn_inspect(rb_type)); } + if (RVAL2CBOOL(rb_with_weight)) { + if (flags & GRN_OBJ_COLUMN_VECTOR) { + flags |= GRN_OBJ_WITH_WEIGHT; + } else { + rb_raise(rb_eArgError, + "can't use weight for scalar column"); + } + } + if (NIL_P(rb_compress)) { } else if (rb_grn_equal_option(rb_compress, "zlib")) { flags |= GRN_OBJ_COMPRESS_ZLIB; Modified: ext/groonga/rb-grn-variable-size-column.c (+368 -5) =================================================================== --- ext/groonga/rb-grn-variable-size-column.c 2014-03-06 18:42:28 +0900 (8c9683a) +++ ext/groonga/rb-grn-variable-size-column.c 2014-03-06 18:47:32 +0900 (ed4e7d6) @@ -1,6 +1,6 @@ /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* - Copyright (C) 2009-2011 Kouhei Sutou <kou �� clear-code.com> + Copyright (C) 2009-2014 Kouhei Sutou <kou �� clear-code.com> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -18,17 +18,375 @@ #include "rb-grn.h" -#define SELF(object) ((RbGrnColumn *)DATA_PTR(object)) +#define SELF(object) ((RbGrnVariableSizeColumn *)DATA_PTR(object)) VALUE rb_cGrnVariableSizeColumn; +void +rb_grn_variable_size_column_bind (RbGrnVariableSizeColumn *rb_column, + grn_ctx *context, grn_obj *column) +{ + RbGrnObject *rb_grn_object; + int column_type; + unsigned char value_type; + + rb_grn_object = RB_GRN_OBJECT(rb_column); + rb_grn_column_bind(RB_GRN_COLUMN(rb_column), context, column); + + rb_column->element_value = NULL; + column_type = (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK); + if (column_type != GRN_OBJ_COLUMN_VECTOR) { + return; + } + + switch (rb_grn_object->range->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_NO_KEY: + value_type = GRN_UVECTOR; + break; + default: + value_type = GRN_VECTOR; + break; + } + if (column->header.flags & GRN_OBJ_WITH_WEIGHT) { + rb_column->element_value = grn_obj_open(context, value_type, 0, + rb_grn_object->range_id); + } +} + +void +rb_grn_variable_size_column_finalizer (grn_ctx *context, grn_obj *grn_object, + RbGrnVariableSizeColumn *rb_column) +{ + rb_grn_column_finalizer(context, grn_object, + RB_GRN_COLUMN(rb_column)); + if (context && rb_column->element_value) + grn_obj_unlink(context, rb_column->element_value); + rb_column->element_value = NULL; +} + +static void +rb_grn_variable_size_column_deconstruct (RbGrnVariableSizeColumn *rb_column, + grn_obj **column, + grn_ctx **context, + grn_id *domain_id, + grn_obj **domain, + grn_obj **value, + grn_obj **element_value, + grn_id *range_id, + grn_obj **range) +{ + RbGrnColumn *rb_grn_column; + + rb_grn_column = RB_GRN_COLUMN(rb_column); + rb_grn_column_deconstruct(rb_grn_column, column, context, + domain_id, domain, value, + range_id, range); + + if (element_value) + *element_value = rb_column->element_value; +} + /* * Document-class: Groonga::VariableSizeColumn < Groonga::Column * - * 可変長データ用のカラム。 + * A column for variable size data like text family types and vector + * column. */ /* + * It gets a value of variable size column value for the record that + * ID is _id_. + * + * @example Gets weight vector value + * Groonga::Schema.define do |schema| + * schema.create_table("Products", + * :type => :patricia_trie, + * :key_type => "ShortText") do |table| + * # This is weight vector. + * # ":with_weight => true" is important to store weight value. + * table.short_text("tags", + * :type => :vector, + * :with_weight => true) + * end + * end + * + * products = Groonga["Products"] + * rroonga = products.add("Rroonga") + * rroonga.tags = [ + * { + * :value => "ruby", + * :weight => 100, + * }, + * { + * :value => "groonga", + * :weight => 10, + * }, + * ] + * + * p rroonga.tags + * # => [ + * # {:value => "ruby", :weight => 100}, + * # {:value => "groonga", :weight => 10} + * # ] + * + * @overload [](id) + * @param [Integer, Record] id The record ID. + * @return [Array<Hash<Symbol, String>>] An array of value if the column + * is a weight vector column. + * Each value is a Hash like the following form: + * + * <pre> + * { + * :value => [KEY], + * :weight => [WEIGHT], + * } + * </pre> + * + * @[KEY]@ is the key of the table that is specified as range on + * creating the weight vector. + * + * @[WEIGHT]@ is a positive integer. + * + * @return [::Object] See {Groonga::Object#[]} for columns except + * weight vector column. + * + * @since 4.0.1. + */ +static VALUE +rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id) +{ + grn_ctx *context = NULL; + grn_obj *column, *range; + grn_id id; + grn_obj *value; + VALUE rb_value; + unsigned int i, n; + + rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context, + NULL, NULL, &value, NULL, + NULL, &range); + + if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) { + return rb_call_super(1, &rb_id); + } + + id = RVAL2GRNID(rb_id, context, range, self); + + grn_obj_reinit(context, value, + value->header.domain, + value->header.flags | GRN_OBJ_VECTOR); + grn_obj_get_value(context, column, id, value); + rb_grn_context_check(context, self); + + n = grn_vector_size(context, value); + rb_value = rb_ary_new2(n); + for (i = 0; i < n; i++) { + const char *element_value; + unsigned int element_value_length; + unsigned int weight = 0; + grn_id domain; + VALUE rb_element; + + element_value_length = grn_vector_get_element(context, + value, + i, + &element_value, + &weight, + &domain); + rb_element = rb_hash_new(); + rb_hash_aset(rb_element, + ID2SYM(rb_intern("value")), + rb_str_new(element_value, element_value_length)); + rb_hash_aset(rb_element, + ID2SYM(rb_intern("weight")), + UINT2NUM(weight)); + + rb_ary_push(rb_value, rb_element); + } + + return rb_value; +} + +/* + * It updates a value of variable size column value for the record + * that ID is _id_. + * + * Weight vector column is a special variable size column. This + * description describes only weight vector column. Other variable + * size column works what you think. + * + * @example Use weight vector as matrix search result weight + * Groonga::Schema.define do |schema| + * schema.create_table("Products", + * :type => :patricia_trie, + * :key_type => "ShortText") do |table| + * # This is weight vector. + * # ":with_weight => true" is important for matrix search result weight. + * table.short_text("Tags", + * :type => :vector, + * :with_weight => true) + * end + * + * schema.create_table("Tags", + * :type => :hash, + * :key_type => "ShortText") do |table| + * # This is inverted index. It also needs ":with_weight => true". + * table.index("Products.tags", :with_weight => true) + * end + * end + * + * products = Groonga["Products"] + * groonga = products.add("Groonga") + * groonga.tags = [ + * { + * :value => "groonga", + * :weight => 100, + * }, + * ] + * rroonga = products.add("Rroonga") + * rroonga.tags = [ + * { + * :value => "ruby", + * :weight => 100, + * }, + * { + * :value => "groonga", + * :weight => 10, + * }, + * ] + * + * result = products.select do |record| + * # Search by "groonga" + * record.match("groonga") do |match_target| + * match_target.tags + * end + * end + * + * result.each do |record| + * p [record.key.key, record.score] + * end + * # Matches all records with weight. + * # => ["Groonga", 100] + * # ["Rroonga", 10] + * + * # Increases score for "ruby" 10 times + * products.select(# The previous search result. Required. + * :result => result, + * # It just adds score to existing records in the result. Required. + * :operator => Groonga::Operator::ADJUST) do |record| + * record.match("ruby") do |target| + * target.tags * 10 # 10 times + * end + * end + * + * result.each do |record| + * p [record.key.key, record.score] + * end + * # Weight is used for increasing score. + * # => ["Groonga", 100] <- Not changed. + * # ["Rroonga", 1010] <- 1000 (= 100 * 10) increased. + * + * @overload []=(id, elements) + * This description is for weight vector column. + * + * @param [Integer, Record] id The record ID. + * @param [Array<Hash<Symbol, String>>] elements An array of values + * for weight vector. + * Each value is a Hash like the following form: + * + * <pre> + * { + * :value => [KEY], + * :weight => [WEIGHT], + * } + * </pre> + * + * @[KEY]@ must be the same type of the key of the table that is + * specified as range on creating the weight vector. + * + * @[WEIGHT]@ must be an positive integer. + * + * @overload []=(id, value) + * This description is for variable size columns except weight + * vector column. + * + * @param [Integer, Record] id The record ID. + * @param [::Object] value A new value. + * @see Groonga::Object#[]= + * + * @since 4.0.1 + */ +static VALUE +rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value) +{ + grn_ctx *context = NULL; + grn_obj *column, *range; + grn_rc rc; + grn_id id; + grn_obj *value, *element_value; + int i, n; + int flags = GRN_OBJ_SET; + + rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context, + NULL, NULL, &value, &element_value, + NULL, &range); + + if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) { + VALUE args[2]; + args[0] = rb_id; + args[1] = rb_value; + return rb_call_super(2, args); + } + + id = RVAL2GRNID(rb_id, context, range, self); + + if (!RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) { + rb_raise(rb_eArgError, + "<%s>: " + "weight vector value must be an array of index value: <%s>", + rb_grn_inspect(self), + rb_grn_inspect(rb_value)); + } + + grn_obj_reinit(context, value, + value->header.domain, + value->header.flags | GRN_OBJ_VECTOR); + n = RARRAY_LEN(rb_value); + for (i = 0; i < n; i++) { + unsigned int weight = 0; + VALUE rb_element_value, rb_weight; + + rb_grn_scan_options(RARRAY_PTR(rb_value)[i], + "value", &rb_element_value, + "weight", &rb_weight, + NULL); + + if (!NIL_P(rb_weight)) { + weight = NUM2UINT(rb_weight); + } + + GRN_BULK_REWIND(element_value); + if (!NIL_P(rb_element_value)) { + RVAL2GRNBULK(rb_element_value, context, element_value); + } + + grn_vector_add_element(context, value, + GRN_BULK_HEAD(element_value), + GRN_BULK_VSIZE(element_value), + weight, + element_value->header.domain); + } + rc = grn_obj_set_value(context, column, id, value, flags); + rb_grn_context_check(context, self); + rb_grn_rc_check(rc, self); + + return rb_value; +} + +/* * Returns whether the column is compressed or not. If * @type@ is specified, it returns whether the column is * compressed by @type@ or not. @@ -42,7 +400,7 @@ VALUE rb_cGrnVariableSizeColumn; static VALUE rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self) { - RbGrnColumn *rb_grn_column; + RbGrnVariableSizeColumn *rb_grn_column; grn_ctx *context = NULL; grn_obj *column; grn_obj_flags flags; @@ -112,7 +470,7 @@ rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self) static VALUE rb_grn_variable_size_column_defrag (int argc, VALUE *argv, VALUE self) { - RbGrnColumn *rb_grn_column; + RbGrnVariableSizeColumn *rb_grn_column; grn_ctx *context = NULL; grn_obj *column; int n_segments; @@ -143,6 +501,11 @@ rb_grn_init_variable_size_column (VALUE mGrn) rb_cGrnVariableSizeColumn = rb_define_class_under(mGrn, "VariableSizeColumn", rb_cGrnColumn); + rb_define_method(rb_cGrnVariableSizeColumn, "[]", + rb_grn_variable_size_column_array_reference, 1); + rb_define_method(rb_cGrnVariableSizeColumn, "[]=", + rb_grn_variable_size_column_array_set, 2); + rb_define_method(rb_cGrnVariableSizeColumn, "compressed?", rb_grn_variable_size_column_compressed_p, -1); rb_define_method(rb_cGrnVariableSizeColumn, "defrag", Modified: ext/groonga/rb-grn.h (+15 -0) =================================================================== --- ext/groonga/rb-grn.h 2014-03-06 18:42:28 +0900 (58f94ce) +++ ext/groonga/rb-grn.h 2014-03-06 18:47:32 +0900 (d834ad8) @@ -96,6 +96,7 @@ RB_GRN_BEGIN_DECLS #define RB_GRN_TABLE_KEY_SUPPORT(object) ((RbGrnTableKeySupport *)(object)) #define RB_GRN_TABLE_CURSOR(object) ((RbGrnTableCursort *)(object)) #define RB_GRN_COLUMN(object) ((RbGrnColumn *)(object)) +#define RB_GRN_VARIABLE_SIZE_COLUMN(object) ((RbGrnVariableSizeColumn *)(object)) #define RB_GRN_INDEX_COLUMN(object) ((RbGrnIndexColumn *)(object)) #define RB_GRN_ACCESSOR(object) ((RbGrnAccessor *)(object)) #define RB_GRN_EXPRESSION(object) ((RbGrnExpression *)(object)) @@ -159,6 +160,13 @@ struct _RbGrnColumn grn_obj *value; }; +typedef struct _RbGrnVariableSizeColumn RbGrnVariableSizeColumn; +struct _RbGrnVariableSizeColumn +{ + RbGrnColumn parent; + grn_obj *element_value; +}; + typedef struct _RbGrnIndexColumn RbGrnIndexColumn; struct _RbGrnIndexColumn { @@ -491,6 +499,13 @@ void rb_grn_column_deconstruct (RbGrnColumn *rb_grn_column, grn_id *range_id, grn_obj **range); +void rb_grn_variable_size_column_bind (RbGrnVariableSizeColumn *rb_grn_column, + grn_ctx *context, + grn_obj *column); +void rb_grn_variable_size_column_finalizer(grn_ctx *context, + grn_obj *column, + RbGrnVariableSizeColumn *rb_grn_column); + void rb_grn_index_column_bind (RbGrnIndexColumn *rb_grn_index_column, grn_ctx *context, grn_obj *object); Modified: lib/groonga/dumper.rb (+12 -10) =================================================================== --- lib/groonga/dumper.rb 2014-03-06 18:42:28 +0900 (15ab7e8) +++ lib/groonga/dumper.rb 2014-03-06 18:47:32 +0900 (c1f131a) @@ -97,7 +97,7 @@ module Groonga options[:database].each(each_options(:order_by => :key)) do |object| next unless object.is_a?(Groonga::Table) next if object.size.zero? - next if inverted_index_only_table?(object) + next if index_only_table?(object) next if target_table?(options[:exclude_tables], object, false) next unless target_table?(options[:tables], object, true) options[:output].write("\n") if !first_table or options[:dump_schema] @@ -120,10 +120,10 @@ module Groonga output.write("register #{plugin_name}\n") end - def inverted_index_only_table?(table) + def index_only_table?(table) return false if table.columns.empty? table.columns.all? do |column| - column.index? and column.inverted? + column.index? end end @@ -524,6 +524,7 @@ module Groonga def column_options(column) options = {} options[:type] = :vector if column.vector? + options[:with_weight] = true if column.with_weight? return nil if options.empty? dump_options(options) @@ -608,6 +609,7 @@ module Groonga elsif column.vector? flags << "COLUMN_VECTOR" end + flags << "WITH_WEIGHT" if column.with_weight? # TODO: support COMPRESS_ZLIB and COMPRESS_LZO? parameters << "#{flags.join('|')}" parameters << "#{column.range.name}" @@ -700,8 +702,8 @@ module Groonga def resolve_value(record, column, value) case value when ::Array - if column.index? - resolve_forward_index_value(record, column, value) + if column.vector? and column.with_weight? + resolve_weight_vector_value(record, column, value) else value.collect do |v| resolve_value(record, column, v) @@ -743,17 +745,17 @@ module Groonga end end - def resolve_forward_index_value(record, column, entries) - resolved_forward_index_entries = {} + def resolve_weight_vector_value(record, column, entries) + resolved_weight_vector_entries = {} sorted_entries = entries.sort_by do |entry| entry[:value] end sorted_entries.each do |entry| resolved_value = resolve_value(record, column, entry[:value]) resolved_weight = resolve_value(record, column, entry[:weight]) - resolved_forward_index_entries[resolved_value] = resolved_weight + resolved_weight_vector_entries[resolved_value] = resolved_weight end - resolved_forward_index_entries + resolved_weight_vector_entries end def fix_encoding(value) @@ -779,7 +781,7 @@ module Groonga end columns << @table.column("_value") unles****@table*****? data_columns =****@table***** do |column| - column.index? and column.inverted? + column.index? end sorted_columns = data_columns.sort_by do |column| column.local_name Modified: lib/groonga/schema.rb (+15 -1) =================================================================== --- lib/groonga/schema.rb 2014-03-06 18:42:28 +0900 (e290d51) +++ lib/groonga/schema.rb 2014-03-06 18:47:32 +0900 (6624bfd) @@ -1010,6 +1010,11 @@ module Groonga # # - :scalar := スカラ値(単独の値)を格納する。 # - :vector := 値の配列を格納する。 + # @option options [Boolean] :with_weight (false) + # It specifies whether making the column weight vector column or not. + # Weight vector column can store weight for each element. + # + # You can't use this option for scalar column. # @option options :compress # # 値の圧縮方法を指定する。省略した場合は、圧縮しない。 @@ -1729,13 +1734,22 @@ module Groonga end def same_column?(context, column) - column.range == resolved_type(context) + return false unless column.range == resolved_type(context) + if column.scalar? + [nil, :scalar].include?(@options[:type]) + else + return false unless @options[:type] == :vector + with_weight = @options[:with_weight] + with_weight = false if with_weight.nil? + column.with_weight? == with_weight + end end def define_options(context, table) { :path => path(context, table), :type => @options[:type], + :with_weight => @options[:with_weight], :compress => @options[:compress], } end Modified: test/test-index-column.rb (+1 -82) =================================================================== --- test/test-index-column.rb 2014-03-06 18:42:28 +0900 (baa061e) +++ test/test-index-column.rb 2014-03-06 18:47:32 +0900 (963793e) @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2013 Kouhei Sutou <kou �� clear-code.com> +# Copyright (C) 2009-2014 Kouhei Sutou <kou �� clear-code.com> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -175,87 +175,6 @@ class IndexColumnTest < Test::Unit::TestCase end end - class InvertedIndexTest < self - setup - def setup_schema - Groonga::Schema.define do |schema| - schema.create_table("Tags", - :type => :patricia_trie, - :key_type => :short_text) do |table| - end - - schema.create_table("Products", - :type => :patricia_trie, - :key_type => :short_text) do |table| - table.reference("tags", "Tags", :type => :vector) - end - - schema.change_table("Tags") do |table| - table.index("Products.tags", :name => "products_tags") - end - end - - @index = Groonga["Tags.products_tags"] - end - - def test_predicate - assert_true(@index.inverted?) - end - end - - class ForwardIndexTest < self - setup - def setup_schema - Groonga::Schema.define do |schema| - schema.create_table("Tags", - :type => :patricia_trie, - :key_type => :short_text) do |table| - end - - schema.create_table("Products", - :type => :patricia_trie, - :key_type => :short_text) do |table| - table.index("Tags", - :name => "tags", - :with_weight => true) - end - end - - @products = Groonga["Products"] - @index = Groonga["Products.tags"] - end - - def test_predicate - assert_true(@index.forward?) - end - - def test_accessor - groonga =****@produ*****("Groonga") - groonga.tags = [ - { - :value => "groonga", - :weight => 100, - }, - { - :value => "full text search", - :weight => 1000, - }, - ] - - assert_equal([ - { - :value => "groonga", - :weight => 100, - }, - { - :value => "full text search", - :weight => 1000, - }, - ], - groonga.tags) - end - end - class NGramTest < self setup def setup_schema Modified: test/test-schema-dumper.rb (+11 -28) =================================================================== --- test/test-schema-dumper.rb 2014-03-06 18:42:28 +0900 (9d74ba7) +++ test/test-schema-dumper.rb 2014-03-06 18:47:32 +0900 (9bceb32) @@ -1,4 +1,4 @@ -# Copyright (C) 2009-2013 Kouhei Sutou <kou �� clear-code.com> +# Copyright (C) 2009-2014 Kouhei Sutou <kou �� clear-code.com> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -104,19 +104,14 @@ class SchemaDumperTest < Test::Unit::TestCase end end - def define_forward_index_schema + def define_weight_vector_schema Groonga::Schema.define do |schema| - schema.create_table("Tags", - :type => :hash, - :key_type => "ShortText") do |table| - end - schema.create_table("Memos", :type => :patricia_trie, :key_type => "ShortText") do |table| - table.index("Tags", - :name => "tags", - :with_weight => true) + table.short_text("tags", + :type => :vector, + :with_weight => true) end end end @@ -237,23 +232,14 @@ end SCHEMA end - def test_forward_index - define_forward_index_schema + def test_weight_vector + define_weight_vector_schema assert_equal(<<-SCHEMA, dump) create_table("Memos", :type => :patricia_trie, :key_type => "ShortText", :force => true) do |table| -end - -create_table("Tags", - :type => :hash, - :key_type => "ShortText", - :force => true) do |table| -end - -change_table("Memos") do |table| - table.index("Tags", [], :name => "tags", :with_weight => true) + table.short_text("tags", :type => :vector, :with_weight => true) end SCHEMA end @@ -327,14 +313,11 @@ column_create Terms Items_title COLUMN_INDEX|WITH_POSITION Items title SCHEMA end - def test_forward_index - define_forward_index_schema + def test_weight_vector + define_weight_vector_schema assert_equal(<<-SCHEMA, dump) table_create Memos TABLE_PAT_KEY --key_type ShortText - -table_create Tags TABLE_HASH_KEY --key_type ShortText - -column_create Memos tags COLUMN_INDEX|WITH_WEIGHT Tags +column_create Memos tags COLUMN_VECTOR|WITH_WEIGHT ShortText SCHEMA end Modified: test/test-table-dumper.rb (+4 -9) =================================================================== --- test/test-table-dumper.rb 2014-03-06 18:42:28 +0900 (b8c766d) +++ test/test-table-dumper.rb 2014-03-06 18:47:32 +0900 (f9aa8fe) @@ -351,20 +351,15 @@ EOS end end - class ForwardIndexTest < self + class WeightVectorIndexTest < self def setup Groonga::Schema.define do |schema| - schema.create_table("Tags", - :type => :hash, - :key_type => "ShortText") do |table| - end - schema.create_table("Products", :type => :patricia_trie, :key_type => "ShortText") do |table| - table.index("Tags", - :name => "tags", - :with_weight => true) + table.short_text("tags", + :type => :vector, + :with_weight => true) end end end Modified: test/test-variable-size-column.rb (+46 -1) =================================================================== --- test/test-variable-size-column.rb 2014-03-06 18:42:28 +0900 (4c343f8) +++ test/test-variable-size-column.rb 2014-03-06 18:47:32 +0900 (470bcde) @@ -1,4 +1,4 @@ -# Copyright (C) 2009-2011 Kouhei Sutou <kou �� clear-code.com> +# Copyright (C) 2009-2014 Kouhei Sutou <kou �� clear-code.com> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,10 @@ class VariableSizeColumnTest < Test::Unit::TestCase def setup setup_database + setup_schema + end + def setup_schema setup_users_table setup_users end @@ -233,5 +236,47 @@ class VariableSizeColumnTest < Test::Unit::TestCase groonga_org.modified_times.collect(&:key)) end end + + class WeightTest < self + def setup_schema + Groonga::Schema.define do |schema| + schema.create_table("Products", + :type => :patricia_trie, + :key_type => :short_text) do |table| + table.short_text("tags", + :type => :vector, + :with_weight => true) + end + end + + @products = Groonga["Products"] + end + + def test_accessor + groonga =****@produ*****("Groonga") + groonga.tags = [ + { + :value => "groonga", + :weight => 100, + }, + { + :value => "full text search", + :weight => 1000, + }, + ] + + assert_equal([ + { + :value => "groonga", + :weight => 100, + }, + { + :value => "full text search", + :weight => 1000, + }, + ], + groonga.tags) + end + end end end -------------- next part -------------- HTML����������������������������...Download