Susumu Yata
null+****@clear*****
Fri Jun 30 17:55:27 JST 2017
Susumu Yata 2017-06-30 17:55:27 +0900 (Fri, 30 Jun 2017) New Revision: ad7c2a88c3d239677ac49d7e2156e66818008ab3 https://github.com/groonga/groonga/commit/ad7c2a88c3d239677ac49d7e2156e66818008ab3 Merged 723004e: Merge pull request #724 from groonga/use-sid-in-ii-for-text-vector Message: ii: assign a section to each element of text vector in static indexing GitHub: #494 Modified files: lib/ii.c Modified: lib/ii.c (+57 -13) =================================================================== --- lib/ii.c 2017-06-30 16:57:19 +0900 (013014f) +++ lib/ii.c 2017-06-30 17:55:27 +0900 (d405cad) @@ -12013,6 +12013,9 @@ grn_ii_builder_append_obj(grn_ctx *ctx, grn_ii_builder *builder, if (sec->length == 0) { continue; } + if (builder->tokenizer) { + sid = i + 1; + } rc = grn_ii_builder_append_value(ctx, builder, rid, sid, sec->weight, head + sec->offset, sec->length); if (rc != GRN_SUCCESS) { @@ -12047,13 +12050,6 @@ grn_ii_builder_append_srcs(grn_ctx *ctx, grn_ii_builder *builder) return ctx->rc; } - /* Create a block lexicon. */ - rc = grn_ii_builder_create_lexicon(ctx, builder); - if (rc != GRN_SUCCESS) { - GRN_FREE(objs); - return rc; - } - /* Create a cursor to get records in the ID order. */ cursor = grn_table_cursor_open(ctx, builder->src_table, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID); @@ -12132,6 +12128,54 @@ grn_ii_builder_set_src_table(grn_ctx *ctx, grn_ii_builder *builder) return GRN_SUCCESS; } +/* grn_ii_builder_set_sid_bits calculates sid_bits and sid_mask. */ +static grn_rc +grn_ii_builder_set_sid_bits(grn_ctx *ctx, grn_ii_builder *builder) +{ + /* Calculate the number of bits required to represent a section ID. */ + if (builder->n_srcs == 1 && builder->tokenizer && + (builder->srcs[0]->header.flags & GRN_OBJ_COLUMN_VECTOR) != 0) { + /* If the source column is a vector column and the index has a tokenizer, */ + /* the maximum sid equals to the maximum number of elements. 
*/ + size_t max_elems = 0; + grn_table_cursor *cursor; + grn_obj obj; + cursor = grn_table_cursor_open(ctx, builder->src_table, NULL, 0, NULL, 0, + 0, -1, GRN_CURSOR_BY_ID); + if (!cursor) { + if (ctx->rc == GRN_SUCCESS) { + ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor"); + } + return ctx->rc; + } + GRN_TEXT_INIT(&obj, 0); + for (;;) { + grn_id rid = grn_table_cursor_next(ctx, cursor); + if (rid == GRN_ID_NIL) { + break; + } + if (!grn_obj_get_value(ctx, builder->srcs[0], rid, &obj)) { + continue; + } + if (obj.u.v.n_sections > max_elems) { + max_elems = obj.u.v.n_sections; + } + } + GRN_OBJ_FIN(ctx, &obj); + grn_table_cursor_close(ctx, cursor); + while (((uint32_t)1 << builder->sid_bits) < max_elems) { + builder->sid_bits++; + } + } + if (builder->sid_bits == 0) { + while (((uint32_t)1 << builder->sid_bits) < builder->n_srcs) { + builder->sid_bits++; + } + } + builder->sid_mask = ((uint64_t)1 << builder->sid_bits) - 1; + return GRN_SUCCESS; +} + /* grn_ii_builder_set_srcs sets source columns. */ static grn_rc grn_ii_builder_set_srcs(grn_ctx *ctx, grn_ii_builder *builder) @@ -12159,12 +12203,7 @@ grn_ii_builder_set_srcs(grn_ctx *ctx, grn_ii_builder *builder) return ctx->rc; } } - /* Calculate the number of bits required to represent a section ID. */ - while (((uint32_t)1 << builder->sid_bits) < builder->n_srcs) { - builder->sid_bits++; - } - builder->sid_mask = ((uint64_t)1 << builder->sid_bits) - 1; - return GRN_SUCCESS; + return grn_ii_builder_set_sid_bits(ctx, builder); } /* grn_ii_builder_append_source appends values in source columns. */ @@ -12179,6 +12218,11 @@ grn_ii_builder_append_source(grn_ctx *ctx, grn_ii_builder *builder) /* Nothing to do because there are no values. */ return ctx->rc; } + /* Create a block lexicon. 
*/ + rc = grn_ii_builder_create_lexicon(ctx, builder); + if (rc != GRN_SUCCESS) { + return rc; + } rc = grn_ii_builder_set_srcs(ctx, builder); if (rc != GRN_SUCCESS) { return rc; -------------- next part -------------- HTMLの添付ファイルを保管しました...Download