Kouhei Sutou
null+****@clear*****
Wed Jun 29 15:29:27 JST 2016
Kouhei Sutou 2016-06-29 15:29:27 +0900 (Wed, 29 Jun 2016) New Revision: db7cc5e668e82c6c791b88ed8e9513dc9b372d0b https://github.com/groonga/groonga/commit/db7cc5e668e82c6c791b88ed8e9513dc9b372d0b Message: experimental: support tiny index column It reduced memory usage. You can use the feature in index column for fixed size scalar column. New API: * grn_column_flags type * GRN_OBJ_INDEX_TINY flag * grn_column_get_flags() function * INDEX_TINY flag in column_create The default max N segments is 0x00200. It will be changed after our checks. The value can be customized by GRN_II_MAX_N_SEGMENTS_TINY environment variable. Added files: lib/column.c test/command/suite/dump/schema/column/index/index_tiny.expected test/command/suite/dump/schema/column/index/index_tiny.test Copied files: include/groonga/column.h (from include/groonga/dump.h) Modified files: include/groonga.h include/groonga/Makefile.am include/groonga/dump.h include/groonga/groonga.h lib/db.c lib/dump.c lib/grn_ii.h lib/grn_proc.h lib/grn_store.h lib/ii.c lib/proc/proc_column.c lib/proc/proc_dump.c lib/proc/proc_schema.c lib/sources.am lib/store.c test/command/suite/schema/tables/columns/type/index.expected test/command/suite/schema/tables/columns/type/index.test Modified: include/groonga.h (+1 -0) =================================================================== --- include/groonga.h 2016-06-29 13:55:35 +0900 (6dda8f0) +++ include/groonga.h 2016-06-29 15:29:27 +0900 (bb9bee6) @@ -22,6 +22,7 @@ #include "groonga/groonga.h" #include "groonga/array.h" +#include "groonga/column.h" #include "groonga/config.h" #include "groonga/dat.h" #include "groonga/db.h" Modified: include/groonga/Makefile.am (+1 -0) =================================================================== --- include/groonga/Makefile.am 2016-06-29 13:55:35 +0900 (b80db80) +++ include/groonga/Makefile.am 2016-06-29 15:29:27 +0900 (222c267) @@ -1,6 +1,7 @@ groonga_includedir = $(pkgincludedir)/groonga groonga_include_HEADERS = \ array.h \ + column.h 
\ command.h \ config.h \ dat.h \ Copied: include/groonga/column.h (+2 -7) 67% =================================================================== --- include/groonga/dump.h 2016-06-29 13:55:35 +0900 (d490c38) +++ include/groonga/column.h 2016-06-29 15:29:27 +0900 (753cdba) @@ -1,5 +1,5 @@ /* - Copyright(C) 2016 Brazil + Copyright(C) 2009-2016 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,12 +22,7 @@ extern "C" { #endif -GRN_API grn_rc grn_dump_table_create_flags(grn_ctx *ctx, - grn_table_flags flags, - grn_obj *buffer); -GRN_API grn_rc grn_dump_column_create_flags(grn_ctx *ctx, - grn_obj_flags flags, - grn_obj *buffer); +GRN_API grn_column_flags grn_column_get_flags(grn_ctx *ctx, grn_obj *column); #ifdef __cplusplus } Modified: include/groonga/dump.h (+1 -1) =================================================================== --- include/groonga/dump.h 2016-06-29 13:55:35 +0900 (d490c38) +++ include/groonga/dump.h 2016-06-29 15:29:27 +0900 (4f292f4) @@ -26,7 +26,7 @@ GRN_API grn_rc grn_dump_table_create_flags(grn_ctx *ctx, grn_table_flags flags, grn_obj *buffer); GRN_API grn_rc grn_dump_column_create_flags(grn_ctx *ctx, - grn_obj_flags flags, + grn_column_flags flags, grn_obj *buffer); #ifdef __cplusplus Modified: include/groonga/groonga.h (+6 -1) =================================================================== --- include/groonga/groonga.h 2016-06-29 13:55:35 +0900 (1343361) +++ include/groonga/groonga.h 2016-06-29 15:29:27 +0900 (9e6bdc2) @@ -295,6 +295,7 @@ GRN_API grn_encoding grn_encoding_parse(const char *name); typedef uint16_t grn_obj_flags; typedef uint32_t grn_table_flags; +typedef uint32_t grn_column_flags; /* flags for grn_obj_flags and grn_table_flags */ @@ -357,6 +358,10 @@ typedef uint32_t grn_table_flags; #define GRN_OBJ_KEY_LARGE (0x01<<16) +/* flags only for grn_column_flags */ + +#define GRN_OBJ_INDEX_TINY (0x01<<16) + /* obj types */ #define GRN_VOID 
(0x00) @@ -644,7 +649,7 @@ GRN_API grn_obj *grn_obj_column(grn_ctx *ctx, grn_obj *table, GRN_API grn_obj *grn_column_create(grn_ctx *ctx, grn_obj *table, const char *name, unsigned int name_size, - const char *path, grn_obj_flags flags, grn_obj *type); + const char *path, grn_column_flags flags, grn_obj *type); #define GRN_COLUMN_OPEN_OR_CREATE(ctx,table,name,name_size,path,flags,type,column) \ (((column) = grn_obj_column((ctx), (table), (name), (name_size))) ||\ Added: lib/column.c (+49 -0) 100644 =================================================================== --- /dev/null +++ lib/column.c 2016-06-29 15:29:27 +0900 (63c5d59) @@ -0,0 +1,49 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2009-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "grn.h" +#include "grn_store.h" +#include "grn_ii.h" + +grn_column_flags +grn_column_get_flags(grn_ctx *ctx, grn_obj *column) +{ + grn_column_flags flags = 0; + + GRN_API_ENTER; + + if (!column) { + GRN_API_RETURN(0); + } + + switch (column->header.type) { + case GRN_COLUMN_FIX_SIZE : + flags = column->header.flags; + break; + case GRN_COLUMN_VAR_SIZE : + flags = grn_ja_get_flags(ctx, (grn_ja *)column); + break; + case GRN_COLUMN_INDEX : + flags = grn_ii_get_flags(ctx, (grn_ii *)column); + break; + default : + break; + } + + GRN_API_RETURN(flags); +} Modified: lib/db.c (+1 -1) =================================================================== --- lib/db.c 2016-06-29 13:55:35 +0900 (02ff9a7) +++ lib/db.c 2016-06-29 15:29:27 +0900 (a3d5ff3) @@ -4703,7 +4703,7 @@ _grn_table_key(grn_ctx *ctx, grn_obj *table, grn_id id, uint32_t *key_size) grn_obj * grn_column_create(grn_ctx *ctx, grn_obj *table, const char *name, unsigned int name_size, - const char *path, grn_obj_flags flags, grn_obj *type) + const char *path, grn_column_flags flags, grn_obj *type) { grn_db *s; uint32_t value_size; Modified: lib/dump.c (+5 -2) =================================================================== --- lib/dump.c 2016-06-29 13:55:35 +0900 (d38d010) +++ lib/dump.c 2016-06-29 15:29:27 +0900 (cad2e57) @@ -57,8 +57,8 @@ grn_dump_table_create_flags(grn_ctx *ctx, grn_rc grn_dump_column_create_flags(grn_ctx *ctx, - grn_obj_flags flags, - grn_obj *buffer) + grn_column_flags flags, + grn_obj *buffer) { GRN_API_ENTER; @@ -83,6 +83,9 @@ grn_dump_column_create_flags(grn_ctx *ctx, if (flags & GRN_OBJ_WITH_POSITION) { GRN_TEXT_PUTS(ctx, buffer, "|WITH_POSITION"); } + if (flags & GRN_OBJ_INDEX_TINY) { + GRN_TEXT_PUTS(ctx, buffer, "|INDEX_TINY"); + } break; } switch (flags & 
GRN_OBJ_COMPRESS_MASK) { Modified: lib/grn_ii.h (+1 -0) =================================================================== --- lib/grn_ii.h 2016-06-29 13:55:35 +0900 (1c6366e) +++ lib/grn_ii.h 2016-06-29 15:29:27 +0900 (193484d) @@ -99,6 +99,7 @@ GRN_API grn_ii *grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon); GRN_API grn_rc grn_ii_close(grn_ctx *ctx, grn_ii *ii); GRN_API grn_rc grn_ii_remove(grn_ctx *ctx, const char *path); grn_rc grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size); +grn_column_flags grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii); grn_rc grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u, grn_hash *h); grn_rc grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u, Modified: lib/grn_proc.h (+4 -4) =================================================================== --- lib/grn_proc.h 2016-06-29 13:55:35 +0900 (2056439) +++ lib/grn_proc.h 2016-06-29 15:29:27 +0900 (c415641) @@ -85,10 +85,10 @@ grn_bool grn_proc_table_set_token_filters(grn_ctx *ctx, grn_obj *table, grn_obj *token_filter_names); -grn_obj_flags grn_proc_column_parse_flags(grn_ctx *ctx, - const char *error_message_tag, - const char *text, - const char *end); +grn_column_flags grn_proc_column_parse_flags(grn_ctx *ctx, + const char *error_message_tag, + const char *text, + const char *end); grn_bool grn_proc_select_output_columns_open(grn_ctx *ctx, grn_obj_format *format, Modified: lib/grn_store.h (+1 -0) =================================================================== --- lib/grn_store.h 2016-06-29 13:55:35 +0900 (a42e811) +++ lib/grn_store.h 2016-06-29 15:29:27 +0900 (67f183e) @@ -84,6 +84,7 @@ GRN_API grn_ja *grn_ja_create(grn_ctx *ctx, const char *path, uint32_t max_element_size, uint32_t flags); grn_ja *grn_ja_open(grn_ctx *ctx, const char *path); grn_rc grn_ja_info(grn_ctx *ctx, grn_ja *ja, unsigned int *max_element_size); +grn_column_flags grn_ja_get_flags(grn_ctx *ctx, grn_ja *ja); GRN_API 
grn_rc grn_ja_close(grn_ctx *ctx, grn_ja *ja); grn_rc grn_ja_remove(grn_ctx *ctx, const char *path); grn_rc grn_ja_put(grn_ctx *ctx, grn_ja *ja, grn_id id, Modified: lib/ii.c (+44 -2) =================================================================== --- lib/ii.c 2016-06-29 13:55:35 +0900 (017e876) +++ lib/ii.c 2016-06-29 15:29:27 +0900 (5cf0fc3) @@ -44,6 +44,7 @@ #endif #define MAX_PSEG 0x20000 +#define MAX_PSEG_TINY 0x00200 #define S_CHUNK (1 << GRN_II_W_CHUNK) #define W_SEGMENT 18 #define S_SEGMENT (1 << W_SEGMENT) @@ -76,6 +77,7 @@ static double grn_ii_select_too_many_index_match_ratio = -1; static double grn_ii_estimate_size_for_query_reduce_ratio = 0.9; static grn_bool grn_ii_overlap_token_skip_enable = GRN_FALSE; static uint32_t grn_ii_builder_block_threshold_force = 0; +static uint32_t grn_ii_max_n_segments_tiny = MAX_PSEG_TINY; void grn_ii_init_from_env(void) @@ -141,6 +143,23 @@ grn_ii_init_from_env(void) grn_ii_builder_block_threshold_force = 0; } } + + { + char grn_ii_max_n_segments_tiny_env[GRN_ENV_BUFFER_SIZE]; + grn_getenv("GRN_II_MAX_N_SEGMENTS_TINY", + grn_ii_max_n_segments_tiny_env, + GRN_ENV_BUFFER_SIZE); + if (grn_ii_max_n_segments_tiny_env[0]) { + grn_ii_max_n_segments_tiny = + grn_atoui(grn_ii_max_n_segments_tiny_env, + grn_ii_max_n_segments_tiny_env + + strlen(grn_ii_max_n_segments_tiny_env), + NULL); + if (grn_ii_max_n_segments_tiny > MAX_PSEG) { + grn_ii_max_n_segments_tiny = MAX_PSEG; + } + } + } } /* segment */ @@ -3730,6 +3749,7 @@ static grn_ii * _grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uint32_t flags) { int i; + uint32_t max_n_segments; grn_io *seg, *chunk; char path2[PATH_MAX]; struct grn_ii_header *header; @@ -3747,8 +3767,20 @@ _grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uin return NULL; } if (path && strlen(path) + 6 >= PATH_MAX) { return NULL; } - seg = grn_io_create(ctx, path, sizeof(struct grn_ii_header), - S_SEGMENT, MAX_PSEG, grn_io_auto, 
GRN_IO_EXPIRE_SEGMENT); + + if (flags & GRN_OBJ_INDEX_TINY) { + max_n_segments = grn_ii_max_n_segments_tiny; + } else { + max_n_segments = MAX_PSEG; + } + + seg = grn_io_create(ctx, + path, + sizeof(struct grn_ii_header), + S_SEGMENT, + max_n_segments, + grn_io_auto, + GRN_IO_EXPIRE_SEGMENT); if (!seg) { return NULL; } if (path) { grn_strcpy(path2, PATH_MAX, path); @@ -3952,6 +3984,16 @@ grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size) return GRN_SUCCESS; } +grn_column_flags +grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii) +{ + if (!ii) { + return 0; + } + + return ii->header->flags; +} + void grn_ii_expire(grn_ctx *ctx, grn_ii *ii) { Modified: lib/proc/proc_column.c (+5 -4) =================================================================== --- lib/proc/proc_column.c 2016-06-29 13:55:35 +0900 (94cc6f6) +++ lib/proc/proc_column.c 2016-06-29 15:29:27 +0900 (2d26b91) @@ -24,13 +24,13 @@ #include <groonga/plugin.h> -grn_obj_flags +grn_column_flags grn_proc_column_parse_flags(grn_ctx *ctx, const char *error_message_tag, const char *text, const char *end) { - grn_obj_flags flags = 0; + grn_column_flags flags = 0; while (text < end) { size_t name_size; @@ -57,6 +57,7 @@ grn_proc_column_parse_flags(grn_ctx *ctx, CHECK_FLAG(WITH_WEIGHT); CHECK_FLAG(WITH_POSITION); CHECK_FLAG(RING_BUFFER); + CHECK_FLAG(INDEX_TINY); #undef CHECK_FLAG @@ -175,7 +176,7 @@ command_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_obj *flags_raw; grn_obj *type_raw; grn_obj *source_raw; - grn_obj_flags flags; + grn_column_flags flags; grn_obj *type = NULL; table_raw = grn_plugin_proc_get_var(ctx, user_data, "table", -1); @@ -532,7 +533,7 @@ output_column_info(grn_ctx *ctx, grn_obj *column) output_column_name(ctx, column); grn_ctx_output_cstr(ctx, path); grn_ctx_output_cstr(ctx, type); - grn_dump_column_create_flags(ctx, column->header.flags, &o); + grn_dump_column_create_flags(ctx, grn_column_get_flags(ctx, column), &o); grn_ctx_output_obj(ctx, &o, NULL); 
grn_proc_output_object_id_name(ctx, column->header.domain); grn_proc_output_object_id_name(ctx, grn_obj_get_range(ctx, column)); Modified: lib/proc/proc_dump.c (+4 -2) =================================================================== --- lib/proc/proc_dump.c 2016-06-29 13:55:35 +0900 (2d2cd83) +++ lib/proc/proc_dump.c 2016-06-29 15:29:27 +0900 (4a3beb9) @@ -280,7 +280,8 @@ dump_column(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, grn_obj *column) grn_id type_id; grn_bool is_opened_type = GRN_TRUE; grn_obj *type; - grn_obj_flags default_flags = GRN_OBJ_PERSISTENT; + grn_column_flags flags; + grn_column_flags default_flags = GRN_OBJ_PERSISTENT; type_id = grn_obj_get_range(ctx, column); if (dumper->is_close_opened_object_mode) { @@ -300,8 +301,9 @@ dump_column(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, grn_obj *column) if (type->header.type == GRN_TYPE) { default_flags |= type->header.flags; } + flags = grn_column_get_flags(ctx, column); grn_dump_column_create_flags(ctx, - column->header.flags & ~default_flags, + flags & ~default_flags, dumper->output); GRN_TEXT_PUTC(ctx, dumper->output, ' '); dump_obj_name(ctx, dumper, type); Modified: lib/proc/proc_schema.c (+4 -1) =================================================================== --- lib/proc/proc_schema.c 2016-06-29 13:55:35 +0900 (97bc919) +++ lib/proc/proc_schema.c 2016-06-29 15:29:27 +0900 (129a067) @@ -759,9 +759,12 @@ command_schema_column_command_collect_arguments(grn_ctx *ctx, { grn_obj flags; + grn_column_flags column_flags; + GRN_TEXT_INIT(&flags, 0); + column_flags = grn_column_get_flags(ctx, column); grn_dump_column_create_flags(ctx, - column->header.flags & ~GRN_OBJ_PERSISTENT, + column_flags & ~GRN_OBJ_PERSISTENT, &flags); GRN_TEXT_PUTC(ctx, &flags, '\0'); ADD("flags", GRN_TEXT_VALUE(&flags)); Modified: lib/sources.am (+1 -0) =================================================================== --- lib/sources.am 2016-06-29 13:55:35 +0900 (fe1754d) +++ lib/sources.am 2016-06-29 15:29:27 
+0900 (d5e4f26) @@ -3,6 +3,7 @@ libgroonga_la_SOURCES = \ grn_alloc.h \ cache.c \ grn_cache.h \ + column.c \ com.c \ grn_com.h \ command.c \ Modified: lib/store.c (+10 -0) =================================================================== --- lib/store.c 2016-06-29 13:55:35 +0900 (c7c8b19) +++ lib/store.c 2016-06-29 15:29:27 +0900 (bf3355f) @@ -480,6 +480,16 @@ grn_ja_info(grn_ctx *ctx, grn_ja *ja, unsigned int *max_element_size) return GRN_SUCCESS; } +grn_column_flags +grn_ja_get_flags(grn_ctx *ctx, grn_ja *ja) +{ + if (!ja) { + return 0; + } + + return ja->header->flags; +} + grn_rc grn_ja_close(grn_ctx *ctx, grn_ja *ja) { Added: test/command/suite/dump/schema/column/index/index_tiny.expected (+15 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/column/index/index_tiny.expected 2016-06-29 15:29:27 +0900 (93fee48) @@ -0,0 +1,15 @@ +table_create Users TABLE_HASH_KEY ShortText +[[0,0.0,0.0],true] +column_create Users name COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Names TABLE_HASH_KEY ShortText +[[0,0.0,0.0],true] +column_create Names users COLUMN_INDEX|INDEX_TINY Users name +[[0,0.0,0.0],true] +dump +table_create Users TABLE_HASH_KEY ShortText +column_create Users name COLUMN_SCALAR ShortText + +table_create Names TABLE_HASH_KEY ShortText + +column_create Names users COLUMN_INDEX|INDEX_TINY Users name Added: test/command/suite/dump/schema/column/index/index_tiny.test (+7 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/column/index/index_tiny.test 2016-06-29 15:29:27 +0900 (e9f89ed) @@ -0,0 +1,7 @@ +table_create Users TABLE_HASH_KEY ShortText +column_create Users name COLUMN_SCALAR ShortText + +table_create Names TABLE_HASH_KEY ShortText +column_create Names users COLUMN_INDEX|INDEX_TINY Users name + +dump Modified: test/command/suite/schema/tables/columns/type/index.expected (+3 -3) 
=================================================================== --- test/command/suite/schema/tables/columns/type/index.expected 2016-06-29 13:55:35 +0900 (9dac624) +++ test/command/suite/schema/tables/columns/type/index.expected 2016-06-29 15:29:27 +0900 (374d4bf) @@ -6,7 +6,7 @@ column_create Posts content COLUMN_SCALAR Text [[0,0.0,0.0],true] table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto [[0,0.0,0.0],true] -column_create Terms index COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION Posts _key,title,content +column_create Terms index COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION|INDEX_TINY Posts _key,title,content [[0,0.0,0.0],true] schema [ @@ -350,11 +350,11 @@ schema "arguments": { "table": "Terms", "name": "index", - "flags": "COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION", + "flags": "COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION|INDEX_TINY", "type": "Posts", "sources": "_key,title,content" }, - "command_line": "column_create --table Terms --name index --flags COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION --type Posts --sources _key,title,content" + "command_line": "column_create --table Terms --name index --flags COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION|INDEX_TINY --type Posts --sources _key,title,content" } } } Modified: test/command/suite/schema/tables/columns/type/index.test (+2 -1) =================================================================== --- test/command/suite/schema/tables/columns/type/index.test 2016-06-29 13:55:35 +0900 (86aa0d7) +++ test/command/suite/schema/tables/columns/type/index.test 2016-06-29 15:29:27 +0900 (5c12b44) @@ -5,7 +5,8 @@ column_create Posts content COLUMN_SCALAR Text table_create Terms TABLE_PAT_KEY ShortText \ --default_tokenizer TokenBigram \ --normalizer NormalizerAuto -column_create Terms index COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION \ +column_create Terms index \ + 
COLUMN_INDEX|WITH_SECTION|WITH_WEIGHT|WITH_POSITION|INDEX_TINY \ Posts _key,title,content schema -------------- next part -------------- HTMLの添付ファイルを保管しました...Download