Kouhei Sutou
null+****@clear*****
Tue Dec 27 13:23:00 JST 2016
Kouhei Sutou 2016-12-27 13:23:00 +0900 (Tue, 27 Dec 2016) New Revision: 5b6129d76a2491e6199f420f90aa3d2ffde52a1a https://github.com/groonga/groonga/commit/5b6129d76a2491e6199f420f90aa3d2ffde52a1a Message: ii: support falling back to available buffer This doesn't change the default existing buffer search algorithm. The fallback is used only when the default existing buffer search algorithm doesn't find an existing buffer. If no existing buffer is found by the default existing buffer search algorithm, a new buffer is allocated, which grows the index file size. The case where no existing buffer is found can be caused by the following: table_create a TABLE_NO_KEY column_create a x COLUMN_SCALAR ShortText table_create b TABLE_PAT_KEY ShortText column_create b index COLUMN_INDEX a x load --table a [ {"x": "00000000"}, {"x": "00000001"}, ... {"x": "00486036"}, {"x": "00000000"}, {"x": "00000001"}, ... {"x": "00413962"} ] buffer_open_by_tid() is a function that just contains code extracted from buffer_new(). It doesn't change any logic. 
Modified files: lib/grn_ii.h lib/ii.c Modified: lib/grn_ii.h (+1 -0) =================================================================== --- lib/grn_ii.h 2016-12-27 01:02:50 +0900 (752dc0e) +++ lib/grn_ii.h 2016-12-27 13:23:00 +0900 (eaa3d1b) @@ -39,6 +39,7 @@ struct _grn_ii { /* This member is used for matching */ uint32_t n_elements; /* Number of elements in postings */ /* rid, [sid], tf, [weight] and [pos] */ + grn_id next_buffer_candidate_tid; struct grn_ii_header *header; }; Modified: lib/ii.c (+75 -38) =================================================================== --- lib/ii.c 2016-12-27 01:02:50 +0900 (67714f1) +++ lib/ii.c 2016-12-27 13:23:00 +0900 (aded74c) @@ -3961,9 +3961,62 @@ buffer_split(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h) #define SCALE_FACTOR 2048 #define MAX_NTERMS 8192 -#define SPLIT_COND (b->header.nterms > 1024 ||\ - (b->header.nterms > 1 &&\ - b->header.chunk_size * 100 > ii->header->total_chunk_size)) +#define SPLIT_COND(b)\ + ((b)->header.nterms > 1024 ||\ + ((b)->header.nterms > 1 &&\ + (b)->header.chunk_size * 100 > ii->header->total_chunk_size)) + +inline static uint32_t +buffer_open_by_tid(grn_ctx *ctx, + grn_ii *ii, + grn_id tid, + int size, + grn_hash *h, + buffer **b, + uint32_t *lseg) +{ + uint32_t pseg = NOT_ASSIGNED; + uint32_t *a; + + a = array_at(ctx, ii, tid); + if (!a) { + return pseg; + } + + for (;;) { + uint32_t pos = a[0]; + + if (!pos || (pos & 1)) { break; } + pseg = buffer_open(ctx, ii, pos, NULL, b); + if (pseg == NOT_ASSIGNED) { break; } + if ((*b)->header.buffer_free >= size + sizeof(buffer_term)) { + *lseg = LSEG(pos); + break; + } + buffer_close(ctx, ii, pseg); + if (SPLIT_COND(*b)) { + /* ((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax - + (*b)->header.nterms * sizeof(buffer_term)) * 4 < + (*b)->header.chunk_size) */ + GRN_LOG(ctx, GRN_LOG_DEBUG, + "nterms=%d chunk=%d total=%" GRN_FMT_INT64U, + (*b)->header.nterms, + (*b)->header.chunk_size, + ii->header->total_chunk_size >> 10); + if 
(buffer_split(ctx, ii, LSEG(pos), h)) { break; } + } else { + if (S_SEGMENT - sizeof(buffer_header) + - (*b)->header.nterms * sizeof(buffer_term) + < size + sizeof(buffer_term)) { + break; + } + if (buffer_flush(ctx, ii, LSEG(pos), h)) { break; } + } + } + array_unref(ii, tid); + + return pseg; +} inline static uint32_t buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos, @@ -3977,7 +4030,7 @@ buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos, // const char *key = _grn_table_key(ctx, ii->lexicon, id, &key_size); int key_size = grn_table_get_key(ctx, ii->lexicon, id, key, GRN_TABLE_MAX_KEY_SIZE); - uint32_t *a, lseg = NOT_ASSIGNED, pseg = NOT_ASSIGNED; + uint32_t lseg = NOT_ASSIGNED, pseg = NOT_ASSIGNED; grn_table_cursor *tc = NULL; if (S_SEGMENT - sizeof(buffer_header) < size + sizeof(buffer_term)) { DEFINE_NAME(ii); @@ -4005,41 +4058,18 @@ buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos, while (ctx->rc == GRN_SUCCESS && lseg == NOT_ASSIGNED && (tid = grn_table_cursor_next(ctx, tc))) { - if ((a = array_at(ctx, ii, tid))) { - for (;;) { - uint32_t pos = a[0]; - if (!pos || (pos & 1)) { break; } - pseg = buffer_open(ctx, ii, pos, NULL, &b); - if (pseg == NOT_ASSIGNED) { break; } - if (b->header.buffer_free >= size + sizeof(buffer_term)) { - lseg = LSEG(pos); - break; - } - buffer_close(ctx, ii, pseg); - if (SPLIT_COND) { - /* ((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax - - b->header.nterms * sizeof(buffer_term)) * 4 < - b->header.chunk_size) */ - GRN_LOG(ctx, GRN_LOG_DEBUG, - "nterms=%d chunk=%d total=%" GRN_FMT_INT64U, - b->header.nterms, - b->header.chunk_size, - ii->header->total_chunk_size >> 10); - if (buffer_split(ctx, ii, LSEG(pos), h)) { break; } - } else { - if (S_SEGMENT - sizeof(buffer_header) - - b->header.nterms * sizeof(buffer_term) - < size + sizeof(buffer_term)) { - break; - } - if (buffer_flush(ctx, ii, LSEG(pos), h)) { break; } - } - } - array_unref(ii, tid); - } + pseg = buffer_open_by_tid(ctx, ii, tid, 
size, h, &b, &lseg); } grn_table_cursor_close(ctx, tc); } + if (lseg == NOT_ASSIGNED && ii->next_buffer_candidate_tid != GRN_ID_NIL) { + pseg = buffer_open_by_tid(ctx, ii, + ii->next_buffer_candidate_tid, + size, h, &b, &lseg); + if (lseg == NOT_ASSIGNED) { + ii->next_buffer_candidate_tid = GRN_ID_NIL; + } + } if (lseg == NOT_ASSIGNED) { if (buffer_segment_new(ctx, ii, &lseg) || (pseg = buffer_open(ctx, ii, SEG2POS(lseg, 0), NULL, &b)) == NOT_ASSIGNED) { @@ -4147,6 +4177,7 @@ _grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uin ii->lexicon = lexicon; ii->lflags = lflags; ii->encoding = encoding; + ii->next_buffer_candidate_tid = GRN_ID_NIL; ii->header = header; ii->n_elements = 2; if ((flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; } @@ -4271,6 +4302,7 @@ grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon) ii->lexicon = lexicon; ii->lflags = lflags; ii->encoding = encoding; + ii->next_buffer_candidate_tid = GRN_ID_NIL; ii->header = header; ii->n_elements = 2; if ((header->flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; } @@ -4404,7 +4436,7 @@ grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_h GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing a[0]=%d seg=%d(%p) free=%d", a[0], LSEG(a[0]), b, b->header.buffer_free); buffer_close(ctx, ii, pseg); - if (SPLIT_COND) { + if (SPLIT_COND(b)) { /*((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax - b->header.nterms * sizeof(buffer_term)) * 4 < b->header.chunk_size)*/ @@ -4583,7 +4615,12 @@ grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_h } buffer_put(ctx, ii, b, bt, br, bs, u, size); buffer_close(ctx, ii, pseg); - if (!a[0] || (a[0] & 1)) { a[0] = pos; } + if (!a[0] || (a[0] & 1)) { + a[0] = pos; + if (ii->next_buffer_candidate_tid == GRN_ID_NIL) { + ii->next_buffer_candidate_tid = tid; + } + } exit : array_unref(ii, tid); if (bs) { GRN_FREE(bs); } -------------- next part -------------- 
HTMLの添付ファイルを保管しました...Download