null+****@clear*****
null+****@clear*****
2012年 2月 23日 (木) 17:49:35 JST
Daijiro MORI 2012-02-23 17:49:35 +0900 (Thu, 23 Feb 2012)
New Revision: 0b6a766884a89ca5abadab5b47351e419698acab
Log:
grn_ii_buffer: add 'sparsity' parameter to grn_ii_build().
Modified files:
lib/db.c
lib/ii.c
lib/ii.h
Modified: lib/db.c (+1 -1)
===================================================================
--- lib/db.c 2012-02-23 15:15:41 +0900 (775c063)
+++ lib/db.c 2012-02-23 17:49:35 +0900 (29a5a96)
@@ -5724,7 +5724,7 @@ build_index(grn_ctx *ctx, grn_obj *obj)
if ((src = grn_ctx_at(ctx, *s))) {
target = GRN_OBJ_TABLEP(src) ? src : grn_ctx_at(ctx, src->header.domain);
if (target) {
- grn_ii_build(ctx, (grn_ii *)obj);
+ grn_ii_build(ctx, (grn_ii *)obj, 10);
} else {
ERR(GRN_INVALID_ARGUMENT, "invalid target");
}
Modified: lib/ii.c (+28 -11)
===================================================================
--- lib/ii.c 2012-02-23 15:15:41 +0900 (6602d8c)
+++ lib/ii.c 2012-02-23 17:49:35 +0900 (7ccdf63)
@@ -6388,6 +6388,7 @@ struct _grn_ii_buffer {
uint32_t nblocks;
int tmpfd;
char tmpfpath[PATH_MAX];
+ uint64_t update_buffer_size;
// stuff for parsing
off_t filepos;
grn_id *block_buf;
@@ -6395,8 +6396,8 @@ struct _grn_ii_buffer {
uint32_t block_pos;
ii_buffer_counter *counters;
uint32_t ncounters;
- uint64_t total_nrecs;
- uint64_t total_nposts;
+ uint64_t total_size;
+ uint64_t curr_size;
// stuff for merging
grn_ii *ii;
uint32_t lseg;
@@ -6492,8 +6493,7 @@ encode_terms(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
GRN_B_ENC(gtid, outbufp);
GRN_B_ENC(counter->nrecs, outbufp);
GRN_B_ENC(counter->nposts, outbufp);
- ii_buffer->total_nrecs += counter->nrecs;
- ii_buffer->total_nposts += counter->nposts;
+ ii_buffer->total_size += counter->nrecs + counter->nposts;
counter->offset_rid = outbufp - outbuf;
outbufp += offset_rid;
if ((flags & GRN_OBJ_WITH_SECTION)) {
@@ -6853,6 +6853,7 @@ grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
ii_buffer->packed_buf = NULL;
ii_buffer->packed_len = 0;
ii_buffer->packed_buf_size = 0;
+ ii_buffer->curr_size = 0;
}
static uint32_t
@@ -6869,6 +6870,7 @@ merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
nrecs += block->nrecs;
nposts += block->nposts;
}
+ ii_buffer->curr_size += nrecs + nposts;
max_size = nrecs * (ii_buffer->ii->n_elements);
if (flags & GRN_OBJ_WITH_POSITION) { max_size += nposts - nrecs; }
datavec_reset(ctx, ii_buffer->data_vectors,
@@ -7056,7 +7058,9 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
bt->size_in_chunk = packed_len;
bt->pos_in_chunk = ii_buffer->packed_len;
ii_buffer->packed_len += packed_len;
- if (term_buffer->header.nterms == II_BUFFER_NTERMS_PER_BUFFER) {
+ if (((ii_buffer->curr_size * ii_buffer->update_buffer_size) +
+ (ii_buffer->total_size * term_buffer->header.nterms * 16)) >=
+ (ii_buffer->total_size * II_BUFFER_NTERMS_PER_BUFFER * 16)) {
grn_ii_buffer_chunk_flush(ctx, ii_buffer);
}
}
@@ -7064,7 +7068,7 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
}
grn_ii_buffer *
-grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii)
+grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii, uint64_t update_buffer_size)
{
if (ii && ii->lexicon) {
grn_ii_buffer *ii_buffer = GRN_MALLOCN(grn_ii_buffer, 1);
@@ -7077,8 +7081,8 @@ grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii)
ii_buffer->ncounters = II_BUFFER_NCOUNTERS_MARGIN;
ii_buffer->block_pos = 0;
ii_buffer->filepos = 0;
- ii_buffer->total_nrecs = 0;
- ii_buffer->total_nposts = 0;
+ ii_buffer->total_size = 0;
+ ii_buffer->update_buffer_size = update_buffer_size;
ii_buffer->counters = GRN_CALLOC(ii_buffer->ncounters *
sizeof(ii_buffer_counter));
if (ii_buffer->counters) {
@@ -7136,7 +7140,20 @@ grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
close(ii_buffer->tmpfd);
GRN_FREE(ii_buffer->block_buf);
GRN_FREE(ii_buffer->counters);
- GRN_LOG(ctx, GRN_LOG_NOTICE, "nblocks: %d", ii_buffer->nblocks);
+
+ if (ii_buffer->update_buffer_size &&
+ ii_buffer->update_buffer_size < 20) {
+ if (ii_buffer->update_buffer_size < 10) {
+ ii_buffer->update_buffer_size =
+ ii_buffer->total_size >> (10 - ii_buffer->update_buffer_size);
+ } else {
+ ii_buffer->update_buffer_size =
+ ii_buffer->total_size << (ii_buffer->update_buffer_size - 10);
+ }
+ }
+
+ GRN_LOG(ctx, GRN_LOG_NOTICE, "nblocks=%d, update_buffer_size=%zu",
+ ii_buffer->nblocks, ii_buffer->update_buffer_size);
ii_buffer->term_buffer = NULL;
ii_buffer->packed_buf = NULL;
@@ -7241,9 +7258,9 @@ grn_ii_buffer_parse(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
}
grn_rc
-grn_ii_build(grn_ctx *ctx, grn_ii *ii)
+grn_ii_build(grn_ctx *ctx, grn_ii *ii, unsigned int sparsity)
{
- grn_ii_buffer *ii_buffer = grn_ii_buffer_open(ctx, ii);
+ grn_ii_buffer *ii_buffer = grn_ii_buffer_open(ctx, ii, sparsity);
if (ii_buffer) {
grn_id *s = ii->obj.source;
if ((ii->obj.source_size) && s) {
Modified: lib/ii.h (+3 -2)
===================================================================
--- lib/ii.h 2012-02-23 15:15:41 +0900 (4d9708d)
+++ lib/ii.h 2012-02-23 17:49:35 +0900 (c0fad19)
@@ -188,8 +188,9 @@ grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator
void grn_ii_inspect_elements(grn_ctx *ctx, grn_ii *ii, grn_obj *buf);
void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf);
-grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii);
-grn_ii_buffer *grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii);
+grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, unsigned int sparsity);
+grn_ii_buffer *grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii,
+ long long unsigned int update_buffer_size);
grn_rc grn_ii_buffer_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
grn_id rid, unsigned int section, grn_obj *value);
grn_rc grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer);