null+****@clear*****
null+****@clear*****
2012年 2月 14日 (火) 18:38:20 JST
Daijiro MORI 2012-02-14 18:38:20 +0900 (Tue, 14 Feb 2012)
New Revision: 2d42b586c8fb8c5f0c2f94128636da05328bfa6d
Log:
grn_ii_buffer: calculate the size of packed buffer accurately.
Modified files:
lib/ii.c
Modified: lib/ii.c (+62 -32)
===================================================================
--- lib/ii.c 2012-02-14 14:14:33 +0900 (78df2ad)
+++ lib/ii.c 2012-02-14 18:38:20 +0900 (8c3ea14)
@@ -6343,11 +6343,12 @@ const int II_BUFFER_ORDER = GRN_CURSOR_BY_ID;
#else /* II_BUFFER_ORDER_BY_ID */
const int II_BUFFER_ORDER = GRN_CURSOR_BY_KEY;
#endif /* II_BUFFER_ORDER_BY_ID */
-const uint16_t II_BUFFER_NTERMS_PER_BUFFER = 16300;
+//const uint16_t II_BUFFER_NTERMS_PER_BUFFER = 16380;
+const uint16_t II_BUFFER_NTERMS_PER_BUFFER = 8190;
const uint32_t II_BUFFER_PACKED_BUFFER_SIZE = 0x4000000;
const char *TMPFILE_PATH = "grn_ii_buffer_tmp";
const uint32_t II_BUFFER_NCOUNTERS_MARGIN = 0x100000;
-const size_t II_BUFFER_BLOCK_SIZE = 0x100000;
+const size_t II_BUFFER_BLOCK_SIZE = 0x1000000;
const uint32_t II_BUFFER_BLOCK_READ_UNIT_SIZE = 0x200000;
typedef struct {
@@ -6399,6 +6400,7 @@ struct _grn_ii_buffer {
buffer *term_buffer;
datavec data_vectors[MAX_N_ELEMENTS + 1];
uint8_t *packed;
+ uint32_t packed_buf_size;
uint32_t packed_len;
uint64_t total_chunk_size;
};
@@ -6408,6 +6410,8 @@ grn_ii_buffer_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
{
uint8_t *outbuf, *outbufp, *outbufp_;
ii_buffer_block *block;
+ GRN_LOG(ctx, GRN_LOG_NOTICE, "flushing:%d npostings:%u",
+ ii_buffer->nblocks, ii_buffer->blockpos);
outbuf = (uint8_t *)GRN_MALLOC(ii_buffer->blockpos * 7 * sizeof(uint32_t));
/* if (!outbuf) { err } */
outbufp_ = outbufp = outbuf;
@@ -6522,6 +6526,8 @@ grn_ii_buffer_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
ssize_t r = write(ii_buffer->tmpfd, outbuf, outbufp - outbuf);
if (r > 0) { ii_buffer->filepos += r; }
block->tail = ii_buffer->filepos;
+ GRN_LOG(ctx, GRN_LOG_NOTICE, "flushed: %d encoded_size:%jdKB",
+ ii_buffer->nblocks, r >> 10);
}
ii_buffer->nblocks++;
GRN_FREE(outbuf);
@@ -6668,8 +6674,12 @@ grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
ii_buffer->term_buffer->header.nterms_void = 0;
buffer_segment_update(ii_buffer->ii, ii_buffer->lseg, ii_buffer->dseg);
ii_buffer->ii->header->total_chunk_size += ii_buffer->packed_len;
- ii_buffer->term_buffer = NULL;
ii_buffer->total_chunk_size += ii_buffer->packed_len;
+ GRN_LOG(ctx, GRN_LOG_NOTICE, "nterms=%d chunk=%d total=%zuKB",
+ ii_buffer->term_buffer->header.nterms,
+ ii_buffer->term_buffer->header.chunk_size,
+ ii_buffer->ii->header->total_chunk_size >> 10);
+ ii_buffer->term_buffer = NULL;
ii_buffer->packed = NULL;
ii_buffer->packed_len = 0;
}
@@ -6699,22 +6709,6 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
uint64_t spos = 0;
uint32_t nrecs = 0;
uint32_t nposts = 0;
- uint16_t nterm;
- buffer_term *bt;
- if (!ii_buffer->term_buffer) {
- uint32_t lseg;
- void *term_buffer;
- for (lseg = 0; lseg < GRN_II_MAX_LSEG; lseg++) {
- if (ii_buffer->ii->header->binfo[lseg] == NOT_ASSIGNED) { break; }
- }
- ii_buffer->lseg = lseg;
- ii_buffer->dseg = segment_get(ctx, ii_buffer->ii);
- GRN_IO_SEG_REF(ii_buffer->ii->seg, ii_buffer->dseg, term_buffer);
- ii_buffer->term_buffer = (buffer *)term_buffer;
- }
- nterm = ii_buffer->term_buffer->header.nterms++;
- bt = &ii_buffer->term_buffer->terms[nterm];
- a[0] = SEG2POS(ii_buffer->lseg, (sizeof(buffer_header) + sizeof(buffer_term) * nterm));
{
int i;
for (i = 0; i < nhits; i++) {
@@ -6758,24 +6752,60 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
ii_buffer->data_vectors[1].data_size = nrecs;
ii_buffer->data_vectors[2].data_size = nposts;
- ii_buffer->data_vectors[0].flags = ((nrecs < 16) || (nrecs <= (lr >> 8))) ? 0 : USE_P_ENC;
- ii_buffer->data_vectors[1].flags = (nrecs < 3) ? 0 : USE_P_ENC;
+ ii_buffer->data_vectors[0].flags =
+ ((nrecs < 16) || (nrecs <= (lr >> 8))) ? 0 : USE_P_ENC;
+ ii_buffer->data_vectors[1].flags =
+ (nrecs < 3) ? 0 : USE_P_ENC;
ii_buffer->data_vectors[2].flags =
(((nposts < 32) || (nposts <= (spos >> 13))) ? 0 : USE_P_ENC)|ODD;
}
- if (!ii_buffer->packed) { ii_buffer->packed = GRN_MALLOC(II_BUFFER_PACKED_BUFFER_SIZE * 2); }
{
- int packed_len = grn_p_encv(ctx, ii_buffer->data_vectors, ii_buffer->ii->n_elements,
- ii_buffer->packed + ii_buffer->packed_len);
- bt->tid = tid;
- bt->size_in_buffer = 0;
- bt->pos_in_buffer = 0;
- bt->size_in_chunk = packed_len;
- bt->pos_in_chunk = ii_buffer->packed_len;
- ii_buffer->packed_len += packed_len;
+ uint32_t max_size = (nrecs * 2 + nposts);
+ if (ii_buffer->packed &&
+ ii_buffer->packed_buf_size <
+ ii_buffer->packed_len + max_size) {
+ grn_ii_buffer_chunk_flush(ctx, ii_buffer);
+ }
+ if (!ii_buffer->packed) {
+ uint32_t buf_size = (max_size > II_BUFFER_PACKED_BUFFER_SIZE)
+ ? max_size : II_BUFFER_PACKED_BUFFER_SIZE;
+ if ((ii_buffer->packed = GRN_MALLOC(buf_size))) {
+ ii_buffer->packed_buf_size = buf_size;
+ }
+ }
}
- if (nterm == II_BUFFER_NTERMS_PER_BUFFER || ii_buffer->packed_len > II_BUFFER_PACKED_BUFFER_SIZE) {
- grn_ii_buffer_chunk_flush(ctx, ii_buffer);
+ {
+ uint16_t nterm;
+ buffer_term *bt;
+ if (!ii_buffer->term_buffer) {
+ uint32_t lseg;
+ void *term_buffer;
+ for (lseg = 0; lseg < GRN_II_MAX_LSEG; lseg++) {
+ if (ii_buffer->ii->header->binfo[lseg] == NOT_ASSIGNED) { break; }
+ }
+ ii_buffer->lseg = lseg;
+ ii_buffer->dseg = segment_get(ctx, ii_buffer->ii);
+ GRN_IO_SEG_REF(ii_buffer->ii->seg, ii_buffer->dseg, term_buffer);
+ ii_buffer->term_buffer = (buffer *)term_buffer;
+ }
+ nterm = ii_buffer->term_buffer->header.nterms++;
+ bt = &ii_buffer->term_buffer->terms[nterm];
+ a[0] = SEG2POS(ii_buffer->lseg,
+ (sizeof(buffer_header) + sizeof(buffer_term) * nterm));
+ {
+ int packed_len = grn_p_encv(ctx, ii_buffer->data_vectors,
+ ii_buffer->ii->n_elements,
+ ii_buffer->packed + ii_buffer->packed_len);
+ bt->tid = tid;
+ bt->size_in_buffer = 0;
+ bt->pos_in_buffer = 0;
+ bt->size_in_chunk = packed_len;
+ bt->pos_in_chunk = ii_buffer->packed_len;
+ ii_buffer->packed_len += packed_len;
+ }
+ if (ii_buffer->term_buffer->header.nterms == II_BUFFER_NTERMS_PER_BUFFER) {
+ grn_ii_buffer_chunk_flush(ctx, ii_buffer);
+ }
}
}
}