null+****@clear*****
null+****@clear*****
2010年 7月 8日 (木) 19:03:09 JST
Daijiro MORI 2010-07-08 10:03:09 +0000 (Thu, 08 Jul 2010)
New Revision: 11b3a26028d6f5537dd98bc2ca39b174f26b3a8e
Log:
Enhanced grn_ii_buffer_check().
Modified files:
lib/ii.c
Modified: lib/ii.c (+99 -104)
===================================================================
--- lib/ii.c 2010-07-08 04:51:01 +0000 (27d9f4b)
+++ lib/ii.c 2010-07-08 10:03:09 +0000 (45b0151)
@@ -2939,8 +2939,14 @@ grn_ii_buffer_check(grn_ctx *ctx, grn_ii *ii, uint32_t seg)
grn_rc rc;
grn_io_win sw;
buffer *sb;
- uint8_t *sc;
- uint32_t pseg, scn;
+ uint8_t *sc = NULL;
+ uint32_t pseg, scn, nerrors = 0, nterm_with_chunk = 0;
+ buffer_term *bt;
+ uint8_t *sbp = NULL;
+ datavec rdv[MAX_N_ELEMENTS + 1];
+ uint16_t n;
+ int nterms_void = 0;
+ int size_in_buffer = 0;
if (ii->header->binfo[seg] == NOT_ASSIGNED) {
GRN_OUTPUT_BOOL(GRN_FALSE);
return;
@@ -2950,116 +2956,105 @@ grn_ii_buffer_check(grn_ctx *ctx, grn_ii *ii, uint32_t seg)
GRN_OUTPUT_BOOL(GRN_FALSE);
return;
}
+ datavec_init(ctx, rdv, ii->n_elements, 0, 0);
+ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+ rdv[ii->n_elements - 1].flags = ODD;
+ }
GRN_OUTPUT_CSTR("buffer id");
GRN_OUTPUT_INT64(seg);
+ if ((scn = sb->header.chunk) == NOT_ASSIGNED) {
+ GRN_OUTPUT_CSTR("void chunk size");
+ GRN_OUTPUT_INT64(sb->header.chunk_size);
+ } else {
+ if ((sc = WIN_MAP2(ii->chunk, ctx, &sw, scn, 0, sb->header.chunk_size, grn_io_rdonly))) {
+ GRN_OUTPUT_CSTR("chunk size");
+ GRN_OUTPUT_INT64(sb->header.chunk_size);
+ } else {
+ GRN_OUTPUT_CSTR("unmappable chunk size");
+ GRN_OUTPUT_INT64(sb->header.chunk_size);
+ }
+ }
+ GRN_OUTPUT_CSTR("buffer term");
+ GRN_OUTPUT_ARRAY_OPEN("TERMS", sb->header.nterms);
+
+ for (bt = sb->terms, n = sb->header.nterms; n; n--, bt++) {
+ char key[GRN_TABLE_MAX_KEY_SIZE];
+ int key_size;
+ uint16_t nextb;
+ uint32_t nchunks = 0;
+ chunk_info *cinfo = NULL;
+ grn_id crid = GRN_ID_NIL;
+ docinfo bid = {0, 0};
+ uint32_t sdf = 0, snn = 0;
+ uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL;
+ if (!bt->tid) {
+ nterms_void++;
+ continue;
+ }
+ key_size = grn_table_get_key(ctx, ii->lexicon, bt->tid, key, GRN_TABLE_MAX_KEY_SIZE);
+ GRN_OUTPUT_STR(key, key_size);
+ nextb = bt->pos_in_buffer;
+ size_in_buffer += bt->size_in_buffer;
+ GETNEXTB();
+ if (sc && bt->size_in_chunk) {
+ uint8_t *scp = sc + bt->pos_in_chunk;
+ uint8_t *sce = scp + bt->size_in_chunk;
+ size_t size = S_SEGMENT * ii->n_elements;
+ if ((bt->tid & CHUNK_SPLIT)) {
+ int i;
+ GRN_B_DEC(nchunks, scp);
+ if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) {
+ datavec_fin(ctx, rdv);
+ return;
+ }
+ for (i = 0; i < nchunks; i++) {
+ GRN_B_DEC(cinfo[i].segno, scp);
+ GRN_B_DEC(cinfo[i].size, scp);
+ GRN_B_DEC(cinfo[i].dgap, scp);
+ crid += cinfo[i].dgap;
+ }
+ }
+ if (sce > scp) {
+ size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements);
+ {
+ int j = 0;
+ sdf = rdv[j].data_size;
+ srp = rdv[j++].data;
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
+ if (sdf != rdv[j].data_size) {
+ nerrors++;
+ }
+ stp = rdv[j++].data;
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
+ snn = rdv[j].data_size;
+ snp = rdv[j].data;
+ }
+ nterm_with_chunk++;
+ }
+ }
+ if (cinfo) { GRN_FREE(cinfo); }
+ }
+ GRN_OUTPUT_ARRAY_CLOSE();
- GRN_OUTPUT_CSTR("chunk");
- GRN_OUTPUT_INT64(sb->header.chunk);
- GRN_OUTPUT_CSTR("chunk size");
- GRN_OUTPUT_INT64(sb->header.chunk_size);
GRN_OUTPUT_CSTR("buffer free");
GRN_OUTPUT_INT64(sb->header.buffer_free);
+ GRN_OUTPUT_CSTR("size in buffer");
+ GRN_OUTPUT_INT64(size_in_buffer);
GRN_OUTPUT_CSTR("nterms");
GRN_OUTPUT_INT64(sb->header.nterms);
- GRN_OUTPUT_CSTR("nterms void");
- GRN_OUTPUT_INT64(sb->header.nterms_void);
-
- if ((scn = sb->header.chunk) != NOT_ASSIGNED &&
- (sc = WIN_MAP2(ii->chunk, ctx, &sw, scn, 0,
- sb->header.chunk_size, grn_io_rdonly))) {
- buffer_term *bt;
- uint8_t *sbp = NULL;
- datavec rdv[MAX_N_ELEMENTS + 1];
- uint16_t n = sb->header.nterms, nterms_void = 0;
- datavec_init(ctx, rdv, ii->n_elements, 0, 0);
- if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
- rdv[ii->n_elements - 1].flags = ODD;
- }
- for (bt = sb->terms; n; n--, bt++) {
- uint16_t nextb;
- uint64_t spos = 0;
- uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp, nchunks = 0;
- chunk_info *cinfo = NULL;
- grn_id crid = GRN_ID_NIL;
- docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = {0, 0};
- uint32_t sdf = 0, snn = 0;
- uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL;
- if (!bt->tid) {
- nterms_void++;
- continue;
- }
- nextb = bt->pos_in_buffer;
- GETNEXTB();
- if (sc && bt->size_in_chunk) {
- uint8_t *scp = sc + bt->pos_in_chunk;
- uint8_t *sce = scp + bt->size_in_chunk;
- size_t size = S_SEGMENT * ii->n_elements;
- if ((bt->tid & CHUNK_SPLIT)) {
- int i;
- GRN_B_DEC(nchunks, scp);
- if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) {
- datavec_fin(ctx, rdv);
- return;
- }
- for (i = 0; i < nchunks; i++) {
- GRN_B_DEC(cinfo[i].segno, scp);
- GRN_B_DEC(cinfo[i].size, scp);
- GRN_B_DEC(cinfo[i].dgap, scp);
- crid += cinfo[i].dgap;
- }
- }
- if (sce > scp) {
- size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements);
- {
- int j = 0;
- sdf = rdv[j].data_size;
- srp = rdv[j++].data;
- if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
- stp = rdv[j++].data;
- if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
- snn = rdv[j].data_size;
- snp = rdv[j].data;
- }
- }
- }
- GETNEXTC();
- /*
- {
- grn_obj buf;
- uint32_t rid, sid, tf, i, pos, *pp;
- GRN_TEXT_INIT(&buf, 0);
- rid = 0;
- pp = dv[3].data;
- for (i = 0; i < ndf; i++) {
- GRN_BULK_REWIND(&buf);
- rid += dv[0].data[i];
- if (dv[0].data[i]) { sid = 0; }
- sid += dv[1].data[i] + 1;
- tf = dv[2].data[i] + 1;
- pos = 0;
- grn_text_itoa(ctx, &buf, rid);
- GRN_TEXT_PUTC(ctx, &buf, ':');
- grn_text_itoa(ctx, &buf, sid);
- GRN_TEXT_PUTC(ctx, &buf, ':');
- grn_text_itoa(ctx, &buf, tf);
- GRN_TEXT_PUTC(ctx, &buf, ':');
- while (tf--) {
- pos += *pp++;
- grn_text_itoa(ctx, &buf, pos);
- if (tf) { GRN_TEXT_PUTC(ctx, &buf, ','); }
- }
- GRN_TEXT_PUTC(ctx, &buf, '\0');
- GRN_LOG(ctx, GRN_LOG_NOTICE, "Posting:%s", GRN_TEXT_VALUE(&buf));
- }
- GRN_OBJ_FIN(ctx, &buf);
- }
- */
-
- if (cinfo) { GRN_FREE(cinfo); }
- }
- datavec_fin(ctx, rdv);
- grn_io_win_unmap2(&sw);
+ if (nterms_void != sb->header.nterms_void) {
+ GRN_OUTPUT_CSTR("nterms void gap");
+ GRN_OUTPUT_INT64(nterms_void - sb->header.nterms_void);
}
+ GRN_OUTPUT_CSTR("nterms with chunk");
+ GRN_OUTPUT_INT64(nterm_with_chunk);
+ if (nerrors) {
+ GRN_OUTPUT_CSTR("nterms with corrupt chunk");
+ GRN_OUTPUT_INT64(nerrors);
+ }
+ datavec_fin(ctx, rdv);
+ if (sc) { grn_io_win_unmap2(&sw); }
+
buffer_close(ctx, ii, pseg);
}