Kouhei Sutou
null+****@clear*****
Sun Apr 5 23:56:56 JST 2015
Kouhei Sutou 2015-04-05 23:56:56 +0900 (Sun, 05 Apr 2015) New Revision: c13d7da7be33e0270615b85b91fe3ab62f90e409 https://github.com/pgroonga/pgroonga/commit/c13d7da7be33e0270615b85b91fe3ab62f90e409 Message: Re-support pgroonga.score() It requires primary key in index target columns. Modified files: expected/full-text-search/text/single/score/and.out expected/full-text-search/text/single/score/or.out expected/full-text-search/text/single/score/updated.out pgroonga.c Renamed files: sql/full-text-search/text/single/score/and.sql (from sql/full-text-search/text/single/score/and.sql.disabled) sql/full-text-search/text/single/score/or.sql (from sql/full-text-search/text/single/score/or.sql.disabled) sql/full-text-search/text/single/score/updated.sql (from sql/full-text-search/text/single/score/updated.sql.disabled) Modified: expected/full-text-search/text/single/score/and.out (+2 -2) =================================================================== --- expected/full-text-search/text/single/score/and.out 2015-04-05 16:03:04 +0900 (b9e5db7) +++ expected/full-text-search/text/single/score/and.out 2015-04-05 23:56:56 +0900 (8b79c9f) @@ -1,11 +1,11 @@ CREATE TABLE memos ( - id integer, + id integer PRIMARY KEY, content text ); INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); -CREATE INDEX grnindex ON memos USING pgroonga (content); +CREATE INDEX grnindex ON memos USING pgroonga (id, content); SET enable_seqscan = off; SET enable_indexscan = on; SET enable_bitmapscan = off; Modified: expected/full-text-search/text/single/score/or.out (+2 -2) =================================================================== --- expected/full-text-search/text/single/score/or.out 2015-04-05 16:03:04 +0900 (fe0b3e8) +++ expected/full-text-search/text/single/score/or.out 2015-04-05 23:56:56 +0900 (e8087dc) @@ -1,11 +1,11 @@ CREATE TABLE memos ( - id integer, + id integer PRIMARY KEY, content text ); INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); -CREATE INDEX grnindex ON memos USING pgroonga (content); +CREATE INDEX grnindex ON memos USING pgroonga (id, content); SET enable_seqscan = off; SET enable_indexscan = off; SET enable_bitmapscan = on; Modified: expected/full-text-search/text/single/score/updated.out (+2 -2) =================================================================== --- expected/full-text-search/text/single/score/updated.out 2015-04-05 16:03:04 +0900 (c79db68) +++ expected/full-text-search/text/single/score/updated.out 2015-04-05 23:56:56 +0900 (4d12381) @@ -1,8 +1,8 @@ CREATE TABLE memos ( - id integer, + id integer PRIMARY KEY, content text ); -CREATE INDEX grnindex ON memos USING pgroonga (content); +CREATE INDEX grnindex ON memos USING pgroonga (id, content); INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); Modified: pgroonga.c (+186 -25) =================================================================== --- pgroonga.c 2015-04-05 16:03:04 +0900 (cf734d6) +++ pgroonga.c 2015-04-05 23:56:56 +0900 (ecdfc92) @@ -14,13 +14,16 @@ #include <lib/ilist.h> #include <mb/pg_wchar.h> #include <miscadmin.h> +#include <storage/bufmgr.h> #include <storage/ipc.h> #include <storage/lmgr.h> #include <utils/array.h> #include <utils/builtins.h> #include <utils/lsyscache.h> #include <utils/selfuncs.h> +#include <utils/snapmgr.h> #include <utils/timestamp.h> +#include <utils/tqual.h> #include <utils/typcache.h> #include <groonga.h> @@ -69,6 +72,15 @@ typedef struct PGrnScanOpaqueData { slist_node node; Oid dataTableID; + struct + { + AttrNumber number; + Oid type; + grn_id domain; + unsigned char flags; + grn_obj *lexicon; + grn_obj *indexColumn; + } primaryKey; grn_obj *sourcesTable; grn_obj *sourcesCtidColumn; grn_obj minBorderValue; @@ -124,6 +136,7 @@ static grn_ctx grnContext; static grn_ctx *ctx = &grnContext; static grn_obj buffer; static grn_obj ctidBuffer; +static grn_obj scoreBuffer; static grn_obj headBuffer; static grn_obj bodyBuffer; static grn_obj footBuffer; @@ -199,6 +212,7 @@ PGrnOnProcExit(int code, Datum arg) GRN_OBJ_FIN(ctx, &bodyBuffer); GRN_OBJ_FIN(ctx, &headBuffer); GRN_OBJ_FIN(ctx, &ctidBuffer); + GRN_OBJ_FIN(ctx, &scoreBuffer); GRN_OBJ_FIN(ctx, &buffer); db = grn_ctx_db(ctx); @@ -332,6 +346,7 @@ _PG_init(void) on_proc_exit(PGrnOnProcExit, 0); GRN_VOID_INIT(&buffer); + GRN_FLOAT_INIT(&scoreBuffer, 0); GRN_UINT64_INIT(&ctidBuffer, 0); GRN_TEXT_INIT(&headBuffer, 0); GRN_TEXT_INIT(&bodyBuffer, 0); @@ -892,40 +907,89 @@ UInt64ToCtid(uint64 key) return ctid; } +static bool +PGrnIsAliveCtid(Relation table, ItemPointer ctid) +{ + Buffer buffer; + HeapTupleData tuple; + Snapshot snapshot; + bool allDead; + bool found; + bool isAlive; + + buffer = ReadBuffer(table, ItemPointerGetBlockNumber(ctid)); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + found = heap_hot_search_buffer(ctid, table, buffer, snapshot, &tuple, + &allDead, true); + isAlive = (found && CtidToUInt64(&(tuple.t_self)) == CtidToUInt64(ctid)); + UnregisterSnapshot(snapshot); + ReleaseBuffer(buffer); + + return isAlive; +} + static double -PGrnCollectScore(Oid tableID, ItemPointer ctid) +PGrnCollectScoreScanOpaque(Relation table, HeapTuple tuple, PGrnScanOpaque so) { double score = 0.0; - uint64_t key; - grn_id recordID = GRN_ID_NIL; - slist_iter iter; - grn_obj scoreBuffer; + TupleDesc desc; + bool isNULL; + Datum primaryKeyValue; + grn_table_cursor *tableCursor; + grn_obj *indexCursor; + grn_id recordID; - key = CtidToUInt64(ctid); - GRN_FLOAT_INIT(&scoreBuffer, 0); + if (so->dataTableID != tuple->t_tableOid) + return 0.0; - slist_foreach(iter, &PGrnScanOpaques) - { - PGrnScanOpaque so; - grn_id id; + if (!so->scoreAccessor) + return 0.0; - so = slist_container(PGrnScanOpaqueData, node, iter.cur); - if (so->dataTableID != tableID) - continue; + if (!OidIsValid(so->primaryKey.type)) + return 0.0; - if (!so->scoreAccessor) - continue; + grn_obj_reinit(ctx, &buffer, so->primaryKey.domain, so->primaryKey.flags); + + desc = RelationGetDescr(table); + primaryKeyValue = heap_getattr(tuple, so->primaryKey.number, desc, &isNULL); + PGrnConvertDatum(primaryKeyValue, so->primaryKey.type, &buffer); + + tableCursor = grn_table_cursor_open(ctx, so->primaryKey.lexicon, + GRN_BULK_HEAD(&buffer), + GRN_BULK_VSIZE(&buffer), + GRN_BULK_HEAD(&buffer), + GRN_BULK_VSIZE(&buffer), + 0, -1, GRN_CURSOR_ASCENDING); + if (!tableCursor) + return 0.0; - if (recordID == GRN_ID_NIL) - { - recordID = grn_table_get(ctx, so->sourcesTable, - &key, sizeof(uint64_t)); - } + + indexCursor = grn_index_cursor_open(ctx, + tableCursor, + so->primaryKey.indexColumn, + GRN_ID_NIL, GRN_ID_MAX, 0); + if (!indexCursor) + { + grn_table_cursor_close(ctx, tableCursor); + return 0.0; + } + + while ((recordID = grn_table_cursor_next(ctx, indexCursor)) != GRN_ID_NIL) + { + grn_id id; + ItemPointerData ctid; id = grn_table_get(ctx, so->searched, &recordID, sizeof(grn_id)); if (id == GRN_ID_NIL) continue; + GRN_BULK_REWIND(&ctidBuffer); + grn_obj_get_value(ctx, so->ctidAccessor, id, &ctidBuffer); + ctid = UInt64ToCtid(GRN_UINT64_VALUE(&ctidBuffer)); + + if (!PGrnIsAliveCtid(table, &ctid)) + continue; + GRN_BULK_REWIND(&scoreBuffer); grn_obj_get_value(ctx, so->scoreAccessor, id, &scoreBuffer); if (scoreBuffer.header.domain == GRN_DB_FLOAT) @@ -938,7 +1002,25 @@ PGrnCollectScore(Oid tableID, ItemPointer ctid) } } - GRN_OBJ_FIN(ctx, &scoreBuffer); + grn_obj_unlink(ctx, indexCursor); + grn_obj_unlink(ctx, tableCursor); + + return score; +} + +static double +PGrnCollectScore(Relation table, HeapTuple tuple) +{ + double score = 0.0; + slist_iter iter; + + slist_foreach(iter, &PGrnScanOpaques) + { + PGrnScanOpaque so; + + so = slist_container(PGrnScanOpaqueData, node, iter.cur); + score += PGrnCollectScoreScanOpaque(table, tuple, so); + } return score; } @@ -959,10 +1041,22 @@ pgroonga_score(PG_FUNCTION_ARGS) recordType = HeapTupleHeaderGetTypMod(header); desc = lookup_rowtype_tupdesc(type, recordType); - if (desc->natts > 0) + if (desc->natts > 0 && !slist_is_empty(&PGrnScanOpaques)) { - Oid tableID = desc->attrs[0]->attrelid; - score = PGrnCollectScore(tableID, &(header->t_ctid)); + HeapTupleData tupleData; + HeapTuple tuple; + Relation table; + + tupleData.t_len = HeapTupleHeaderGetDatumLength(header); + tupleData.t_tableOid = desc->attrs[0]->attrelid; + tupleData.t_data = header; + tuple = &tupleData; + + table = RelationIdGetRelation(tuple->t_tableOid); + + score = PGrnCollectScore(table, tuple); + + RelationClose(table); } ReleaseTupleDesc(desc); @@ -1276,9 +1370,76 @@ pgroonga_insert(PG_FUNCTION_ARGS) } static void +PGrnScanOpaqueInitPrimaryKey(PGrnScanOpaque so, Relation index) +{ + Relation table; + AttrNumber primaryKeyNumber = InvalidAttrNumber; + Oid primaryKeyTypeID = InvalidOid; + bool havePrimaryKeyInIndex = false; + + table = RelationIdGetRelation(so->dataTableID); + if (OidIsValid(table->rd_replidindex)) + { + Relation primaryKeyIndex = NULL; + primaryKeyIndex = index_open(table->rd_replidindex, NoLock); + if (primaryKeyIndex->rd_index->indnatts == 1) + { + TupleDesc desc; + + primaryKeyNumber = primaryKeyIndex->rd_index->indkey.values[0]; + + desc = RelationGetDescr(table); + primaryKeyTypeID = desc->attrs[primaryKeyNumber - 1]->atttypid; + } + index_close(primaryKeyIndex, NoLock); + } + + if (AttributeNumberIsValid(primaryKeyNumber)) + { + int i, nColumns; + + nColumns = index->rd_index->indkey.ndim; + for (i = 0; i < nColumns; i++) + { + if (index->rd_index->indkey.values[i] == primaryKeyNumber) + { + havePrimaryKeyInIndex = true; + break; + } + } + } + + if (havePrimaryKeyInIndex) + { + so->primaryKey.number = primaryKeyNumber; + so->primaryKey.type = primaryKeyTypeID; + so->primaryKey.domain = PGrnGetType(index, + primaryKeyNumber, + &(so->primaryKey.flags)); + so->primaryKey.indexColumn = PGrnLookupIndexColumn(index, + primaryKeyNumber - 1, + ERROR); + so->primaryKey.lexicon = + grn_ctx_at(ctx, so->primaryKey.indexColumn->header.domain); + } + else + { + so->primaryKey.number = InvalidAttrNumber; + so->primaryKey.type = InvalidOid; + so->primaryKey.domain = GRN_ID_NIL; + so->primaryKey.flags = 0; + so->primaryKey.lexicon = NULL; + so->primaryKey.indexColumn = NULL; + } + + RelationClose(table); +} + +static void PGrnScanOpaqueInit(PGrnScanOpaque so, Relation index) { so->dataTableID = index->rd_index->indrelid; + PGrnScanOpaqueInitPrimaryKey(so, index); so->sourcesTable = PGrnLookupSourcesTable(index, ERROR); so->sourcesCtidColumn = PGrnLookupSourcesCtidColumn(index, ERROR); GRN_VOID_INIT(&(so->minBorderValue)); Renamed: sql/full-text-search/text/single/score/and.sql (+2 -2) 84% =================================================================== --- sql/full-text-search/text/single/score/and.sql.disabled 2015-04-05 16:03:04 +0900 (c06b2ba) +++ sql/full-text-search/text/single/score/and.sql 2015-04-05 23:56:56 +0900 (ba95096) @@ -1,5 +1,5 @@ CREATE TABLE memos ( - id integer, + id integer PRIMARY KEY, content text ); @@ -7,7 +7,7 @@ INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); -CREATE INDEX grnindex ON memos USING pgroonga (content); +CREATE INDEX grnindex ON memos USING pgroonga (id, content); SET enable_seqscan = off; SET enable_indexscan = on; Renamed: sql/full-text-search/text/single/score/or.sql (+2 -2) 84% =================================================================== --- sql/full-text-search/text/single/score/or.sql.disabled 2015-04-05 16:03:04 +0900 (3ed7173) +++ sql/full-text-search/text/single/score/or.sql 2015-04-05 23:56:56 +0900 (ee3b1bd) @@ -1,5 +1,5 @@ CREATE TABLE memos ( - id integer, + id integer PRIMARY KEY, content text ); @@ -7,7 +7,7 @@ INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); -CREATE INDEX grnindex ON memos USING pgroonga (content); +CREATE INDEX grnindex ON memos USING pgroonga (id, content); SET enable_seqscan = off; SET enable_indexscan = off; Renamed: sql/full-text-search/text/single/score/updated.sql (+2 -2) 86% =================================================================== --- sql/full-text-search/text/single/score/updated.sql.disabled 2015-04-05 16:03:04 +0900 (45802b0) +++ sql/full-text-search/text/single/score/updated.sql 2015-04-05 23:56:56 +0900 (2b1fb9e) @@ -1,9 +1,9 @@ CREATE TABLE memos ( - id integer, + id integer PRIMARY KEY, content text ); -CREATE INDEX grnindex ON memos USING pgroonga (content); +CREATE INDEX grnindex ON memos USING pgroonga (id, content); INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); -------------- next part -------------- HTML����������������������������...Download