Kouhei Sutou
null+****@clear*****
Wed Jan 27 00:05:02 JST 2016
Kouhei Sutou 2016-01-27 00:05:02 +0900 (Wed, 27 Jan 2016) New Revision: 8809a0414430a3d95e992dd35eac8f03aeb37485 https://github.com/pgroonga/pgroonga/commit/8809a0414430a3d95e992dd35eac8f03aeb37485 Message: Add &@> operator It matches when any query in RHS array is matched. Added files: expected/full-text-search/text/single/query-contain/bitmapscan.out expected/full-text-search/text/single/query-contain/indexscan.out expected/full-text-search/text/single/query-contain/seqscan.out sql/full-text-search/text/single/query-contain/bitmapscan.sql sql/full-text-search/text/single/query-contain/indexscan.sql sql/full-text-search/text/single/query-contain/seqscan.sql Modified files: pgroonga.sql src/pgroonga.c src/pgroonga.h Added: expected/full-text-search/text/single/query-contain/bitmapscan.out (+22 -0) 100644 =================================================================== --- /dev/null +++ expected/full-text-search/text/single/query-contain/bitmapscan.out 2016-01-27 00:05:02 +0900 (097ac75) @@ -0,0 +1,22 @@ +CREATE TABLE memos ( + id integer, + content text +); +INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); +INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); +CREATE INDEX pgrn_index ON memos + USING pgroonga (content pgroonga.text_full_text_search_ops_v2); +SET enable_seqscan = off; +SET enable_indexscan = off; +SET enable_bitmapscan = on; +SELECT id, content + FROM memos + WHERE content &@> Array['rdbms', 'engine']; + id | content +----+------------------------------------------ + 1 | PostgreSQL is a RDBMS. + 2 | Groonga is fast full text search engine. 
+(2 rows) + +DROP TABLE memos; Added: expected/full-text-search/text/single/query-contain/indexscan.out (+22 -0) 100644 =================================================================== --- /dev/null +++ expected/full-text-search/text/single/query-contain/indexscan.out 2016-01-27 00:05:02 +0900 (af38fc9) @@ -0,0 +1,22 @@ +CREATE TABLE memos ( + id integer, + content text +); +INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); +INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); +CREATE INDEX pgrn_index ON memos + USING pgroonga (content pgroonga.text_full_text_search_ops_v2); +SET enable_seqscan = off; +SET enable_indexscan = on; +SET enable_bitmapscan = off; +SELECT id, content + FROM memos + WHERE content &@> Array['rdbms', 'engine']; + id | content +----+------------------------------------------ + 1 | PostgreSQL is a RDBMS. + 2 | Groonga is fast full text search engine. +(2 rows) + +DROP TABLE memos; Added: expected/full-text-search/text/single/query-contain/seqscan.out (+20 -0) 100644 =================================================================== --- /dev/null +++ expected/full-text-search/text/single/query-contain/seqscan.out 2016-01-27 00:05:02 +0900 (d830ff5) @@ -0,0 +1,20 @@ +CREATE TABLE memos ( + id integer, + content text +); +INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); +INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); +SET enable_seqscan = on; +SET enable_indexscan = off; +SET enable_bitmapscan = off; +SELECT id, content + FROM memos + WHERE content &@> Array['rdbms', 'engine']; + id | content +----+------------------------------------------ + 1 | PostgreSQL is a RDBMS. + 2 | Groonga is fast full text search engine. 
+(2 rows) + +DROP TABLE memos; Modified: pgroonga.sql (+20 -1) =================================================================== --- pgroonga.sql 2016-01-26 22:56:58 +0900 (8d18d40) +++ pgroonga.sql 2016-01-27 00:05:02 +0900 (9dce550) @@ -206,7 +206,7 @@ CREATE FUNCTION pgroonga.options(internal) DELETE FROM pg_catalog.pg_am WHERE amname = 'pgroonga'; INSERT INTO pg_catalog.pg_am VALUES( 'pgroonga', -- amname - 11, -- amstrategies + 12, -- amstrategies 0, -- amsupport true, -- amcanorder true, -- amcanorderbyop @@ -372,3 +372,22 @@ CREATE OPERATOR CLASS pgroonga.text_regexp_ops FOR TYPE text CREATE OPERATOR CLASS pgroonga.varchar_regexp_ops FOR TYPE varchar USING pgroonga AS OPERATOR 10 @~; + + +/* v2 */ +CREATE FUNCTION pgroonga.query_contain_text(text, text[]) + RETURNS bool + AS 'MODULE_PATHNAME', 'pgroonga_query_contain_text' + LANGUAGE C + IMMUTABLE + STRICT; + +CREATE OPERATOR &@> ( + PROCEDURE = pgroonga.query_contain_text, + LEFTARG = text, + RIGHTARG = text[] +); + +CREATE OPERATOR CLASS pgroonga.text_full_text_search_ops_v2 FOR TYPE text + USING pgroonga AS + OPERATOR 12 &@> (text, text[]); Added: sql/full-text-search/text/single/query-contain/bitmapscan.sql (+21 -0) 100644 =================================================================== --- /dev/null +++ sql/full-text-search/text/single/query-contain/bitmapscan.sql 2016-01-27 00:05:02 +0900 (d2bfdb1) @@ -0,0 +1,21 @@ +CREATE TABLE memos ( + id integer, + content text +); + +INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); +INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); + +CREATE INDEX pgrn_index ON memos + USING pgroonga (content pgroonga.text_full_text_search_ops_v2); + +SET enable_seqscan = off; +SET enable_indexscan = off; +SET enable_bitmapscan = on; + +SELECT id, content + FROM memos + WHERE content &@> Array['rdbms', 'engine']; + +DROP TABLE memos; Added: 
sql/full-text-search/text/single/query-contain/indexscan.sql (+21 -0) 100644 =================================================================== --- /dev/null +++ sql/full-text-search/text/single/query-contain/indexscan.sql 2016-01-27 00:05:02 +0900 (60fed74) @@ -0,0 +1,21 @@ +CREATE TABLE memos ( + id integer, + content text +); + +INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); +INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); + +CREATE INDEX pgrn_index ON memos + USING pgroonga (content pgroonga.text_full_text_search_ops_v2); + +SET enable_seqscan = off; +SET enable_indexscan = on; +SET enable_bitmapscan = off; + +SELECT id, content + FROM memos + WHERE content &@> Array['rdbms', 'engine']; + +DROP TABLE memos; Added: sql/full-text-search/text/single/query-contain/seqscan.sql (+18 -0) 100644 =================================================================== --- /dev/null +++ sql/full-text-search/text/single/query-contain/seqscan.sql 2016-01-27 00:05:02 +0900 (1897290) @@ -0,0 +1,18 @@ +CREATE TABLE memos ( + id integer, + content text +); + +INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.'); +INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.'); + +SET enable_seqscan = on; +SET enable_indexscan = off; +SET enable_bitmapscan = off; + +SELECT id, content + FROM memos + WHERE content &@> Array['rdbms', 'engine']; + +DROP TABLE memos; Modified: src/pgroonga.c (+198 -70) =================================================================== --- src/pgroonga.c 2016-01-26 22:56:58 +0900 (8b8e598) +++ src/pgroonga.c 2016-01-27 00:05:02 +0900 (5d28a8f) @@ -130,6 +130,9 @@ PG_FUNCTION_INFO_V1(pgroonga_match_query_varchar); PG_FUNCTION_INFO_V1(pgroonga_match_regexp_text); PG_FUNCTION_INFO_V1(pgroonga_match_regexp_varchar); +/* v2 */ 
+PG_FUNCTION_INFO_V1(pgroonga_query_contain_text); + PG_FUNCTION_INFO_V1(pgroonga_insert); PG_FUNCTION_INFO_V1(pgroonga_beginscan); PG_FUNCTION_INFO_V1(pgroonga_gettuple); @@ -304,17 +307,12 @@ _PG_init(void) } static grn_id -PGrnGetType(Relation index, AttrNumber n, unsigned char *flags) +PGrnPGTypeToGrnType(Oid pgTypeID, unsigned char *flags) { - TupleDesc desc = RelationGetDescr(index); - Form_pg_attribute attr; grn_id typeID = GRN_ID_NIL; unsigned char typeFlags = 0; - int32 maxLength; - attr = desc->attrs[n]; - - switch (attr->atttypid) + switch (pgTypeID) { case BOOLOID: typeID = GRN_DB_BOOL; @@ -341,15 +339,6 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags) typeID = GRN_DB_LONG_TEXT; break; case VARCHAROID: - maxLength = type_maximum_size(attr->atttypid, attr->atttypmod); - if (maxLength > 4096) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("pgroonga: " - "4097bytes over size varchar isn't supported: %d", - maxLength))); - } typeID = GRN_DB_SHORT_TEXT; /* 4KB */ break; #ifdef NOT_USED @@ -358,16 +347,6 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags) break; #endif case VARCHARARRAYOID: - maxLength = type_maximum_size(VARCHAROID, attr->atttypmod); - if (maxLength > 4096) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("pgroonga: " - "array of 4097bytes over size varchar " - "isn't supported: %d", - maxLength))); - } typeID = GRN_DB_SHORT_TEXT; typeFlags |= GRN_OBJ_VECTOR; break; @@ -378,7 +357,7 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags) default: ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("pgroonga: unsupported type: %u", attr->atttypid))); + errmsg("pgroonga: unsupported type: %u", pgTypeID))); break; } @@ -390,6 +369,35 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags) return typeID; } +static grn_id +PGrnGetType(Relation index, AttrNumber n, unsigned char *flags) +{ + TupleDesc desc = 
RelationGetDescr(index); + Form_pg_attribute attr; + int32 maxLength; + + attr = desc->attrs[n]; + switch (attr->atttypid) + { + case VARCHAROID: + case VARCHARARRAYOID: + maxLength = type_maximum_size(VARCHAROID, attr->atttypmod); + if (maxLength > 4096) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("pgroonga: " + "4097bytes over size varchar isn't supported: %d", + maxLength))); + } + break; + default: + break; + } + + return PGrnPGTypeToGrnType(attr->atttypid, flags); +} + #ifdef PGRN_SUPPORT_INDEX_ONLY_SCAN static Datum PGrnConvertToDatumArrayType(grn_obj *vector, Oid typeID) @@ -517,13 +525,14 @@ PGrnConvertToDatum(grn_obj *value, Oid typeID) #endif static bool -PGrnIsForFullTextSearchIndex(Relation index, int nthAttribute) +PGrnIsQueryStrategyIndex(Relation index, int nthAttribute) { - Oid queryStrategyOID; + Oid strategyOID; Oid leftType; Oid rightType; leftType = index->rd_opcintype[nthAttribute]; + switch (leftType) { case VARCHARARRAYOID: @@ -536,11 +545,53 @@ PGrnIsForFullTextSearchIndex(Relation index, int nthAttribute) rightType = leftType; break; } - queryStrategyOID = get_opfamily_member(index->rd_opfamily[nthAttribute], - leftType, - rightType, - PGrnQueryStrategyNumber); - return OidIsValid(queryStrategyOID); + + strategyOID = get_opfamily_member(index->rd_opfamily[nthAttribute], + leftType, + rightType, + PGrnQueryStrategyNumber); + return OidIsValid(strategyOID); +} + +static bool +PGrnIsQueryContainStrategyIndex(Relation index, int nthAttribute) +{ + Oid strategyOID; + Oid leftType; + Oid rightType; + + leftType = index->rd_opcintype[nthAttribute]; + + switch (leftType) + { + case VARCHAROID: + rightType = VARCHARARRAYOID; + break; + case TEXTOID: + rightType = TEXTARRAYOID; + break; + default: + rightType = leftType; + break; + } + + strategyOID = get_opfamily_member(index->rd_opfamily[nthAttribute], + leftType, + rightType, + PGrnQueryContainStrategyNumber); + return OidIsValid(strategyOID); +} + +static bool 
+PGrnIsForFullTextSearchIndex(Relation index, int nthAttribute) +{ + if (PGrnIsQueryStrategyIndex(index, nthAttribute)) + return true; + + if (PGrnIsQueryContainStrategyIndex(index, nthAttribute)) + return true; + + return false; } static bool @@ -1374,6 +1425,41 @@ pgroonga_match_regexp_varchar(PG_FUNCTION_ARGS) PG_RETURN_BOOL(matched); } +/* v2 */ +/** + * pgroonga.query_contain(target text, queries text[]) : bool + */ +Datum +pgroonga_query_contain_text(PG_FUNCTION_ARGS) +{ + text *target = PG_GETARG_TEXT_PP(0); + ArrayType *queries = PG_GETARG_ARRAYTYPE_P(1); + grn_bool matched; + int i, n; + + n = ARR_DIMS(queries)[0]; + for (i = 1; i <= n; i++) + { + Datum queryDatum; + text *query; + bool isNULL; + + queryDatum = array_ref(queries, 1, &i, -1, -1, false, 'i', &isNULL); + if (isNULL) + continue; + + query = DatumGetTextPP(queryDatum); + matched = pgroonga_match_query_raw(VARDATA_ANY(target), + VARSIZE_ANY_EXHDR(target), + VARDATA_ANY(query), + VARSIZE_ANY_EXHDR(query)); + if (matched) + break; + } + + PG_RETURN_BOOL(matched); +} + static void PGrnInsert(Relation index, grn_obj *sourcesTable, @@ -1863,6 +1949,35 @@ PGrnSearchBuildConditionLikeRegexp(PGrnSearchData *data, grn_expr_append_op(ctx, expression, GRN_OP_REGEXP, 2); } +static void +PGrnSearchBuildConditionQuery(PGrnScanOpaque so, + PGrnSearchData *data, + grn_obj *targetColumn, + const char *query, + unsigned int querySize) +{ + grn_rc rc; + grn_obj *matchTarget, *matchTargetVariable; + grn_expr_flags flags = GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT; + + GRN_EXPR_CREATE_FOR_QUERY(ctx, so->sourcesTable, + matchTarget, matchTargetVariable); + GRN_PTR_PUT(ctx, &(data->matchTargets), matchTarget); + grn_expr_append_obj(ctx, matchTarget, targetColumn, GRN_OP_PUSH, 1); + + rc = grn_expr_parse(ctx, data->expression, + query, querySize, + matchTarget, GRN_OP_MATCH, GRN_OP_AND, + flags); + if (rc != GRN_SUCCESS) + { + ereport(ERROR, + (errcode(PGrnRCToPgErrorCode(rc)), + errmsg("pgroonga: failed to 
parse expression: %s", + ctx->errbuf))); + } +} + static bool PGrnSearchBuildCondition(IndexScanDesc scan, PGrnScanOpaque so, @@ -1876,6 +1991,7 @@ PGrnSearchBuildCondition(IndexScanDesc scan, const char *targetColumnName; grn_obj *targetColumn; grn_operator operator = GRN_OP_NOP; + Oid valueTypeID; /* NULL key is not supported */ if (key->sk_flags & SK_ISNULL) @@ -1891,24 +2007,15 @@ PGrnSearchBuildCondition(IndexScanDesc scan, if (PGrnAttributeIsJSONB(attribute->atttypid)) return PGrnJSONBBuildSearchCondition(data, key, targetColumn); + valueTypeID = attribute->atttypid; + switch (valueTypeID) { - grn_id domain; - unsigned char flags = 0; - domain = PGrnGetType(index, key->sk_attno - 1, NULL); - grn_obj_reinit(ctx, &(buffers->general), domain, flags); - } - { - Oid valueTypeID = attribute->atttypid; - switch (valueTypeID) - { - case VARCHARARRAYOID: - valueTypeID = VARCHAROID; - break; - case TEXTARRAYOID: - valueTypeID = TEXTOID; - break; - } - PGrnConvertFromData(key->sk_argument, valueTypeID, &(buffers->general)); + case VARCHARARRAYOID: + valueTypeID = VARCHAROID; + break; + case TEXTARRAYOID: + valueTypeID = TEXTOID; + break; } switch (key->sk_strategy) @@ -1939,6 +2046,14 @@ PGrnSearchBuildCondition(IndexScanDesc scan, case PGrnRegexpStrategyNumber: operator = GRN_OP_REGEXP; break; + case PGrnQueryContainStrategyNumber: + switch (attribute->atttypid) + { + case TEXTOID: + valueTypeID = TEXTARRAYOID; + break; + } + break; default: ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -1947,6 +2062,14 @@ PGrnSearchBuildCondition(IndexScanDesc scan, break; } + { + grn_id domain; + unsigned char flags = 0; + domain = PGrnPGTypeToGrnType(valueTypeID, &flags); + grn_obj_reinit(ctx, &(buffers->general), domain, flags); + PGrnConvertFromData(key->sk_argument, valueTypeID, &(buffers->general)); + } + switch (key->sk_strategy) { case PGrnLikeStrategyNumber: @@ -1959,27 +2082,32 @@ PGrnSearchBuildCondition(IndexScanDesc scan, 
PGrnSearchBuildConditionLikeMatch(data, targetColumn, &(buffers->general)); break; case PGrnQueryStrategyNumber: + PGrnSearchBuildConditionQuery(so, + data, + targetColumn, + GRN_TEXT_VALUE(&(buffers->general)), + GRN_TEXT_LEN(&(buffers->general))); + break; + case PGrnQueryContainStrategyNumber: { - grn_rc rc; - grn_obj *matchTarget, *matchTargetVariable; - grn_expr_flags flags = - GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT; - - GRN_EXPR_CREATE_FOR_QUERY(ctx, so->sourcesTable, - matchTarget, matchTargetVariable); - GRN_PTR_PUT(ctx, &(data->matchTargets), matchTarget); - grn_expr_append_obj(ctx, matchTarget, targetColumn, GRN_OP_PUSH, 1); - - rc = grn_expr_parse(ctx, data->expression, - GRN_TEXT_VALUE(&(buffers->general)), GRN_TEXT_LEN(&(buffers->general)), - matchTarget, GRN_OP_MATCH, GRN_OP_AND, - flags); - if (rc != GRN_SUCCESS) + grn_obj *queries = &(buffers->general); + unsigned int i, n; + + n = grn_vector_size(ctx, queries); + for (i = 0; i < n; i++) { - ereport(ERROR, - (errcode(PGrnRCToPgErrorCode(rc)), - errmsg("pgroonga: failed to parse expression: %s", - ctx->errbuf))); + const char *query; + unsigned int querySize; + + querySize = grn_vector_get_element(ctx, queries, i, + &query, NULL, NULL); + PGrnSearchBuildConditionQuery(so, + data, + targetColumn, + query, + querySize); + if (i > 0) + grn_expr_append_op(ctx, data->expression, GRN_OP_OR, 2); } break; } Modified: src/pgroonga.h (+2 -0) =================================================================== --- src/pgroonga.h 2016-01-26 22:56:58 +0900 (54b34c6) +++ src/pgroonga.h 2016-01-27 00:05:02 +0900 (a933373) @@ -24,6 +24,8 @@ #define PGrnRegexpStrategyNumber 10 /* operator @~ (@~ in Groonga) */ #define PGrnJSONContainStrategyNumber 11 /* operator @> */ +#define PGrnQueryContainStrategyNumber 12 /* operator &@> */ + /* file and table names */ #define PGrnLogBasename "pgroonga.log" #define PGrnDatabaseBasename "pgrn" -------------- next part -------------- 
HTMLの添付ファイルを保管しました...Download