[Groonga-commit] pgroonga/pgroonga at 8809a04 [master] Add &@> operator

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jan 27 00:05:02 JST 2016


Kouhei Sutou	2016-01-27 00:05:02 +0900 (Wed, 27 Jan 2016)

  New Revision: 8809a0414430a3d95e992dd35eac8f03aeb37485
  https://github.com/pgroonga/pgroonga/commit/8809a0414430a3d95e992dd35eac8f03aeb37485

  Message:
    Add &@> operator
    
    It matches when any query in RHS array is matched.

  Added files:
    expected/full-text-search/text/single/query-contain/bitmapscan.out
    expected/full-text-search/text/single/query-contain/indexscan.out
    expected/full-text-search/text/single/query-contain/seqscan.out
    sql/full-text-search/text/single/query-contain/bitmapscan.sql
    sql/full-text-search/text/single/query-contain/indexscan.sql
    sql/full-text-search/text/single/query-contain/seqscan.sql
  Modified files:
    pgroonga.sql
    src/pgroonga.c
    src/pgroonga.h

  Added: expected/full-text-search/text/single/query-contain/bitmapscan.out (+22 -0) 100644
===================================================================
--- /dev/null
+++ expected/full-text-search/text/single/query-contain/bitmapscan.out    2016-01-27 00:05:02 +0900 (097ac75)
@@ -0,0 +1,22 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+CREATE INDEX pgrn_index ON memos
+ USING pgroonga (content pgroonga.text_full_text_search_ops_v2);
+SET enable_seqscan = off;
+SET enable_indexscan = off;
+SET enable_bitmapscan = on;
+SELECT id, content
+  FROM memos
+ WHERE content &@> Array['rdbms', 'engine'];
+ id |                 content                  
+----+------------------------------------------
+  1 | PostgreSQL is a RDBMS.
+  2 | Groonga is fast full text search engine.
+(2 rows)
+
+DROP TABLE memos;

  Added: expected/full-text-search/text/single/query-contain/indexscan.out (+22 -0) 100644
===================================================================
--- /dev/null
+++ expected/full-text-search/text/single/query-contain/indexscan.out    2016-01-27 00:05:02 +0900 (af38fc9)
@@ -0,0 +1,22 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+CREATE INDEX pgrn_index ON memos
+ USING pgroonga (content pgroonga.text_full_text_search_ops_v2);
+SET enable_seqscan = off;
+SET enable_indexscan = on;
+SET enable_bitmapscan = off;
+SELECT id, content
+  FROM memos
+ WHERE content &@> Array['rdbms', 'engine'];
+ id |                 content                  
+----+------------------------------------------
+  1 | PostgreSQL is a RDBMS.
+  2 | Groonga is fast full text search engine.
+(2 rows)
+
+DROP TABLE memos;

  Added: expected/full-text-search/text/single/query-contain/seqscan.out (+20 -0) 100644
===================================================================
--- /dev/null
+++ expected/full-text-search/text/single/query-contain/seqscan.out    2016-01-27 00:05:02 +0900 (d830ff5)
@@ -0,0 +1,20 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+SET enable_seqscan = on;
+SET enable_indexscan = off;
+SET enable_bitmapscan = off;
+SELECT id, content
+  FROM memos
+ WHERE content &@> Array['rdbms', 'engine'];
+ id |                 content                  
+----+------------------------------------------
+  1 | PostgreSQL is a RDBMS.
+  2 | Groonga is fast full text search engine.
+(2 rows)
+
+DROP TABLE memos;

  Modified: pgroonga.sql (+20 -1)
===================================================================
--- pgroonga.sql    2016-01-26 22:56:58 +0900 (8d18d40)
+++ pgroonga.sql    2016-01-27 00:05:02 +0900 (9dce550)
@@ -206,7 +206,7 @@ CREATE FUNCTION pgroonga.options(internal)
 DELETE FROM pg_catalog.pg_am WHERE amname = 'pgroonga';
 INSERT INTO pg_catalog.pg_am VALUES(
 	'pgroonga',	-- amname
-	11,		-- amstrategies
+	12,		-- amstrategies
 	0,		-- amsupport
 	true,		-- amcanorder
 	true,		-- amcanorderbyop
@@ -372,3 +372,22 @@ CREATE OPERATOR CLASS pgroonga.text_regexp_ops FOR TYPE text
 CREATE OPERATOR CLASS pgroonga.varchar_regexp_ops FOR TYPE varchar
 	USING pgroonga AS
 		OPERATOR 10 @~;
+
+
+/* v2 */
+CREATE FUNCTION pgroonga.query_contain_text(text, text[])
+	RETURNS bool
+	AS 'MODULE_PATHNAME', 'pgroonga_query_contain_text'
+	LANGUAGE C
+	IMMUTABLE
+	STRICT;
+
+CREATE OPERATOR &@> (
+	PROCEDURE = pgroonga.query_contain_text,
+	LEFTARG = text,
+	RIGHTARG = text[]
+);
+
+CREATE OPERATOR CLASS pgroonga.text_full_text_search_ops_v2 FOR TYPE text
+	USING pgroonga AS
+		OPERATOR 12 &@> (text, text[]);

  Added: sql/full-text-search/text/single/query-contain/bitmapscan.sql (+21 -0) 100644
===================================================================
--- /dev/null
+++ sql/full-text-search/text/single/query-contain/bitmapscan.sql    2016-01-27 00:05:02 +0900 (d2bfdb1)
@@ -0,0 +1,21 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+
+CREATE INDEX pgrn_index ON memos
+ USING pgroonga (content pgroonga.text_full_text_search_ops_v2);
+
+SET enable_seqscan = off;
+SET enable_indexscan = off;
+SET enable_bitmapscan = on;
+
+SELECT id, content
+  FROM memos
+ WHERE content &@> Array['rdbms', 'engine'];
+
+DROP TABLE memos;

  Added: sql/full-text-search/text/single/query-contain/indexscan.sql (+21 -0) 100644
===================================================================
--- /dev/null
+++ sql/full-text-search/text/single/query-contain/indexscan.sql    2016-01-27 00:05:02 +0900 (60fed74)
@@ -0,0 +1,21 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+
+CREATE INDEX pgrn_index ON memos
+ USING pgroonga (content pgroonga.text_full_text_search_ops_v2);
+
+SET enable_seqscan = off;
+SET enable_indexscan = on;
+SET enable_bitmapscan = off;
+
+SELECT id, content
+  FROM memos
+ WHERE content &@> Array['rdbms', 'engine'];
+
+DROP TABLE memos;

  Added: sql/full-text-search/text/single/query-contain/seqscan.sql (+18 -0) 100644
===================================================================
--- /dev/null
+++ sql/full-text-search/text/single/query-contain/seqscan.sql    2016-01-27 00:05:02 +0900 (1897290)
@@ -0,0 +1,18 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+
+SET enable_seqscan = on;
+SET enable_indexscan = off;
+SET enable_bitmapscan = off;
+
+SELECT id, content
+  FROM memos
+ WHERE content &@> Array['rdbms', 'engine'];
+
+DROP TABLE memos;

  Modified: src/pgroonga.c (+198 -70)
===================================================================
--- src/pgroonga.c    2016-01-26 22:56:58 +0900 (8b8e598)
+++ src/pgroonga.c    2016-01-27 00:05:02 +0900 (5d28a8f)
@@ -130,6 +130,9 @@ PG_FUNCTION_INFO_V1(pgroonga_match_query_varchar);
 PG_FUNCTION_INFO_V1(pgroonga_match_regexp_text);
 PG_FUNCTION_INFO_V1(pgroonga_match_regexp_varchar);
 
+/* v2 */
+PG_FUNCTION_INFO_V1(pgroonga_query_contain_text);
+
 PG_FUNCTION_INFO_V1(pgroonga_insert);
 PG_FUNCTION_INFO_V1(pgroonga_beginscan);
 PG_FUNCTION_INFO_V1(pgroonga_gettuple);
@@ -304,17 +307,12 @@ _PG_init(void)
 }
 
 static grn_id
-PGrnGetType(Relation index, AttrNumber n, unsigned char *flags)
+PGrnPGTypeToGrnType(Oid pgTypeID, unsigned char *flags)
 {
-	TupleDesc desc = RelationGetDescr(index);
-	Form_pg_attribute attr;
 	grn_id typeID = GRN_ID_NIL;
 	unsigned char typeFlags = 0;
-	int32 maxLength;
 
-	attr = desc->attrs[n];
-
-	switch (attr->atttypid)
+	switch (pgTypeID)
 	{
 	case BOOLOID:
 		typeID = GRN_DB_BOOL;
@@ -341,15 +339,6 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags)
 		typeID = GRN_DB_LONG_TEXT;
 		break;
 	case VARCHAROID:
-		maxLength = type_maximum_size(attr->atttypid, attr->atttypmod);
-		if (maxLength > 4096)
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("pgroonga: "
-							"4097bytes over size varchar isn't supported: %d",
-							maxLength)));
-		}
 		typeID = GRN_DB_SHORT_TEXT;	/* 4KB */
 		break;
 #ifdef NOT_USED
@@ -358,16 +347,6 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags)
 		break;
 #endif
 	case VARCHARARRAYOID:
-		maxLength = type_maximum_size(VARCHAROID, attr->atttypmod);
-		if (maxLength > 4096)
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("pgroonga: "
-							"array of 4097bytes over size varchar "
-							"isn't supported: %d",
-							maxLength)));
-		}
 		typeID = GRN_DB_SHORT_TEXT;
 		typeFlags |= GRN_OBJ_VECTOR;
 		break;
@@ -378,7 +357,7 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags)
 	default:
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("pgroonga: unsupported type: %u", attr->atttypid)));
+				 errmsg("pgroonga: unsupported type: %u", pgTypeID)));
 		break;
 	}
 
@@ -390,6 +369,35 @@ PGrnGetType(Relation index, AttrNumber n, unsigned char *flags)
 	return typeID;
 }
 
+static grn_id
+PGrnGetType(Relation index, AttrNumber n, unsigned char *flags)
+{
+	TupleDesc desc = RelationGetDescr(index);
+	Form_pg_attribute attr;
+	int32 maxLength;
+
+	attr = desc->attrs[n];
+	switch (attr->atttypid)
+	{
+	case VARCHAROID:
+	case VARCHARARRAYOID:
+		maxLength = type_maximum_size(VARCHAROID, attr->atttypmod);
+		if (maxLength > 4096)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("pgroonga: "
+							"4097bytes over size varchar isn't supported: %d",
+							maxLength)));
+		}
+		break;
+	default:
+		break;
+	}
+
+	return PGrnPGTypeToGrnType(attr->atttypid, flags);
+}
+
 #ifdef PGRN_SUPPORT_INDEX_ONLY_SCAN
 static Datum
 PGrnConvertToDatumArrayType(grn_obj *vector, Oid typeID)
@@ -517,13 +525,14 @@ PGrnConvertToDatum(grn_obj *value, Oid typeID)
 #endif
 
 static bool
-PGrnIsForFullTextSearchIndex(Relation index, int nthAttribute)
+PGrnIsQueryStrategyIndex(Relation index, int nthAttribute)
 {
-	Oid queryStrategyOID;
+	Oid strategyOID;
 	Oid leftType;
 	Oid rightType;
 
 	leftType = index->rd_opcintype[nthAttribute];
+
 	switch (leftType)
 	{
 	case VARCHARARRAYOID:
@@ -536,11 +545,53 @@ PGrnIsForFullTextSearchIndex(Relation index, int nthAttribute)
 		rightType = leftType;
 		break;
 	}
-	queryStrategyOID = get_opfamily_member(index->rd_opfamily[nthAttribute],
-										   leftType,
-										   rightType,
-										   PGrnQueryStrategyNumber);
-	return OidIsValid(queryStrategyOID);
+
+	strategyOID = get_opfamily_member(index->rd_opfamily[nthAttribute],
+									  leftType,
+									  rightType,
+									  PGrnQueryStrategyNumber);
+	return OidIsValid(strategyOID);
+}
+
+static bool
+PGrnIsQueryContainStrategyIndex(Relation index, int nthAttribute)
+{
+	Oid strategyOID;
+	Oid leftType;
+	Oid rightType;
+
+	leftType = index->rd_opcintype[nthAttribute];
+
+	switch (leftType)
+	{
+	case VARCHAROID:
+		rightType = VARCHARARRAYOID;
+		break;
+	case TEXTOID:
+		rightType = TEXTARRAYOID;
+		break;
+	default:
+		rightType = leftType;
+		break;
+	}
+
+	strategyOID = get_opfamily_member(index->rd_opfamily[nthAttribute],
+									  leftType,
+									  rightType,
+									  PGrnQueryContainStrategyNumber);
+	return OidIsValid(strategyOID);
+}
+
+static bool
+PGrnIsForFullTextSearchIndex(Relation index, int nthAttribute)
+{
+	if (PGrnIsQueryStrategyIndex(index, nthAttribute))
+		return true;
+
+	if (PGrnIsQueryContainStrategyIndex(index, nthAttribute))
+		return true;
+
+	return false;
 }
 
 static bool
@@ -1374,6 +1425,41 @@ pgroonga_match_regexp_varchar(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(matched);
 }
 
+/* v2 */
+/**
+ * pgroonga.query_contain_text(target text, queries text[]) : bool
+ */
+Datum
+pgroonga_query_contain_text(PG_FUNCTION_ARGS)
+{
+	text *target = PG_GETARG_TEXT_PP(0);
+	ArrayType *queries = PG_GETARG_ARRAYTYPE_P(1);
+	grn_bool matched = GRN_FALSE;	/* result when no query is evaluated */
+	int i, n;
+
+	n = (ARR_NDIM(queries) == 0) ? 0 : ARR_DIMS(queries)[0];	/* '{}' has no dims */
+	for (i = 1; i <= n; i++)
+	{
+		Datum queryDatum;
+		text *query;
+		bool isNULL;
+
+		queryDatum = array_ref(queries, 1, &i, -1, -1, false, 'i', &isNULL);
+		if (isNULL)
+			continue;	/* skip NULL elements */
+
+		query = DatumGetTextPP(queryDatum);
+		matched = pgroonga_match_query_raw(VARDATA_ANY(target),
+										   VARSIZE_ANY_EXHDR(target),
+										   VARDATA_ANY(query),
+										   VARSIZE_ANY_EXHDR(query));
+		if (matched)
+			break;	/* one matching query is enough */
+	}
+
+	PG_RETURN_BOOL(matched);
+}
+
 static void
 PGrnInsert(Relation index,
 		   grn_obj *sourcesTable,
@@ -1863,6 +1949,35 @@ PGrnSearchBuildConditionLikeRegexp(PGrnSearchData *data,
 	grn_expr_append_op(ctx, expression, GRN_OP_REGEXP, 2);
 }
 
+static void
+PGrnSearchBuildConditionQuery(PGrnScanOpaque so,
+							  PGrnSearchData *data,
+							  grn_obj *targetColumn,
+							  const char *query,
+							  unsigned int querySize)
+{
+	grn_rc rc;
+	grn_obj *matchTarget, *matchTargetVariable;
+	grn_expr_flags flags = GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT;
+
+	GRN_EXPR_CREATE_FOR_QUERY(ctx, so->sourcesTable,
+							  matchTarget, matchTargetVariable);
+	GRN_PTR_PUT(ctx, &(data->matchTargets), matchTarget);
+	grn_expr_append_obj(ctx, matchTarget, targetColumn, GRN_OP_PUSH, 1);
+
+	rc = grn_expr_parse(ctx, data->expression,
+						query, querySize,
+						matchTarget, GRN_OP_MATCH, GRN_OP_AND,
+						flags);
+	if (rc != GRN_SUCCESS)
+	{
+		ereport(ERROR,
+				(errcode(PGrnRCToPgErrorCode(rc)),
+				 errmsg("pgroonga: failed to parse expression: %s",
+						ctx->errbuf)));
+	}
+}
+
 static bool
 PGrnSearchBuildCondition(IndexScanDesc scan,
 						 PGrnScanOpaque so,
@@ -1876,6 +1991,7 @@ PGrnSearchBuildCondition(IndexScanDesc scan,
 	const char *targetColumnName;
 	grn_obj *targetColumn;
 	grn_operator operator = GRN_OP_NOP;
+	Oid valueTypeID;
 
 	/* NULL key is not supported */
 	if (key->sk_flags & SK_ISNULL)
@@ -1891,24 +2007,15 @@ PGrnSearchBuildCondition(IndexScanDesc scan,
 	if (PGrnAttributeIsJSONB(attribute->atttypid))
 		return PGrnJSONBBuildSearchCondition(data, key, targetColumn);
 
+	valueTypeID = attribute->atttypid;
+	switch (valueTypeID)
 	{
-		grn_id domain;
-		unsigned char flags = 0;
-		domain = PGrnGetType(index, key->sk_attno - 1, NULL);
-		grn_obj_reinit(ctx, &(buffers->general), domain, flags);
-	}
-	{
-		Oid valueTypeID = attribute->atttypid;
-		switch (valueTypeID)
-		{
-		case VARCHARARRAYOID:
-			valueTypeID = VARCHAROID;
-			break;
-		case TEXTARRAYOID:
-			valueTypeID = TEXTOID;
-			break;
-		}
-		PGrnConvertFromData(key->sk_argument, valueTypeID, &(buffers->general));
+	case VARCHARARRAYOID:
+		valueTypeID = VARCHAROID;
+		break;
+	case TEXTARRAYOID:
+		valueTypeID = TEXTOID;
+		break;
 	}
 
 	switch (key->sk_strategy)
@@ -1939,6 +2046,14 @@ PGrnSearchBuildCondition(IndexScanDesc scan,
 	case PGrnRegexpStrategyNumber:
 		operator = GRN_OP_REGEXP;
 		break;
+	case PGrnQueryContainStrategyNumber:
+		switch (attribute->atttypid)
+		{
+		case TEXTOID:
+			valueTypeID = TEXTARRAYOID;
+			break;
+		}
+		break;
 	default:
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1947,6 +2062,14 @@ PGrnSearchBuildCondition(IndexScanDesc scan,
 		break;
 	}
 
+	{
+		grn_id domain;
+		unsigned char flags = 0;
+		domain = PGrnPGTypeToGrnType(valueTypeID, &flags);
+		grn_obj_reinit(ctx, &(buffers->general), domain, flags);
+		PGrnConvertFromData(key->sk_argument, valueTypeID, &(buffers->general));
+	}
+
 	switch (key->sk_strategy)
 	{
 	case PGrnLikeStrategyNumber:
@@ -1959,27 +2082,32 @@ PGrnSearchBuildCondition(IndexScanDesc scan,
 		PGrnSearchBuildConditionLikeMatch(data, targetColumn, &(buffers->general));
 		break;
 	case PGrnQueryStrategyNumber:
+		PGrnSearchBuildConditionQuery(so,
+									  data,
+									  targetColumn,
+									  GRN_TEXT_VALUE(&(buffers->general)),
+									  GRN_TEXT_LEN(&(buffers->general)));
+		break;
+	case PGrnQueryContainStrategyNumber:
 	{
-		grn_rc rc;
-		grn_obj *matchTarget, *matchTargetVariable;
-		grn_expr_flags flags =
-			GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT;
-
-		GRN_EXPR_CREATE_FOR_QUERY(ctx, so->sourcesTable,
-								  matchTarget, matchTargetVariable);
-		GRN_PTR_PUT(ctx, &(data->matchTargets), matchTarget);
-		grn_expr_append_obj(ctx, matchTarget, targetColumn, GRN_OP_PUSH, 1);
-
-		rc = grn_expr_parse(ctx, data->expression,
-							GRN_TEXT_VALUE(&(buffers->general)), GRN_TEXT_LEN(&(buffers->general)),
-							matchTarget, GRN_OP_MATCH, GRN_OP_AND,
-							flags);
-		if (rc != GRN_SUCCESS)
+		grn_obj *queries = &(buffers->general);
+		unsigned int i, n;
+
+		n = grn_vector_size(ctx, queries);
+		for (i = 0; i < n; i++)
 		{
-			ereport(ERROR,
-					(errcode(PGrnRCToPgErrorCode(rc)),
-					 errmsg("pgroonga: failed to parse expression: %s",
-							ctx->errbuf)));
+			const char *query;
+			unsigned int querySize;
+
+			querySize = grn_vector_get_element(ctx, queries, i,
+												&query, NULL, NULL);
+			PGrnSearchBuildConditionQuery(so,
+										  data,
+										  targetColumn,
+										  query,
+										  querySize);
+			if (i > 0)
+				grn_expr_append_op(ctx, data->expression, GRN_OP_OR, 2);
 		}
 		break;
 	}

  Modified: src/pgroonga.h (+2 -0)
===================================================================
--- src/pgroonga.h    2016-01-26 22:56:58 +0900 (54b34c6)
+++ src/pgroonga.h    2016-01-27 00:05:02 +0900 (a933373)
@@ -24,6 +24,8 @@
 #define PGrnRegexpStrategyNumber		10	/* operator @~ (@~ in Groonga)  */
 #define PGrnJSONContainStrategyNumber	11	/* operator @> */
 
+#define PGrnQueryContainStrategyNumber	12	/* operator &@> */
+
 /* file and table names */
 #define PGrnLogBasename					"pgroonga.log"
 #define PGrnDatabaseBasename			"pgrn"
-------------- next part --------------
An HTML attachment was scrubbed...
Download 



More information about the Groonga-commit mailing list
Back to archive index