[Groonga-commit] pgroonga/pgroonga at 77d7208 [master] Support `column LIKE '%keyword%'`

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Feb 11 14:01:53 JST 2015


Kouhei Sutou	2015-02-11 14:01:53 +0900 (Wed, 11 Feb 2015)

  New Revision: 77d7208a743f792643ad1537019716a53103305e
  https://github.com/pgroonga/pgroonga/commit/77d7208a743f792643ad1537019716a53103305e

  Message:
    Support `column LIKE '%keyword%'`
    
    This is not a feature that is compatible with the original LIKE. It's
    just a shortcut for `column @@ 'keyword'`, so the sequential scan result
    and the index scan (or bitmap scan) result may be different.
    
    If you want the same result for sequential scan and index scan (or
    bitmap scan), use the `TokenBigramSplitSymbolAlphaDigit` tokenizer and
    don't use a normalizer.

  Added files:
    expected/full-text-search/text/single/like/bitmapscan.out
    expected/full-text-search/text/single/like/indexscan.out
    expected/full-text-search/text/single/like/seqscan.out
    sql/full-text-search/text/single/like/bitmapscan.sql
    sql/full-text-search/text/single/like/indexscan.sql
    sql/full-text-search/text/single/like/seqscan.sql
  Modified files:
    Makefile
    pgroonga.c
    pgroonga.h
    pgroonga.sql

  Modified: Makefile (+3 -0)
===================================================================
--- Makefile    2015-02-09 23:28:33 +0900 (994da55)
+++ Makefile    2015-02-11 14:01:53 +0900 (e984cf4)
@@ -28,6 +28,7 @@ pgroonga--$(EXTENSION_VERSION).sql: pgroonga.sql
 installcheck: results/full-text-search/text/single/contain
 installcheck: results/full-text-search/text/single/match
 installcheck: results/full-text-search/text/single/and
+installcheck: results/full-text-search/text/single/like
 installcheck: results/full-text-search/text/multiple/contain
 installcheck: results/full-text-search/text/options/tokenizer
 installcheck: results/full-text-search/text/options/normalizer
@@ -43,6 +44,8 @@ results/full-text-search/text/single/match:
 	@mkdir -p $@
 results/full-text-search/text/single/and:
 	@mkdir -p $@
+results/full-text-search/text/single/like:
+	@mkdir -p $@
 results/full-text-search/text/multiple/contain:
 	@mkdir -p $@
 results/full-text-search/text/options/tokenizer:

  Added: expected/full-text-search/text/single/like/bitmapscan.out (+23 -0) 100644
===================================================================
--- /dev/null
+++ expected/full-text-search/text/single/like/bitmapscan.out    2015-02-11 14:01:53 +0900 (6296d36)
@@ -0,0 +1,23 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+INSERT INTO memos VALUES (4, 'groonga command is provided.');
+CREATE INDEX grnindex ON memos USING pgroonga (content);
+SET enable_seqscan = off;
+SET enable_indexscan = off;
+SET enable_bitmapscan = on;
+SELECT id, content
+  FROM memos
+ WHERE content LIKE '%groonga%';
+ id |                        content                        
+----+-------------------------------------------------------
+  2 | Groonga is fast full text search engine.
+  3 | PGroonga is a PostgreSQL extension that uses Groonga.
+  4 | groonga command is provided.
+(3 rows)
+
+DROP TABLE memos;

  Added: expected/full-text-search/text/single/like/indexscan.out (+23 -0) 100644
===================================================================
--- /dev/null
+++ expected/full-text-search/text/single/like/indexscan.out    2015-02-11 14:01:53 +0900 (2788b46)
@@ -0,0 +1,23 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+INSERT INTO memos VALUES (4, 'groonga command is provided.');
+CREATE INDEX grnindex ON memos USING pgroonga (content);
+SET enable_seqscan = off;
+SET enable_indexscan = on;
+SET enable_bitmapscan = off;
+SELECT id, content
+  FROM memos
+ WHERE content LIKE '%groonga%';
+ id |                        content                        
+----+-------------------------------------------------------
+  2 | Groonga is fast full text search engine.
+  3 | PGroonga is a PostgreSQL extension that uses Groonga.
+  4 | groonga command is provided.
+(3 rows)
+
+DROP TABLE memos;

  Added: expected/full-text-search/text/single/like/seqscan.out (+21 -0) 100644
===================================================================
--- /dev/null
+++ expected/full-text-search/text/single/like/seqscan.out    2015-02-11 14:01:53 +0900 (df93bba)
@@ -0,0 +1,21 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+INSERT INTO memos VALUES (4, 'groonga command is provided.');
+CREATE INDEX grnindex ON memos USING pgroonga (content);
+SET enable_seqscan = on;
+SET enable_indexscan = off;
+SET enable_bitmapscan = off;
+SELECT id, content
+  FROM memos
+ WHERE content LIKE '%groonga%';
+ id |           content            
+----+------------------------------
+  4 | groonga command is provided.
+(1 row)
+
+DROP TABLE memos;

  Modified: pgroonga.c (+55 -8)
===================================================================
--- pgroonga.c    2015-02-09 23:28:33 +0900 (ece6cd8)
+++ pgroonga.c    2015-02-11 14:01:53 +0900 (98b6675)
@@ -66,6 +66,7 @@ typedef struct PGrnSearchData
 	grn_obj sectionID;
 	grn_obj *expression;
 	grn_obj *expressionVariable;
+	bool    isEmptyCondition;
 } PGrnSearchData;
 
 
@@ -812,6 +813,38 @@ pgroonga_beginscan(PG_FUNCTION_ARGS)
 }
 
 static void
+PGrnSearchBuildConditionLike(PGrnSearchData *data,
+							 grn_obj *matchTarget,
+							 grn_obj *query)
+{
+	grn_obj *expression;
+	const char *queryRaw;
+	size_t querySize;
+
+	expression = data->expression;
+	queryRaw = GRN_TEXT_VALUE(query);
+	querySize = GRN_TEXT_LEN(query);
+
+	if (querySize == 0)
+	{
+		data->isEmptyCondition = true;
+		return;
+	}
+
+	if (!(queryRaw[0] == '%' && queryRaw[querySize - 1] == '%'))
+	{
+		data->isEmptyCondition = true;
+		return;
+	}
+
+	grn_expr_append_obj(ctx, expression, matchTarget, GRN_OP_PUSH, 1);
+	grn_expr_append_const_str(ctx, expression,
+							  queryRaw + 1, querySize - 2,
+							  GRN_OP_PUSH, 1);
+	grn_expr_append_op(ctx, expression, GRN_OP_MATCH, 2);
+}
+
+static void
 PGrnSearchBuildConditions(IndexScanDesc scan,
 						  PGrnScanOpaque so,
 						  PGrnSearchData *data)
@@ -863,6 +896,8 @@ PGrnSearchBuildConditions(IndexScanDesc scan,
 		case PGrnGreaterStrategyNumber:
 			operator = GRN_OP_GREATER;
 			break;
+		case PGrnLikeStrategyNumber:
+			break;
 		case PGrnContainStrategyNumber:
 			operator = GRN_OP_MATCH;
 			break;
@@ -880,7 +915,14 @@ PGrnSearchBuildConditions(IndexScanDesc scan,
 		if (!isValidStrategy)
 			continue;
 
-		if (key->sk_strategy == PGrnQueryStrategyNumber)
+		switch (key->sk_strategy)
+		{
+		case PGrnLikeStrategyNumber:
+			PGrnSearchBuildConditionLike(data, matchTarget, &buffer);
+			if (data->isEmptyCondition)
+				return;
+			break;
+		case PGrnQueryStrategyNumber:
 		{
 			grn_rc rc;
 			grn_expr_flags flags =
@@ -896,14 +938,15 @@ PGrnSearchBuildConditions(IndexScanDesc scan,
 						 errmsg("pgroonga: failed to parse expression: %s",
 								ctx->errbuf)));
 			}
+			break;
 		}
-		else
-		{
+		default:
 			grn_expr_append_obj(ctx, data->expression,
 								matchTarget, GRN_OP_PUSH, 1);
 			grn_expr_append_const(ctx, data->expression,
 								  &buffer, GRN_OP_PUSH, 1);
 			grn_expr_append_op(ctx, data->expression, operator, 2);
+			break;
 		}
 
 		if (nExpressions > 0)
@@ -945,6 +988,7 @@ PGrnSearch(IndexScanDesc scan)
 
 	GRN_EXPR_CREATE_FOR_QUERY(ctx, so->idsTable,
 							  data.expression, data.expressionVariable);
+	data.isEmptyCondition = false;
 
 	PG_TRY();
 	{
@@ -961,11 +1005,14 @@ PGrnSearch(IndexScanDesc scan)
 	so->searched = grn_table_create(ctx, NULL, 0, NULL,
 									GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_WITH_SUBREC,
 									so->idsTable, 0);
-    grn_table_select(ctx,
-					 so->idsTable,
-					 data.expression,
-					 so->searched,
-					 GRN_OP_OR);
+	if (!data.isEmptyCondition)
+	{
+		grn_table_select(ctx,
+						 so->idsTable,
+						 data.expression,
+						 so->searched,
+						 GRN_OP_OR);
+	}
 	PGrnSearchDataFree(&data);
 }
 

  Modified: pgroonga.h (+1 -0)
===================================================================
--- pgroonga.h    2015-02-09 23:28:33 +0900 (3a3fe0c)
+++ pgroonga.h    2015-02-11 14:01:53 +0900 (eb946b7)
@@ -27,6 +27,7 @@
 #define PGrnEqualStrategyNumber			3	/* operator = */
 #define PGrnGreaterEqualStrategyNumber	4	/* operator >= */
 #define PGrnGreaterStrategyNumber		5	/* operator > */
+#define PGrnLikeStrategyNumber			6	/* operator ~~ (LIKE) */
 #define PGrnContainStrategyNumber		7	/* operator %% (@ in Groonga) */
 #define PGrnQueryStrategyNumber			8	/* operator @@ (Groonga query) */
 

  Modified: pgroonga.sql (+1 -0)
===================================================================
--- pgroonga.sql    2015-02-09 23:28:33 +0900 (e270488)
+++ pgroonga.sql    2015-02-11 14:01:53 +0900 (435e5f2)
@@ -186,6 +186,7 @@ INSERT INTO pg_catalog.pg_am VALUES(
 
 CREATE OPERATOR CLASS pgroonga.full_text_search_text_ops DEFAULT FOR TYPE text
 	USING pgroonga AS
+		OPERATOR 6 pg_catalog.~~,
 		OPERATOR 7 %%,
 		OPERATOR 8 @@,
 		FUNCTION 1 pgroonga.typeof(oid, integer),

  Added: sql/full-text-search/text/single/like/bitmapscan.sql (+21 -0) 100644
===================================================================
--- /dev/null
+++ sql/full-text-search/text/single/like/bitmapscan.sql    2015-02-11 14:01:53 +0900 (b102ea2)
@@ -0,0 +1,21 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+INSERT INTO memos VALUES (4, 'groonga command is provided.');
+
+CREATE INDEX grnindex ON memos USING pgroonga (content);
+
+SET enable_seqscan = off;
+SET enable_indexscan = off;
+SET enable_bitmapscan = on;
+
+SELECT id, content
+  FROM memos
+ WHERE content LIKE '%groonga%';
+
+DROP TABLE memos;

  Added: sql/full-text-search/text/single/like/indexscan.sql (+21 -0) 100644
===================================================================
--- /dev/null
+++ sql/full-text-search/text/single/like/indexscan.sql    2015-02-11 14:01:53 +0900 (4fdd972)
@@ -0,0 +1,21 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+INSERT INTO memos VALUES (4, 'groonga command is provided.');
+
+CREATE INDEX grnindex ON memos USING pgroonga (content);
+
+SET enable_seqscan = off;
+SET enable_indexscan = on;
+SET enable_bitmapscan = off;
+
+SELECT id, content
+  FROM memos
+ WHERE content LIKE '%groonga%';
+
+DROP TABLE memos;

  Added: sql/full-text-search/text/single/like/seqscan.sql (+21 -0) 100644
===================================================================
--- /dev/null
+++ sql/full-text-search/text/single/like/seqscan.sql    2015-02-11 14:01:53 +0900 (10a5d37)
@@ -0,0 +1,21 @@
+CREATE TABLE memos (
+  id integer,
+  content text
+);
+
+INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
+INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
+INSERT INTO memos VALUES (4, 'groonga command is provided.');
+
+CREATE INDEX grnindex ON memos USING pgroonga (content);
+
+SET enable_seqscan = on;
+SET enable_indexscan = off;
+SET enable_bitmapscan = off;
+
+SELECT id, content
+  FROM memos
+ WHERE content LIKE '%groonga%';
+
+DROP TABLE memos;
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index