[Groonga-commit] pgroonga/pgroonga at 181c399 [master] Add pgroonga.match_positions_byte

Back to archive index

Kouhei Sutou null+****@clear*****
Sun Apr 24 00:02:23 JST 2016


Kouhei Sutou	2016-04-24 00:02:23 +0900 (Sun, 24 Apr 2016)

  New Revision: 181c3995f964e85bb2b92674e06043e29adbe18b
  https://github.com/pgroonga/pgroonga/commit/181c3995f964e85bb2b92674e06043e29adbe18b

  Message:
    Add pgroonga.match_positions_byte

  Added files:
    expected/function/match-positions-byte/different-size-keyword.out
    expected/function/match-positions-byte/multibyte.out
    expected/function/match-positions-byte/multiple-keywords.out
    expected/function/match-positions-byte/one-keyword.out
    sql/function/match-positions-byte/different-size-keyword.sql
    sql/function/match-positions-byte/multibyte.sql
    sql/function/match-positions-byte/multiple-keywords.sql
    sql/function/match-positions-byte/one-keyword.sql
    src/pgrn_match_positions_byte.c
    src/pgrn_match_positions_byte.h
  Modified files:
    CMakeLists.txt
    Makefile
    pgroonga--1.0.6--1.0.7.sql
    pgroonga.sql
    src/pgroonga.c
    src/pgroonga.h

  Modified: CMakeLists.txt (+1 -0)
===================================================================
--- CMakeLists.txt    2016-04-23 23:11:05 +0900 (596d756)
+++ CMakeLists.txt    2016-04-24 00:02:23 +0900 (c0cd25a)
@@ -65,6 +65,7 @@ set(PGRN_SOURCES
   "src/pgrn_highlight_html.c"
   "src/pgrn_keywords.c"
   "src/pgrn_jsonb.c"
+  "src/pgrn_match_positions_byte.c"
   "src/pgrn_options.c"
   "src/pgrn_snippet_html.c"
   "src/pgrn_value.c"

  Modified: Makefile (+1 -0)
===================================================================
--- Makefile    2016-04-23 23:11:05 +0900 (25f8a3d)
+++ Makefile    2016-04-24 00:02:23 +0900 (00acf9e)
@@ -12,6 +12,7 @@ SRCS =						\
 	src/pgrn_highlight_html.c		\
 	src/pgrn_keywords.c			\
 	src/pgrn_jsonb.c			\
+	src/pgrn_match_positions_byte.c		\
 	src/pgrn_options.c			\
 	src/pgrn_snippet_html.c			\
 	src/pgrn_value.c			\

  Added: expected/function/match-positions-byte/different-size-keyword.out (+8 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-byte/different-size-keyword.out    2016-04-24 00:02:23 +0900 (44c4b28)
@@ -0,0 +1,8 @@
+SELECT pgroonga.match_positions_byte(
+  '100㍉メートル',
+  ARRAY['ミリ']);
+ match_positions_byte 
+----------------------
+ {{3,3}}
+(1 row)
+

  Added: expected/function/match-positions-byte/multibyte.out (+9 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-byte/multibyte.out    2016-04-24 00:02:23 +0900 (7b2d3e3)
@@ -0,0 +1,9 @@
+SELECT pgroonga.match_positions_byte(
+  'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
+  '登録された文書をすぐに検索結果に反映できます。',
+  ARRAY['検索']);
+ match_positions_byte 
+----------------------
+ {{61,6},{124,6}}
+(1 row)
+

  Added: expected/function/match-positions-byte/multiple-keywords.out (+19 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-byte/multiple-keywords.out    2016-04-24 00:02:23 +0900 (e8cba42)
@@ -0,0 +1,19 @@
+SELECT pgroonga.match_positions_byte(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']);
+ match_positions_byte 
+----------------------
+ {{13,4},{455,10}}
+(1 row)
+

  Added: expected/function/match-positions-byte/one-keyword.out (+19 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-byte/one-keyword.out    2016-04-24 00:02:23 +0900 (ee381ff)
@@ -0,0 +1,19 @@
+SELECT pgroonga.match_positions_byte(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']);
+          match_positions_byte           
+-----------------------------------------
+ {{0,7},{110,7},{197,7},{319,9},{553,7}}
+(1 row)
+

  Modified: pgroonga--1.0.6--1.0.7.sql (+7 -0)
===================================================================
--- pgroonga--1.0.6--1.0.7.sql    2016-04-23 23:11:05 +0900 (87416d7)
+++ pgroonga--1.0.6--1.0.7.sql    2016-04-24 00:02:23 +0900 (2bba1ef)
@@ -4,3 +4,10 @@ CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[])
 	LANGUAGE C
 	VOLATILE
 	STRICT;
+
+-- Returns one {offset, length} pair, both counted in bytes, for every
+-- occurrence of any of the given keywords found in target.
+-- NOTE(review): declared VOLATILE like the neighbouring highlight_html; the
+-- result looks like it depends only on the arguments, so IMMUTABLE may be
+-- appropriate -- confirm before changing.
+CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[])
+	RETURNS integer[2][]
+	AS 'MODULE_PATHNAME', 'pgroonga_match_positions_byte'
+	LANGUAGE C
+	VOLATILE
+	STRICT;

  Modified: pgroonga.sql (+7 -0)
===================================================================
--- pgroonga.sql    2016-04-23 23:11:05 +0900 (9c30f6d)
+++ pgroonga.sql    2016-04-24 00:02:23 +0900 (5018fd3)
@@ -37,6 +37,13 @@ CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[])
 	VOLATILE
 	STRICT;
 
+-- Returns one {offset, length} pair, both counted in bytes, for every
+-- occurrence of any of the given keywords found in target.
+-- NOTE(review): declared VOLATILE like the neighbouring highlight_html; the
+-- result looks like it depends only on the arguments, so IMMUTABLE may be
+-- appropriate -- confirm before changing.
+CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[])
+	RETURNS integer[2][]
+	AS 'MODULE_PATHNAME', 'pgroonga_match_positions_byte'
+	LANGUAGE C
+	VOLATILE
+	STRICT;
+
 CREATE FUNCTION pgroonga.match_term(target text, term text)
 	RETURNS bool
 	AS 'MODULE_PATHNAME', 'pgroonga_match_term_text'

  Added: sql/function/match-positions-byte/different-size-keyword.sql (+3 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-byte/different-size-keyword.sql    2016-04-24 00:02:23 +0900 (947c546)
@@ -0,0 +1,3 @@
+SELECT pgroonga.match_positions_byte(
+  '100㍉メートル',
+  ARRAY['ミリ']);

  Added: sql/function/match-positions-byte/multibyte.sql (+4 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-byte/multibyte.sql    2016-04-24 00:02:23 +0900 (2ab075b)
@@ -0,0 +1,4 @@
+SELECT pgroonga.match_positions_byte(
+  'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
+  '登録された文書をすぐに検索結果に反映できます。',
+  ARRAY['検索']);

  Added: sql/function/match-positions-byte/multiple-keywords.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-byte/multiple-keywords.sql    2016-04-24 00:02:23 +0900 (c9f0bfb)
@@ -0,0 +1,14 @@
+SELECT pgroonga.match_positions_byte(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']);

  Added: sql/function/match-positions-byte/one-keyword.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-byte/one-keyword.sql    2016-04-24 00:02:23 +0900 (05cc123)
@@ -0,0 +1,14 @@
+SELECT pgroonga.match_positions_byte(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']);

  Added: src/pgrn_match_positions_byte.c (+128 -0) 100644
===================================================================
--- /dev/null
+++ src/pgrn_match_positions_byte.c    2016-04-24 00:02:23 +0900 (0808493)
@@ -0,0 +1,128 @@
+#include "pgroonga.h"
+
+#include "pgrn_global.h"
+#include "pgrn_groonga.h"
+#include "pgrn_match_positions_byte.h"
+#include "pgrn_keywords.h"
+
+#include <catalog/pg_type.h>
+#include <utils/builtins.h>
+
+static grn_ctx *ctx = &PGrnContext;
+static grn_obj *keywordsTable = NULL;
+
+PG_FUNCTION_INFO_V1(pgroonga_match_positions_byte);
+
+/*
+ * Creates the unnamed PAT_KEY table (ShortText keys) that holds the keywords
+ * for pgroonga.match_positions_byte() and sets NormalizerAuto as its
+ * normalizer.
+ */
+void
+PGrnInitializeMatchPositionsByte(void)
+{
+	grn_obj *keyType;
+	grn_obj *normalizer;
+
+	keyType = grn_ctx_at(ctx, GRN_DB_SHORT_TEXT);
+	keywordsTable = grn_table_create(ctx, NULL, 0, NULL,
+									 GRN_OBJ_TABLE_PAT_KEY,
+									 keyType,
+									 NULL);
+
+	normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
+	grn_obj_set_info(ctx, keywordsTable, GRN_INFO_NORMALIZER, normalizer);
+}
+
+/*
+ * Closes the keywords table if it was created and forgets the pointer so
+ * that a second call does nothing.
+ */
+void
+PGrnFinalizeMatchPositionsByte(void)
+{
+	if (keywordsTable)
+	{
+		grn_obj_close(ctx, keywordsTable);
+		keywordsTable = NULL;
+	}
+}
+
+/*
+ * Scans target with the keywords currently registered in keywordsTable and
+ * returns every hit as a two-dimensional int4 array of {offset, length}
+ * pairs. Offsets are relative to the first byte of target.
+ */
+static ArrayType *
+PGrnMatchPositionsByte(text *target)
+{
+	grn_obj buffer;
+	ArrayType *positions;
+
+	/* Flat uint32 vector: offset0, length0, offset1, length1, ... */
+	GRN_UINT32_INIT(&buffer, GRN_OBJ_VECTOR);
+
+	{
+		const char *string;
+		size_t stringLength;
+		size_t baseOffset = 0;
+
+		string = VARDATA_ANY(target);
+		stringLength = VARSIZE_ANY_EXHDR(target);
+
+		/*
+		 * One grn_pat_scan() call reports at most MAX_N_HITS hits, so keep
+		 * scanning from where the previous call stopped until the whole
+		 * target has been consumed.
+		 */
+		while (stringLength > 0) {
+#define MAX_N_HITS 16
+			grn_pat_scan_hit hits[MAX_N_HITS];
+			const char *rest;
+			int i, nHits;
+			size_t chunkLength;
+
+			nHits = grn_pat_scan(ctx, (grn_pat *)keywordsTable,
+								 string, stringLength,
+								 hits, MAX_N_HITS, &rest);
+			for (i = 0; i < nHits; i++) {
+				/*
+				 * hits[i].offset is relative to the chunk passed to this
+				 * call; rebase it onto the start of target so that hits
+				 * found in later chunks are reported correctly.
+				 */
+				GRN_UINT32_PUT(ctx, &buffer, hits[i].offset + baseOffset);
+				GRN_UINT32_PUT(ctx, &buffer, hits[i].length);
+			}
+
+			chunkLength = rest - string;
+			stringLength -= chunkLength;
+			string = rest;
+			baseOffset += chunkLength;
+#undef MAX_N_HITS
+		}
+	}
+
+	{
+		int i, nElements;
+		Datum *elements;
+		int dims[2];
+		int lbs[2];
+
+		/* Two uint32 values were stored per hit. */
+		nElements = GRN_BULK_VSIZE(&buffer) / (sizeof(uint32_t) * 2);
+		elements = palloc(sizeof(Datum) * 2 * nElements);
+		for (i = 0; i < nElements; i++)
+		{
+			uint32_t offset;
+			uint32_t length;
+
+			offset = GRN_UINT32_VALUE_AT(&buffer, i * 2);
+			length = GRN_UINT32_VALUE_AT(&buffer, i * 2 + 1);
+			elements[i * 2] = Int32GetDatum(offset);
+			elements[i * 2 + 1] = Int32GetDatum(length);
+		}
+		/* nElements rows of 2 columns, both dimensions 1-based. */
+		dims[0] = nElements;
+		dims[1] = 2;
+		lbs[0] = 1;
+		lbs[1] = 1;
+		positions = construct_md_array(elements,
+									   NULL,
+									   2,
+									   dims,
+									   lbs,
+									   INT4OID,
+									   sizeof(int32_t),
+									   true,
+									   'i');
+	}
+
+	GRN_OBJ_FIN(ctx, &buffer);
+
+	return positions;
+}
+
+/**
+ * pgroonga.match_positions_byte(target text, keywords text[]) : integer[2][]
+ *
+ * Loads keywords into the shared keywords table, then returns the positions
+ * at which they occur in target.
+ */
+Datum
+pgroonga_match_positions_byte(PG_FUNCTION_ARGS)
+{
+	text *target;
+	ArrayType *keywords;
+
+	target = PG_GETARG_TEXT_PP(0);
+	keywords = PG_GETARG_ARRAYTYPE_P(1);
+
+	PGrnKeywordsUpdateTable(keywords, keywordsTable);
+
+	PG_RETURN_POINTER(PGrnMatchPositionsByte(target));
+}

  Added: src/pgrn_match_positions_byte.h (+4 -0) 100644
===================================================================
--- /dev/null
+++ src/pgrn_match_positions_byte.h    2016-04-24 00:02:23 +0900 (0ff85ae)
@@ -0,0 +1,4 @@
+#pragma once
+
+/* Creates the keywords table used by pgroonga.match_positions_byte(). */
+void PGrnInitializeMatchPositionsByte(void);
+/* Closes the keywords table if it has been created. */
+void PGrnFinalizeMatchPositionsByte(void);

  Modified: src/pgroonga.c (+5 -0)
===================================================================
--- src/pgroonga.c    2016-04-23 23:11:05 +0900 (d51d863)
+++ src/pgroonga.c    2016-04-24 00:02:23 +0900 (5d1219f)
@@ -9,6 +9,7 @@
 #include "pgrn_highlight_html.h"
 #include "pgrn_keywords.h"
 #include "pgrn_jsonb.h"
+#include "pgrn_match_positions_byte.h"
 #include "pgrn_options.h"
 #include "pgrn_search.h"
 #include "pgrn_value.h"
@@ -262,6 +263,8 @@ PGrnOnProcExit(int code, Datum arg)
 	{
 		grn_obj *db;
 
+		PGrnFinalizeMatchPositionsByte();
+
 		PGrnFinalizeHighlightHTML();
 
 		PGrnFinalizeKeywords();
@@ -373,6 +376,8 @@ _PG_init(void)
 	PGrnInitializeKeywords();
 
 	PGrnInitializeHighlightHTML();
+
+	PGrnInitializeMatchPositionsByte();
 }
 
 static grn_id

  Modified: src/pgroonga.h (+1 -0)
===================================================================
--- src/pgroonga.h    2016-04-23 23:11:05 +0900 (f916b86)
+++ src/pgroonga.h    2016-04-24 00:02:23 +0900 (d7051f3)
@@ -57,6 +57,7 @@ extern Datum PGDLLEXPORT pgroonga_table_name(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_command(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_snippet_html(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_highlight_html(PG_FUNCTION_ARGS);
+extern Datum PGDLLEXPORT pgroonga_match_positions_byte(PG_FUNCTION_ARGS);
 
 extern Datum PGDLLEXPORT pgroonga_match_term_text(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_match_term_text_array(PG_FUNCTION_ARGS);
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index