[Groonga-commit] pgroonga/pgroonga at a5769e7 [master] Add pgroonga.match_positions_character

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Aug 27 14:59:26 JST 2016


Kouhei Sutou	2016-08-27 14:59:26 +0900 (Sat, 27 Aug 2016)

  New Revision: a5769e75945cb2a1356e248c211ddb32c2b1f7e0
  https://github.com/pgroonga/pgroonga/commit/a5769e75945cb2a1356e248c211ddb32c2b1f7e0

  Message:
    Add pgroonga.match_positions_character

  Added files:
    expected/function/match-positions-character/different-size-keyword.out
    expected/function/match-positions-character/multibyte.out
    expected/function/match-positions-character/multiple-keywords.out
    expected/function/match-positions-character/one-keyword.out
    sql/function/match-positions-character/different-size-keyword.sql
    sql/function/match-positions-character/multibyte.sql
    sql/function/match-positions-character/multiple-keywords.sql
    sql/function/match-positions-character/one-keyword.sql
    src/pgrn_match_positions_character.c
    src/pgrn_match_positions_character.h
  Modified files:
    Makefile
    pgroonga--1.1.0--1.1.1.sql
    pgroonga.sql
    src/pgroonga.c

  Modified: Makefile (+1 -0)
===================================================================
--- Makefile    2016-08-27 12:40:25 +0900 (c349991)
+++ Makefile    2016-08-27 14:59:26 +0900 (e4c60e4)
@@ -13,6 +13,7 @@ SRCS =						\
 	src/pgrn_keywords.c			\
 	src/pgrn_jsonb.c			\
 	src/pgrn_match_positions_byte.c		\
+	src/pgrn_match_positions_character.c	\
 	src/pgrn_options.c			\
 	src/pgrn_query_extract_keywords.c	\
 	src/pgrn_snippet_html.c			\

  Added: expected/function/match-positions-character/different-size-keyword.out (+8 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-character/different-size-keyword.out    2016-08-27 14:59:26 +0900 (6ee5927)
@@ -0,0 +1,8 @@
+SELECT pgroonga.match_positions_character(
+  '100㍉メートル',
+  ARRAY['ミリ']);
+ match_positions_character 
+---------------------------
+ {{3,1}}
+(1 row)
+

  Added: expected/function/match-positions-character/multibyte.out (+9 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-character/multibyte.out    2016-08-27 14:59:26 +0900 (19a7f34)
@@ -0,0 +1,9 @@
+SELECT pgroonga.match_positions_character(
+  'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
+  '登録された文書をすぐに検索結果に反映できます。',
+  ARRAY['検索']);
+ match_positions_character 
+---------------------------
+ {{25,2},{46,2}}
+(1 row)
+

  Added: expected/function/match-positions-character/multiple-keywords.out (+19 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-character/multiple-keywords.out    2016-08-27 14:59:26 +0900 (215b9cd)
@@ -0,0 +1,19 @@
+SELECT pgroonga.match_positions_character(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']);
+ match_positions_character 
+---------------------------
+ {{13,4},{455,10}}
+(1 row)
+

  Added: expected/function/match-positions-character/one-keyword.out (+19 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/match-positions-character/one-keyword.out    2016-08-27 14:59:26 +0900 (374d4d9)
@@ -0,0 +1,19 @@
+SELECT pgroonga.match_positions_character(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']);
+        match_positions_character        
+-----------------------------------------
+ {{0,7},{110,7},{197,7},{319,9},{553,7}}
+(1 row)
+

  Modified: pgroonga--1.1.0--1.1.1.sql (+6 -0)
===================================================================
--- pgroonga--1.1.0--1.1.1.sql    2016-08-27 12:40:25 +0900 (e69de29)
+++ pgroonga--1.1.0--1.1.1.sql    2016-08-27 14:59:26 +0900 (88cad6d)
@@ -0,0 +1,6 @@
+CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[])
+	RETURNS integer[2][]
+	AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character'
+	LANGUAGE C
+	VOLATILE
+	STRICT;

  Modified: pgroonga.sql (+7 -0)
===================================================================
--- pgroonga.sql    2016-08-27 12:40:25 +0900 (0c14453)
+++ pgroonga.sql    2016-08-27 14:59:26 +0900 (ccd9e73)
@@ -44,6 +44,13 @@ CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[])
 	VOLATILE
 	STRICT;
 
+CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[])
+	RETURNS integer[2][]
+	AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character'
+	LANGUAGE C
+	VOLATILE
+	STRICT;
+
 CREATE FUNCTION pgroonga.query_extract_keywords(query text)
 	RETURNS text[]
 	AS 'MODULE_PATHNAME', 'pgroonga_query_extract_keywords'

  Added: sql/function/match-positions-character/different-size-keyword.sql (+3 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-character/different-size-keyword.sql    2016-08-27 14:59:26 +0900 (3eafbbb)
@@ -0,0 +1,3 @@
+SELECT pgroonga.match_positions_character(
+  '100㍉メートル',
+  ARRAY['ミリ']);

  Added: sql/function/match-positions-character/multibyte.sql (+4 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-character/multibyte.sql    2016-08-27 14:59:26 +0900 (9cad188)
@@ -0,0 +1,4 @@
+SELECT pgroonga.match_positions_character(
+  'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
+  '登録された文書をすぐに検索結果に反映できます。',
+  ARRAY['検索']);

  Added: sql/function/match-positions-character/multiple-keywords.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-character/multiple-keywords.sql    2016-08-27 14:59:26 +0900 (3a69d89)
@@ -0,0 +1,14 @@
+SELECT pgroonga.match_positions_character(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']);

  Added: sql/function/match-positions-character/one-keyword.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/match-positions-character/one-keyword.sql    2016-08-27 14:59:26 +0900 (6475802)
@@ -0,0 +1,14 @@
+SELECT pgroonga.match_positions_character(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  E'\n' ||
+  E'\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']);

  Added: src/pgrn_match_positions_character.c (+158 -0) 100644
===================================================================
--- /dev/null
+++ src/pgrn_match_positions_character.c    2016-08-27 14:59:26 +0900 (40d3583)
@@ -0,0 +1,158 @@
+#include "pgroonga.h"
+
+#include "pgrn_global.h"
+#include "pgrn_groonga.h"
+#include "pgrn_match_positions_character.h"
+#include "pgrn_keywords.h"
+
+#include <catalog/pg_type.h>
+#include <utils/builtins.h>
+
+static grn_ctx *ctx = &PGrnContext;
+static grn_obj *keywordsTable = NULL;
+
+PG_FUNCTION_INFO_V1(pgroonga_match_positions_character);
+
+void
+PGrnInitializeMatchPositionsCharacter(void)
+{
+	keywordsTable = grn_table_create(ctx, NULL, 0, NULL,
+									 GRN_OBJ_TABLE_PAT_KEY,
+									 grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
+									 NULL);
+	grn_obj_set_info(ctx,
+					 keywordsTable,
+					 GRN_INFO_NORMALIZER,
+					 grn_ctx_get(ctx, "NormalizerAuto", -1));
+}
+
+void
+PGrnFinalizeMatchPositionsCharacter(void)
+{
+	if (!keywordsTable)
+		return;
+
+	grn_obj_close(ctx, keywordsTable);
+	keywordsTable = NULL;
+}
+
+static ArrayType *
+PGrnMatchPositionsCharacter(text *target)
+{
+	grn_obj buffer;
+	ArrayType *positions;
+
+	GRN_UINT32_INIT(&buffer, GRN_OBJ_VECTOR);
+
+	{
+		const char *string;
+		size_t stringLength;
+		const char *stringForNCharacters;
+		size_t nCharacters = 0;
+
+		string = VARDATA_ANY(target);
+		stringLength = VARSIZE_ANY_EXHDR(target);
+
+		stringForNCharacters = string;
+
+		while (stringLength > 0) {
+#define MAX_N_HITS 16
+			grn_pat_scan_hit hits[MAX_N_HITS];
+			const char *rest;
+			int i, nHits;
+			size_t chunkLength;
+
+			nHits = grn_pat_scan(ctx, (grn_pat *)keywordsTable,
+								 string, stringLength,
+								 hits, MAX_N_HITS, &rest);
+			for (i = 0; i < nHits; i++) {
+				const char *start;
+				const char *end;
+				size_t startNCharacters = 0;
+
+				start = string + hits[i].offset;
+				end = start + hits[i].length;
+				while (stringForNCharacters < end) {
+					int characterLength;
+					characterLength = grn_charlen(ctx,
+												  stringForNCharacters,
+												  end);
+					if (characterLength == 0) {
+						GRN_OBJ_FIN(ctx, &buffer);
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+								 errmsg("invalid string: %s",
+										stringForNCharacters)));
+					}
+					if (stringForNCharacters == start) {
+						startNCharacters = nCharacters;
+					}
+					nCharacters++;
+					stringForNCharacters += characterLength;
+				}
+
+				GRN_UINT32_PUT(ctx, &buffer, startNCharacters);
+				GRN_UINT32_PUT(ctx, &buffer, nCharacters - startNCharacters);
+			}
+
+			chunkLength = rest - string;
+			stringLength -= chunkLength;
+			string = rest;
+#undef MAX_N_HITS
+		}
+	}
+
+	{
+		int i, nElements;
+		Datum *elements;
+		int dims[2];
+		int lbs[2];
+
+		nElements = GRN_BULK_VSIZE(&buffer) / (sizeof(uint32_t) * 2);
+		elements = palloc(sizeof(Datum) * 2 * nElements);
+		for (i = 0; i < nElements; i++)
+		{
+			uint32_t offset;
+			uint32_t length;
+
+			offset = GRN_UINT32_VALUE_AT(&buffer, i * 2);
+			length = GRN_UINT32_VALUE_AT(&buffer, i * 2 + 1);
+			elements[i * 2] = Int32GetDatum(offset);
+			elements[i * 2 + 1] = Int32GetDatum(length);
+		}
+		dims[0] = nElements;
+		dims[1] = 2;
+		lbs[0] = 1;
+		lbs[1] = 1;
+		positions = construct_md_array(elements,
+									   NULL,
+									   2,
+									   dims,
+									   lbs,
+									   INT4OID,
+									   sizeof(int32_t),
+									   true,
+									   'i');
+		pfree(elements);
+	}
+
+	GRN_OBJ_FIN(ctx, &buffer);
+
+	return positions;
+}
+
+/**
+ * pgroonga.match_positions_character(target text, keywords text[]) : integer[2][]
+ */
+Datum
+pgroonga_match_positions_character(PG_FUNCTION_ARGS)
+{
+	text *target = PG_GETARG_TEXT_PP(0);
+	ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1);
+	ArrayType *positions;
+
+	PGrnKeywordsUpdateTable(keywords, keywordsTable);
+	positions = PGrnMatchPositionsCharacter(target);
+
+	PG_RETURN_POINTER(positions);
+}

  Added: src/pgrn_match_positions_character.h (+4 -0) 100644
===================================================================
--- /dev/null
+++ src/pgrn_match_positions_character.h    2016-08-27 14:59:26 +0900 (814fcd2)
@@ -0,0 +1,4 @@
+#pragma once
+
+void PGrnInitializeMatchPositionsCharacter(void);
+void PGrnFinalizeMatchPositionsCharacter(void);

  Modified: src/pgroonga.c (+3 -0)
===================================================================
--- src/pgroonga.c    2016-08-27 12:40:25 +0900 (77fa517)
+++ src/pgroonga.c    2016-08-27 14:59:26 +0900 (dc7a832)
@@ -10,6 +10,7 @@
 #include "pgrn_keywords.h"
 #include "pgrn_jsonb.h"
 #include "pgrn_match_positions_byte.h"
+#include "pgrn_match_positions_character.h"
 #include "pgrn_options.h"
 #include "pgrn_query_extract_keywords.h"
 #include "pgrn_search.h"
@@ -283,6 +284,7 @@ PGrnOnProcExit(int code, Datum arg)
 		PGrnFinalizeQueryExtractKeywords();
 
 		PGrnFinalizeMatchPositionsByte();
+		PGrnFinalizeMatchPositionsCharacter();
 
 		PGrnFinalizeHighlightHTML();
 
@@ -400,6 +402,7 @@ _PG_init(void)
 	PGrnInitializeHighlightHTML();
 
 	PGrnInitializeMatchPositionsByte();
+	PGrnInitializeMatchPositionsCharacter();
 
 	PGrnInitializeQueryExtractKeywords();
 }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index