Kouhei Sutou
null+****@clear*****
Sat Aug 27 14:59:26 JST 2016
Kouhei Sutou 2016-08-27 14:59:26 +0900 (Sat, 27 Aug 2016) New Revision: a5769e75945cb2a1356e248c211ddb32c2b1f7e0 https://github.com/pgroonga/pgroonga/commit/a5769e75945cb2a1356e248c211ddb32c2b1f7e0 Message: Add pgroonga.match_positions_character Added files: expected/function/match-positions-character/different-size-keyword.out expected/function/match-positions-character/multibyte.out expected/function/match-positions-character/multiple-keywords.out expected/function/match-positions-character/one-keyword.out sql/function/match-positions-character/different-size-keyword.sql sql/function/match-positions-character/multibyte.sql sql/function/match-positions-character/multiple-keywords.sql sql/function/match-positions-character/one-keyword.sql src/pgrn_match_positions_character.c src/pgrn_match_positions_character.h Modified files: Makefile pgroonga--1.1.0--1.1.1.sql pgroonga.sql src/pgroonga.c Modified: Makefile (+1 -0) =================================================================== --- Makefile 2016-08-27 12:40:25 +0900 (c349991) +++ Makefile 2016-08-27 14:59:26 +0900 (e4c60e4) @@ -13,6 +13,7 @@ SRCS = \ src/pgrn_keywords.c \ src/pgrn_jsonb.c \ src/pgrn_match_positions_byte.c \ + src/pgrn_match_positions_character.c \ src/pgrn_options.c \ src/pgrn_query_extract_keywords.c \ src/pgrn_snippet_html.c \ Added: expected/function/match-positions-character/different-size-keyword.out (+8 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-character/different-size-keyword.out 2016-08-27 14:59:26 +0900 (6ee5927) @@ -0,0 +1,8 @@ +SELECT pgroonga.match_positions_character( + '100㍉メートル', + ARRAY['ミリ']); + match_positions_character +--------------------------- + {{3,1}} +(1 row) + Added: expected/function/match-positions-character/multibyte.out (+9 -0) 100644 =================================================================== --- /dev/null +++ 
expected/function/match-positions-character/multibyte.out 2016-08-27 14:59:26 +0900 (19a7f34) @@ -0,0 +1,9 @@ +SELECT pgroonga.match_positions_character( + 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || + '登録された文書をすぐに検索結果に反映できます。', + ARRAY['検索']); + match_positions_character +--------------------------- + {{25,2},{46,2}} +(1 row) + Added: expected/function/match-positions-character/multiple-keywords.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-character/multiple-keywords.out 2016-08-27 14:59:26 +0900 (215b9cd) @@ -0,0 +1,19 @@ +SELECT pgroonga.match_positions_character( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); + match_positions_character +--------------------------- + {{13,4},{455,10}} +(1 row) + Added: expected/function/match-positions-character/one-keyword.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-character/one-keyword.out 2016-08-27 14:59:26 +0900 (374d4d9) @@ -0,0 +1,19 @@ +SELECT pgroonga.match_positions_character( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. 
One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); + match_positions_character +----------------------------------------- + {{0,7},{110,7},{197,7},{319,9},{553,7}} +(1 row) + Modified: pgroonga--1.1.0--1.1.1.sql (+6 -0) =================================================================== --- pgroonga--1.1.0--1.1.1.sql 2016-08-27 12:40:25 +0900 (e69de29) +++ pgroonga--1.1.0--1.1.1.sql 2016-08-27 14:59:26 +0900 (88cad6d) @@ -0,0 +1,6 @@ +CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[]) + RETURNS integer[2][] + AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character' + LANGUAGE C + VOLATILE + STRICT; Modified: pgroonga.sql (+7 -0) =================================================================== --- pgroonga.sql 2016-08-27 12:40:25 +0900 (0c14453) +++ pgroonga.sql 2016-08-27 14:59:26 +0900 (ccd9e73) @@ -44,6 +44,13 @@ CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[]) VOLATILE STRICT; +CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[]) + RETURNS integer[2][] + AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character' + LANGUAGE C + VOLATILE + STRICT; + CREATE FUNCTION pgroonga.query_extract_keywords(query text) RETURNS text[] AS 'MODULE_PATHNAME', 'pgroonga_query_extract_keywords' Added: sql/function/match-positions-character/different-size-keyword.sql (+3 -0) 100644 
=================================================================== --- /dev/null +++ sql/function/match-positions-character/different-size-keyword.sql 2016-08-27 14:59:26 +0900 (3eafbbb) @@ -0,0 +1,3 @@ +SELECT pgroonga.match_positions_character( + '100㍉メートル', + ARRAY['ミリ']); Added: sql/function/match-positions-character/multibyte.sql (+4 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-character/multibyte.sql 2016-08-27 14:59:26 +0900 (9cad188) @@ -0,0 +1,4 @@ +SELECT pgroonga.match_positions_character( + 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || + '登録された文書をすぐに検索結果に反映できます。', + ARRAY['検索']); Added: sql/function/match-positions-character/multiple-keywords.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-character/multiple-keywords.sql 2016-08-27 14:59:26 +0900 (3a69d89) @@ -0,0 +1,14 @@ +SELECT pgroonga.match_positions_character( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); Added: sql/function/match-positions-character/one-keyword.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-character/one-keyword.sql 2016-08-27 14:59:26 +0900 (6475802) @@ -0,0 +1,14 @@ +SELECT pgroonga.match_positions_character( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); Added: src/pgrn_match_positions_character.c (+158 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_match_positions_character.c 2016-08-27 14:59:26 +0900 (40d3583) @@ -0,0 +1,158 @@ +#include "pgroonga.h" + +#include "pgrn_global.h" +#include "pgrn_groonga.h" +#include "pgrn_match_positions_character.h" +#include "pgrn_keywords.h" + +#include <catalog/pg_type.h> +#include <utils/builtins.h> + +static grn_ctx *ctx = &PGrnContext; +static grn_obj *keywordsTable = NULL; + +PG_FUNCTION_INFO_V1(pgroonga_match_positions_character); + +void +PGrnInitializeMatchPositionsCharacter(void) +{ + keywordsTable = grn_table_create(ctx, NULL, 0, NULL, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), + NULL); + grn_obj_set_info(ctx, + keywordsTable, + GRN_INFO_NORMALIZER, + grn_ctx_get(ctx, "NormalizerAuto", -1)); +} + +void +PGrnFinalizeMatchPositionsCharacter(void) +{ + if (!keywordsTable) + return; + + grn_obj_close(ctx, keywordsTable); + keywordsTable = NULL; +} + +static ArrayType * +PGrnMatchPositionsCharacter(text *target) +{ + grn_obj buffer; + ArrayType *positions; + + GRN_UINT32_INIT(&buffer, GRN_OBJ_VECTOR); + + { + const char *string; + size_t stringLength; + const char *stringForNCharacters; + size_t nCharacters = 0; + + string = VARDATA_ANY(target); + stringLength = VARSIZE_ANY_EXHDR(target); + + stringForNCharacters = string; + + while (stringLength > 0) { +#define MAX_N_HITS 16 + grn_pat_scan_hit hits[MAX_N_HITS]; + const char *rest; + int i, nHits; + size_t chunkLength; + + nHits = grn_pat_scan(ctx, (grn_pat *)keywordsTable, + string, stringLength, + hits, MAX_N_HITS, &rest); + for (i = 0; i < nHits; i++) { + const char *start; + const char *end; + size_t startNCharacters = 0; + + start = string + hits[i].offset; + end = start + hits[i].length; + while (stringForNCharacters < end) { + int 
characterLength; + characterLength = grn_charlen(ctx, + stringForNCharacters, + end); + if (characterLength == 0) { + GRN_OBJ_FIN(ctx, &buffer); + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid string: %s", + stringForNCharacters))); + } + if (stringForNCharacters == start) { + startNCharacters = nCharacters; + } + nCharacters++; + stringForNCharacters += characterLength; + } + + GRN_UINT32_PUT(ctx, &buffer, startNCharacters); + GRN_UINT32_PUT(ctx, &buffer, nCharacters - startNCharacters); + } + + chunkLength = rest - string; + stringLength -= chunkLength; + string = rest; +#undef MAX_N_HITS + } + } + + { + int i, nElements; + Datum *elements; + int dims[2]; + int lbs[2]; + + nElements = GRN_BULK_VSIZE(&buffer) / (sizeof(uint32_t) * 2); + elements = palloc(sizeof(Datum) * 2 * nElements); + for (i = 0; i < nElements; i++) + { + uint32_t offset; + uint32_t length; + + offset = GRN_UINT32_VALUE_AT(&buffer, i * 2); + length = GRN_UINT32_VALUE_AT(&buffer, i * 2 + 1); + elements[i * 2] = Int32GetDatum(offset); + elements[i * 2 + 1] = Int32GetDatum(length); + } + dims[0] = nElements; + dims[1] = 2; + lbs[0] = 1; + lbs[1] = 1; + positions = construct_md_array(elements, + NULL, + 2, + dims, + lbs, + INT4OID, + sizeof(int32_t), + true, + 'i'); + pfree(elements); + } + + GRN_OBJ_FIN(ctx, &buffer); + + return positions; +} + +/** + * pgroonga.match_positions_character(target text, keywords text[]) : integer[2][] + */ +Datum +pgroonga_match_positions_character(PG_FUNCTION_ARGS) +{ + text *target = PG_GETARG_TEXT_PP(0); + ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1); + ArrayType *positions; + + PGrnKeywordsUpdateTable(keywords, keywordsTable); + positions = PGrnMatchPositionsCharacter(target); + + PG_RETURN_POINTER(positions); +} Added: src/pgrn_match_positions_character.h (+4 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_match_positions_character.h 2016-08-27 14:59:26 +0900 
(814fcd2) @@ -0,0 +1,4 @@ +#pragma once + +void PGrnInitializeMatchPositionsCharacter(void); +void PGrnFinalizeMatchPositionsCharacter(void); Modified: src/pgroonga.c (+3 -0) =================================================================== --- src/pgroonga.c 2016-08-27 12:40:25 +0900 (77fa517) +++ src/pgroonga.c 2016-08-27 14:59:26 +0900 (dc7a832) @@ -10,6 +10,7 @@ #include "pgrn_keywords.h" #include "pgrn_jsonb.h" #include "pgrn_match_positions_byte.h" +#include "pgrn_match_positions_character.h" #include "pgrn_options.h" #include "pgrn_query_extract_keywords.h" #include "pgrn_search.h" @@ -283,6 +284,7 @@ PGrnOnProcExit(int code, Datum arg) PGrnFinalizeQueryExtractKeywords(); PGrnFinalizeMatchPositionsByte(); + PGrnFinalizeMatchPositionsCharacter(); PGrnFinalizeHighlightHTML(); @@ -400,6 +402,7 @@ _PG_init(void) PGrnInitializeHighlightHTML(); PGrnInitializeMatchPositionsByte(); + PGrnInitializeMatchPositionsCharacter(); PGrnInitializeQueryExtractKeywords(); } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download