Kouhei Sutou
null+****@clear*****
Sun Apr 24 00:02:23 JST 2016
Kouhei Sutou 2016-04-24 00:02:23 +0900 (Sun, 24 Apr 2016) New Revision: 181c3995f964e85bb2b92674e06043e29adbe18b https://github.com/pgroonga/pgroonga/commit/181c3995f964e85bb2b92674e06043e29adbe18b Message: Add pgroonga.match_positions_byte Added files: expected/function/match-positions-byte/different-size-keyword.out expected/function/match-positions-byte/multibyte.out expected/function/match-positions-byte/multiple-keywords.out expected/function/match-positions-byte/one-keyword.out sql/function/match-positions-byte/different-size-keyword.sql sql/function/match-positions-byte/multibyte.sql sql/function/match-positions-byte/multiple-keywords.sql sql/function/match-positions-byte/one-keyword.sql src/pgrn_match_positions_byte.c src/pgrn_match_positions_byte.h Modified files: CMakeLists.txt Makefile pgroonga--1.0.6--1.0.7.sql pgroonga.sql src/pgroonga.c src/pgroonga.h Modified: CMakeLists.txt (+1 -0) =================================================================== --- CMakeLists.txt 2016-04-23 23:11:05 +0900 (596d756) +++ CMakeLists.txt 2016-04-24 00:02:23 +0900 (c0cd25a) @@ -65,6 +65,7 @@ set(PGRN_SOURCES "src/pgrn_highlight_html.c" "src/pgrn_keywords.c" "src/pgrn_jsonb.c" + "src/pgrn_match_positions_byte.c" "src/pgrn_options.c" "src/pgrn_snippet_html.c" "src/pgrn_value.c" Modified: Makefile (+1 -0) =================================================================== --- Makefile 2016-04-23 23:11:05 +0900 (25f8a3d) +++ Makefile 2016-04-24 00:02:23 +0900 (00acf9e) @@ -12,6 +12,7 @@ SRCS = \ src/pgrn_highlight_html.c \ src/pgrn_keywords.c \ src/pgrn_jsonb.c \ + src/pgrn_match_positions_byte.c \ src/pgrn_options.c \ src/pgrn_snippet_html.c \ src/pgrn_value.c \ Added: expected/function/match-positions-byte/different-size-keyword.out (+8 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-byte/different-size-keyword.out 2016-04-24 00:02:23 +0900 (44c4b28) @@ -0,0 +1,8 @@ +SELECT 
pgroonga.match_positions_byte( + '100㍉メートル', + ARRAY['ミリ']); + match_positions_byte +---------------------- + {{3,3}} +(1 row) + Added: expected/function/match-positions-byte/multibyte.out (+9 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-byte/multibyte.out 2016-04-24 00:02:23 +0900 (7b2d3e3) @@ -0,0 +1,9 @@ +SELECT pgroonga.match_positions_byte( + 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || + '登録された文書をすぐに検索結果に反映できます。', + ARRAY['検索']); + match_positions_byte +---------------------- + {{61,6},{124,6}} +(1 row) + Added: expected/function/match-positions-byte/multiple-keywords.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-byte/multiple-keywords.out 2016-04-24 00:02:23 +0900 (e8cba42) @@ -0,0 +1,19 @@ +SELECT pgroonga.match_positions_byte( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); + match_positions_byte +---------------------- + {{13,4},{455,10}} +(1 row) + Added: expected/function/match-positions-byte/one-keyword.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/function/match-positions-byte/one-keyword.out 2016-04-24 00:02:23 +0900 (ee381ff) @@ -0,0 +1,19 @@ +SELECT pgroonga.match_positions_byte( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); + match_positions_byte +----------------------------------------- + {{0,7},{110,7},{197,7},{319,9},{553,7}} +(1 row) + Modified: pgroonga--1.0.6--1.0.7.sql (+7 -0) =================================================================== --- pgroonga--1.0.6--1.0.7.sql 2016-04-23 23:11:05 +0900 (87416d7) +++ pgroonga--1.0.6--1.0.7.sql 2016-04-24 00:02:23 +0900 (2bba1ef) @@ -4,3 +4,10 @@ CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[]) LANGUAGE C VOLATILE STRICT; + +CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[]) + RETURNS integer[2][] + AS 'MODULE_PATHNAME', 'pgroonga_match_positions_byte' + LANGUAGE C + VOLATILE + STRICT; Modified: pgroonga.sql (+7 -0) =================================================================== --- pgroonga.sql 2016-04-23 23:11:05 +0900 (9c30f6d) +++ pgroonga.sql 2016-04-24 00:02:23 +0900 (5018fd3) @@ -37,6 +37,13 @@ CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[]) VOLATILE STRICT; +CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[]) + RETURNS integer[2][] + AS 'MODULE_PATHNAME', 'pgroonga_match_positions_byte' + LANGUAGE C + VOLATILE + STRICT; + CREATE FUNCTION pgroonga.match_term(target text, term text) RETURNS bool AS 'MODULE_PATHNAME', 'pgroonga_match_term_text' Added: sql/function/match-positions-byte/different-size-keyword.sql (+3 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-byte/different-size-keyword.sql 2016-04-24 00:02:23 +0900 (947c546) @@ -0,0 +1,3 @@ +SELECT pgroonga.match_positions_byte( + '100㍉メートル', + ARRAY['ミリ']); Added: sql/function/match-positions-byte/multibyte.sql (+4 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-byte/multibyte.sql 2016-04-24 00:02:23 +0900 (2ab075b) @@ 
-0,0 +1,4 @@ +SELECT pgroonga.match_positions_byte( + 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || + '登録された文書をすぐに検索結果に反映できます。', + ARRAY['検索']); Added: sql/function/match-positions-byte/multiple-keywords.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-byte/multiple-keywords.sql 2016-04-24 00:02:23 +0900 (c9f0bfb) @@ -0,0 +1,14 @@ +SELECT pgroonga.match_positions_byte( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); Added: sql/function/match-positions-byte/one-keyword.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/match-positions-byte/one-keyword.sql 2016-04-24 00:02:23 +0900 (05cc123) @@ -0,0 +1,14 @@ +SELECT pgroonga.match_positions_byte( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + E'\n' || + E'\n' || + 'Groonga is also a column-oriented database management system (DBMS). 
' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); Added: src/pgrn_match_positions_byte.c (+128 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_match_positions_byte.c 2016-04-24 00:02:23 +0900 (0808493) @@ -0,0 +1,128 @@ +#include "pgroonga.h" + +#include "pgrn_global.h" +#include "pgrn_groonga.h" +#include "pgrn_match_positions_byte.h" +#include "pgrn_keywords.h" + +#include <catalog/pg_type.h> +#include <utils/builtins.h> + +static grn_ctx *ctx = &PGrnContext; +static grn_obj *keywordsTable = NULL; + +PG_FUNCTION_INFO_V1(pgroonga_match_positions_byte); + +void +PGrnInitializeMatchPositionsByte(void) +{ + keywordsTable = grn_table_create(ctx, NULL, 0, NULL, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), + NULL); + grn_obj_set_info(ctx, + keywordsTable, + GRN_INFO_NORMALIZER, + grn_ctx_get(ctx, "NormalizerAuto", -1)); +} + +void +PGrnFinalizeMatchPositionsByte(void) +{ + if (!keywordsTable) + return; + + grn_obj_close(ctx, keywordsTable); + keywordsTable = NULL; +} + +static ArrayType * +PGrnMatchPositionsByte(text *target) +{ + grn_obj buffer; + ArrayType *positions; + + GRN_UINT32_INIT(&buffer, GRN_OBJ_VECTOR); + + { + const char *string; + size_t stringLength; + + string = VARDATA_ANY(target); + stringLength = VARSIZE_ANY_EXHDR(target); + + while (stringLength > 0) { +#define MAX_N_HITS 16 + grn_pat_scan_hit hits[MAX_N_HITS]; + const char *rest; + int i, nHits; + size_t chunkLength; + + nHits = grn_pat_scan(ctx, (grn_pat *)keywordsTable, + string, stringLength, + hits, MAX_N_HITS, &rest); + for (i = 0; i < nHits; i++) { + GRN_UINT32_PUT(ctx, &buffer, hits[i].offset); + GRN_UINT32_PUT(ctx, &buffer, hits[i].length); + } + + chunkLength = rest - string; + stringLength -= 
chunkLength; + string = rest; +#undef MAX_N_HITS + } + } + + { + int i, nElements; + Datum *elements; + int dims[2]; + int lbs[2]; + + nElements = GRN_BULK_VSIZE(&buffer) / (sizeof(uint32_t) * 2); + elements = palloc(sizeof(Datum) * 2 * nElements); + for (i = 0; i < nElements; i++) + { + uint32_t offset; + uint32_t length; + + offset = GRN_UINT32_VALUE_AT(&buffer, i * 2); + length = GRN_UINT32_VALUE_AT(&buffer, i * 2 + 1); + elements[i * 2] = Int32GetDatum(offset); + elements[i * 2 + 1] = Int32GetDatum(length); + } + dims[0] = nElements; + dims[1] = 2; + lbs[0] = 1; + lbs[1] = 1; + positions = construct_md_array(elements, + NULL, + 2, + dims, + lbs, + INT4OID, + sizeof(int32_t), + true, + 'i'); + } + + GRN_OBJ_FIN(ctx, &buffer); + + return positions; +} + +/** + * pgroonga.match_positions_byte(target text, keywords text[]) : integer[2][] + */ +Datum +pgroonga_match_positions_byte(PG_FUNCTION_ARGS) +{ + text *target = PG_GETARG_TEXT_PP(0); + ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1); + ArrayType *positions; + + PGrnKeywordsUpdateTable(keywords, keywordsTable); + positions = PGrnMatchPositionsByte(target); + + PG_RETURN_POINTER(positions); +} Added: src/pgrn_match_positions_byte.h (+4 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_match_positions_byte.h 2016-04-24 00:02:23 +0900 (0ff85ae) @@ -0,0 +1,4 @@ +#pragma once + +void PGrnInitializeMatchPositionsByte(void); +void PGrnFinalizeMatchPositionsByte(void); Modified: src/pgroonga.c (+5 -0) =================================================================== --- src/pgroonga.c 2016-04-23 23:11:05 +0900 (d51d863) +++ src/pgroonga.c 2016-04-24 00:02:23 +0900 (5d1219f) @@ -9,6 +9,7 @@ #include "pgrn_highlight_html.h" #include "pgrn_keywords.h" #include "pgrn_jsonb.h" +#include "pgrn_match_positions_byte.h" #include "pgrn_options.h" #include "pgrn_search.h" #include "pgrn_value.h" @@ -262,6 +263,8 @@ PGrnOnProcExit(int code, Datum arg) { grn_obj *db; + 
PGrnFinalizeMatchPositionsByte(); + PGrnFinalizeHighlightHTML(); PGrnFinalizeKeywords(); @@ -373,6 +376,8 @@ _PG_init(void) PGrnInitializeKeywords(); PGrnInitializeHighlightHTML(); + + PGrnInitializeMatchPositionsByte(); } static grn_id Modified: src/pgroonga.h (+1 -0) =================================================================== --- src/pgroonga.h 2016-04-23 23:11:05 +0900 (f916b86) +++ src/pgroonga.h 2016-04-24 00:02:23 +0900 (d7051f3) @@ -57,6 +57,7 @@ extern Datum PGDLLEXPORT pgroonga_table_name(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_command(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_snippet_html(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_highlight_html(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_match_positions_byte(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_match_term_text(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_match_term_text_array(PG_FUNCTION_ARGS); -------------- next part -------------- HTMLの添付ファイルを保管しました...Download