Kouhei Sutou
null+****@clear*****
Mon Jan 12 23:39:34 JST 2015
Kouhei Sutou 2015-01-12 23:39:34 +0900 (Mon, 12 Jan 2015) New Revision: c29e252192052c4b35eaffbe80043d3d57864389 https://github.com/pgroonga/pgroonga/commit/c29e252192052c4b35eaffbe80043d3d57864389 Message: Import Added files: .dir-locals.el .gitignore COPYING Makefile README.md pgroonga--0.2.0.sql pgroonga.c pgroonga.control pgroonga.h pgroonga_types.c Added: .dir-locals.el (+5 -0) 100644 =================================================================== --- /dev/null +++ .dir-locals.el 2015-01-12 23:39:34 +0900 (c1d09fa) @@ -0,0 +1,5 @@ +((c-mode . ((c-file-style . "bsd") + (c-basic-offset . 4) + (tab-width . 4) + (indent-tabs-mode . t) + (show-trailing-whitespace . t)))) Added: .gitignore (+2 -0) 100644 =================================================================== --- /dev/null +++ .gitignore 2015-01-12 23:39:34 +0900 (9d22eb4) @@ -0,0 +1,2 @@ +*.o +*.so Added: COPYING (+21 -0) 100644 =================================================================== --- /dev/null +++ COPYING 2015-01-12 23:39:34 +0900 (176e8f5) @@ -0,0 +1,21 @@ +Portions Copyright (c) 2015, Kouhei Sutou +Portions Copyright (c) 2010, Itagaki Takahiro +Portions Copyright (c) 2009-2010, NIPPON TELEGRAPH AND TELEPHONE CORPORATION +Portions Copyright (c) 1996-2010, The PostgreSQL Global Development Group +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose, without fee, and without a written +agreement is hereby granted, provided that the above copyright notice and +this paragraph and the following two paragraphs appear in all copies. + +IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE TO ANY PARTY FOR DIRECT, +INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST +PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN +IF THE COPYRIGHT HOLDERS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, +AND THE COPYRIGHT HOLDERS HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, +SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Added: Makefile (+18 -0) 100644 =================================================================== --- /dev/null +++ Makefile 2015-01-12 23:39:34 +0900 (78f5e4e) @@ -0,0 +1,18 @@ +MODULE_big = pgroonga +SRCS = pgroonga.c pgroonga_types.c +OBJS = $(SRCS:.c=.o) +EXTENSION = pgroonga +DATA = pgroonga--0.2.0.sql +PG_CPPFLAGS = $(shell pkg-config --cflags groonga) +SHLIB_LINK = $(shell pkg-config --libs groonga) +REGRESS = pgroonga update bench + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +.PHONY: subclean +clean: subclean + +subclean: + rm -f pgroonga.sql.in Added: README.md (+140 -0) 100644 =================================================================== --- /dev/null +++ README.md 2015-01-12 23:39:34 +0900 (b163775) @@ -0,0 +1,140 @@ +# PGroonga(ぴーじーるんが) + +## 概要 + +PGroongaはPostgreSQLからインデックスとして +[Groonga](http://groonga.org/ja/)を使うための拡張機能です。 + +PostgreSQLは標準では日本語で全文検索できませんが、PGroongaを使うと日本 +語で高速に全文検索できるようになります。日本語で全文検索機能を実現する +ための類似の拡張機能は次のものがあります。 + + * [pg_trgm](https://www.postgresql.jp/document/9.3/html/pgtrgm.html) + * PostgreSQLに付属しているがデフォルトではインストールされない。 + * 日本語に対応させるにはソースコードを変更する必要がある。 + * [pg_bigm](http://pgbigm.sourceforge.jp/) + * ソースコードを変更しなくても日本語に対応している。 + * 正確な全文検索機能を使うには + [Recheck機能](http://pgbigm.sourceforge.jp/pg_bigm-1-1.html#enable_recheck) + を有効にする必要がある。 + * Recheck機能を有効にするとインデックスを使った検索をしてから、イ + ンデックスを使って見つかったレコードに対してシーケンシャルに検索 + をするのでインデックスを使った検索でのヒット件数が多くなると遅く + なりやすい。 + * Recheck機能を無効にするとキーワードが含まれていないレコードもヒッ + トする可能性がある。 + +PGroongaはpg\_trgmのようにソースコードを変更しなくても日本語に対応して +います。 + +PGroongaはpg\_bigmのようにRecheck機能を使わなくてもインデックスを使っ 
+た検索だけで正確な検索結果を返せます。そのため、インデックスを使った検 +索でヒット件数が多くてもpg\_bigmほど遅くなりません。(仕組みの上は。要 +ベンチマーク。協力者募集。) + +ただし、PGroongaは現時点ではWALに対応していないためクラッシュリカバリー +機能やレプリケーションに対応していません。(pg\_trgmとpg\_bigmは対応し +ています。正確に言うとpg\_trgmとpg\_bigmが対応しているわけではなく、 +pg\_trgmとpg\_bigmが使っているGINやGiSTが対応しています。) + +## インストール + +PostgreSQLをインストールします。 + +[Groongaをインストール](http://groonga.org/ja/docs/install.html)します。 +パッケージでのインストールがオススメです。 + +パッケージでインストールするときは次のパッケージをインストールしてください。 + + * `groonga-devel`: CentOSの場合 + * `libgroonga-dev`: Debian GNU/Linux, Ubuntuの場合 + +PGroongaをインストールします。 + + % git clone https://github.com/pgroonga/pgroonga.git + % cd pgroonga + % make + % sudo make install + +データベースに接続して`CREATE EXTENSION pgroonga`を実行します。 + + % psql -d db + ... + db=# CREATE EXTENSION pgroonga; + CREATE EXTENSION + +## 使い方 + +`text`型のカラムを作って`pgroonga`インデックスを張ります。 + +```sql +CREATE TABLE memos ( + id integer, + content text +); + +CREATE INDEX pgroonga_index ON memos USING pgroonga (content); +``` + +データを投入します。 + +```sql +INSERT INTO memos VALUES (1, 'PostgreSQLはリレーショナル・データベース管理システムです。'); +INSERT INTO memos VALUES (2, 'Groongaは日本語対応の高速な全文検索エンジンです。'); +INSERT INTO memos VALUES (3, 'PGroongaはインデックスとしてGroongaを使うためのPostgreSQLの拡張機能です。'); +``` + +検索します。現時点ではシーケンシャルスキャンでの全文検索(インデックス +を使わない全文検索)には対応していないので、シーケンシャルスキャン機能 +を無効にします。(あるいはもっとたくさんのデータを投入します。) + +```sql +SET enable_seqscan = off; +``` + +全文検索をするときは`%%`演算子を使います。 + +```sql +SELECT * FROM memos WHERE content %% '全文検索'; +-- id | content +-- ----+--------------------------------------------------- +-- 2 | Groongaは日本語対応の高速な全文検索エンジンです。 +-- (1 行) +``` + +## アンインストール + +次のSQLでアンインストールできます。 + +```sql +DROP EXTENSION pgroonga CASCADE; +DELETE FROM pg_catalog.pg_am WHERE amname = 'pgroonga'; +``` + +`pg_catalog.pg_am`から手動でレコードを消さないといけないのはおかしい気 +がするので、何がおかしいか知っている人は教えてくれるとうれしいです。 + +## ライセンス + +ライセンスはBSDライセンスやMITライセンスと類似の +[PostgreSQLライセンス](http://opensource.org/licenses/postgresql)です。 + +著作権保持者などの詳細は[COPYING](COPYING)ファイルを参照してください。 + +## TODO + + * 実装 + * WAL対応 + * スコアー対応 + * 
クエリー構文対応(`キーワード1 OR キーワード2`のようなやつ) + * シーケンシャルスキャン対応(grn式を作って検索する?) + * COLLATE対応(今は必ずGroongaのNormalizerAutoを使っている) + * トークナイザーのカスタマイズ対応(今は必ずTokenBigramを使っている) + * ドキュメント + * 英語で書く + * サイトを用意する + +## 感謝 + + * 板垣さん + * PGroongaは板垣さんが開発した[textsearch_groonga](http://textsearch-ja.projects.pgfoundry.org/textsearch_groonga.html)をベースにしています。 Added: pgroonga--0.2.0.sql (+215 -0) 100644 =================================================================== --- /dev/null +++ pgroonga--0.2.0.sql 2015-01-12 23:39:34 +0900 (931d6a0) @@ -0,0 +1,215 @@ +SET search_path = public; + +CREATE SCHEMA pgroonga; + +CREATE FUNCTION pgroonga.contains(text, text) + RETURNS bool + AS 'MODULE_PATHNAME', 'pgroonga_contains_text' + LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION pgroonga.contains(bpchar, bpchar) + RETURNS bool + AS 'MODULE_PATHNAME', 'pgroonga_contains_bpchar' + LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR %% ( + PROCEDURE = pgroonga.contains, + LEFTARG = text, + RIGHTARG = text +); + +CREATE OPERATOR %% ( + PROCEDURE = pgroonga.contains, + LEFTARG = bpchar, + RIGHTARG = bpchar +); + + +CREATE FUNCTION pgroonga.insert(internal) RETURNS bool AS 'MODULE_PATHNAME','pgroonga_insert' LANGUAGE C; +CREATE FUNCTION pgroonga.beginscan(internal) RETURNS internal AS 'MODULE_PATHNAME','pgroonga_beginscan' LANGUAGE C; +CREATE FUNCTION pgroonga.gettuple(internal) RETURNS bool AS 'MODULE_PATHNAME','pgroonga_gettuple' LANGUAGE C; +CREATE FUNCTION pgroonga.getbitmap(internal) RETURNS bigint AS 'MODULE_PATHNAME','pgroonga_getbitmap' LANGUAGE C; +CREATE FUNCTION pgroonga.rescan(internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_rescan' LANGUAGE C; +CREATE FUNCTION pgroonga.endscan(internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_endscan' LANGUAGE C; +CREATE FUNCTION pgroonga.build(internal) RETURNS internal AS 'MODULE_PATHNAME','pgroonga_build' LANGUAGE C; +CREATE FUNCTION pgroonga.bulkdelete(internal) RETURNS internal AS 'MODULE_PATHNAME','pgroonga_bulkdelete' LANGUAGE C; +CREATE 
FUNCTION pgroonga.vacuumcleanup(internal) RETURNS internal AS 'MODULE_PATHNAME','pgroonga_vacuumcleanup' LANGUAGE C; +CREATE FUNCTION pgroonga.costestimate(internal) RETURNS internal AS 'MODULE_PATHNAME','pgroonga_costestimate' LANGUAGE C; +CREATE FUNCTION pgroonga.options(internal) RETURNS internal AS 'MODULE_PATHNAME','pgroonga_options' LANGUAGE C; + +CREATE FUNCTION pgroonga.typeof(oid, integer) RETURNS integer AS 'MODULE_PATHNAME','pgroonga_typeof' LANGUAGE C; +CREATE FUNCTION pgroonga.get_text(text, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_text' LANGUAGE C; +CREATE FUNCTION pgroonga.get_bpchar(bpchar, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_bpchar' LANGUAGE C; +CREATE FUNCTION pgroonga.get_bool(bool, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_bool' LANGUAGE C; +CREATE FUNCTION pgroonga.get_int2(int2, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_int2' LANGUAGE C; +CREATE FUNCTION pgroonga.get_int4(int4, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_int4' LANGUAGE C; +CREATE FUNCTION pgroonga.get_int8(int8, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_int8' LANGUAGE C; +CREATE FUNCTION pgroonga.get_float4(float4, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_float4' LANGUAGE C; +CREATE FUNCTION pgroonga.get_float8(float8, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_float8' LANGUAGE C; +CREATE FUNCTION pgroonga.get_timestamp(timestamp, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_timestamp' LANGUAGE C; +CREATE FUNCTION pgroonga.get_timestamptz(timestamptz, internal, internal) RETURNS void AS 'MODULE_PATHNAME','pgroonga_get_timestamptz' LANGUAGE C; +CREATE FUNCTION pgroonga.set_text(internal, internal, text) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_text' LANGUAGE C; +CREATE FUNCTION pgroonga.set_bpchar(internal, internal, bpchar) RETURNS void 
AS 'MODULE_PATHNAME','pgroonga_set_bpchar' LANGUAGE C; +CREATE FUNCTION pgroonga.set_bool(internal, internal, bool) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_bool' LANGUAGE C; +CREATE FUNCTION pgroonga.set_int2(internal, internal, int2) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_int2' LANGUAGE C; +CREATE FUNCTION pgroonga.set_int4(internal, internal, int4) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_int4' LANGUAGE C; +CREATE FUNCTION pgroonga.set_int8(internal, internal, int8) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_int8' LANGUAGE C; +CREATE FUNCTION pgroonga.set_float4(internal, internal, float4) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_float4' LANGUAGE C; +CREATE FUNCTION pgroonga.set_float8(internal, internal, float8) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_float8' LANGUAGE C; +CREATE FUNCTION pgroonga.set_timestamp(internal, internal, timestamp) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_timestamp' LANGUAGE C; +CREATE FUNCTION pgroonga.set_timestamptz(internal, internal, timestamptz) RETURNS void AS 'MODULE_PATHNAME','pgroonga_set_timestamptz' LANGUAGE C; + +INSERT INTO pg_catalog.pg_am VALUES( + 'pgroonga', -- amname + 7, -- amstrategies + 3, -- amsupport + true, -- amcanorder + true, -- amcanorderbyop + true, -- amcanbackward + true, -- amcanunique + true, -- amcanmulticol + true, -- amoptionalkey + true, -- amindexnulls + false, -- amsearchnulls + false, -- amstorage + true, -- amclusterable + false, -- ampredlocks + 0, -- amkeytype + 'pgroonga.insert', -- aminsert + 'pgroonga.beginscan', -- ambeginscan + 'pgroonga.gettuple', -- amgettuple + 'pgroonga.getbitmap', -- amgetbitmap + 'pgroonga.rescan', -- amrescan + 'pgroonga.endscan', -- amendscan + 0, -- ammarkpos, + 0, -- amrestrpos, + 'pgroonga.build', -- ambuild + 0, -- ambuildempty + 'pgroonga.bulkdelete', -- ambulkdelete + 'pgroonga.vacuumcleanup', -- amvacuumcleanup + 0, -- amcanreturn + 'pgroonga.costestimate', -- amcostestimate + 'pgroonga.options' -- 
amoptions +); + +CREATE OPERATOR CLASS pgroonga.text_ops DEFAULT FOR TYPE text + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + OPERATOR 7 %%, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_text(text, internal, internal), + FUNCTION 3 pgroonga.set_text(internal, internal, text); + +CREATE OPERATOR CLASS pgroonga.bpchar_ops DEFAULT FOR TYPE bpchar + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + OPERATOR 7 %%, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_bpchar(bpchar, internal, internal), + FUNCTION 3 pgroonga.set_bpchar(internal, internal, bpchar); + +CREATE OPERATOR CLASS pgroonga.bool_ops DEFAULT FOR TYPE bool + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_bool(bool, internal, internal), + FUNCTION 3 pgroonga.set_bool(internal, internal, bool); + +CREATE OPERATOR CLASS pgroonga.int2_ops DEFAULT FOR TYPE int2 + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_int2(int2, internal, internal), + FUNCTION 3 pgroonga.set_int2(internal, internal, int2); + +CREATE OPERATOR CLASS pgroonga.int4_ops DEFAULT FOR TYPE int4 + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_int4(int4, internal, internal), + FUNCTION 3 pgroonga.set_int4(internal, internal, int4); + +CREATE OPERATOR CLASS pgroonga.int8_ops DEFAULT FOR TYPE int8 + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, 
integer), + FUNCTION 2 pgroonga.get_int8(int8, internal, internal), + FUNCTION 3 pgroonga.set_int8(internal, internal, int8); + +CREATE OPERATOR CLASS pgroonga.float4_ops DEFAULT FOR TYPE float4 + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_float4(float4, internal, internal), + FUNCTION 3 pgroonga.set_float4(internal, internal, float4); + +CREATE OPERATOR CLASS pgroonga.float8_ops DEFAULT FOR TYPE float8 + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_float8(float8, internal, internal), + FUNCTION 3 pgroonga.set_float8(internal, internal, float8); + +CREATE OPERATOR CLASS pgroonga.timestamp_ops DEFAULT FOR TYPE timestamp + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_timestamp(timestamp, internal, internal), + FUNCTION 3 pgroonga.set_timestamp(internal, internal, timestamp); + +CREATE OPERATOR CLASS pgroonga.timestamptz_ops DEFAULT FOR TYPE timestamptz + USING pgroonga AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + OPERATOR 6 <>, + FUNCTION 1 pgroonga.typeof(oid, integer), + FUNCTION 2 pgroonga.get_timestamptz(timestamptz, internal, internal), + FUNCTION 3 pgroonga.set_timestamptz(internal, internal, timestamptz); Added: pgroonga.c (+1007 -0) 100644 =================================================================== --- /dev/null +++ pgroonga.c 2015-01-12 23:39:34 +0900 (db058ad) @@ -0,0 +1,1007 @@ +/* + * IDENTIFICATION + * pgroonga.c + */ + +#include "pgroonga.h" + +#include <access/relscan.h> +#include <catalog/index.h> +#include <catalog/pg_tablespace.h> +#include <mb/pg_wchar.h> +#include <miscadmin.h> 
+#include <storage/lmgr.h> + +#include <groonga.h> + +PG_MODULE_MAGIC; + +typedef struct GrnBuildStateData +{ + grn_obj *idsTable; + grn_ii_buffer *buffer; + double nIndexedTuples; +} GrnBuildStateData; + +typedef GrnBuildStateData *GrnBuildState; + +typedef struct GrnScanOpaqueData +{ + grn_obj *idsTable; + grn_obj *searched; + grn_obj *sorted; + grn_obj *targetTable; + grn_table_cursor *cursor; + grn_obj *keyAccessor; + grn_id currentID; +} GrnScanOpaqueData; + +typedef GrnScanOpaqueData *GrnScanOpaque; + +PG_FUNCTION_INFO_V1(pgroonga_contains_text); +PG_FUNCTION_INFO_V1(pgroonga_contains_bpchar); + +PG_FUNCTION_INFO_V1(pgroonga_insert); +PG_FUNCTION_INFO_V1(pgroonga_beginscan); +PG_FUNCTION_INFO_V1(pgroonga_gettuple); +PG_FUNCTION_INFO_V1(pgroonga_getbitmap); +PG_FUNCTION_INFO_V1(pgroonga_rescan); +PG_FUNCTION_INFO_V1(pgroonga_endscan); +PG_FUNCTION_INFO_V1(pgroonga_build); +PG_FUNCTION_INFO_V1(pgroonga_bulkdelete); +PG_FUNCTION_INFO_V1(pgroonga_vacuumcleanup); +PG_FUNCTION_INFO_V1(pgroonga_costestimate); +PG_FUNCTION_INFO_V1(pgroonga_options); + +static grn_ctx grnContext; +static grn_ctx *ctx = &grnContext; +static grn_obj buffer; +static grn_obj inspectBuffer; + +static const char * +GrnInspect(grn_obj *object) +{ + GRN_BULK_REWIND(&inspectBuffer); + grn_inspect(ctx, &inspectBuffer, object); + GRN_TEXT_PUTC(ctx, &inspectBuffer, '\0'); + return GRN_TEXT_VALUE(&inspectBuffer); +} + +static grn_encoding +GrnGetEncoding(void) +{ + int enc = GetDatabaseEncoding(); + + if (pg_encoding_max_length(enc) > 1) + return GRN_ENC_NONE; + + switch (enc) + { + case PG_EUC_JP: + case PG_EUC_JIS_2004: + return GRN_ENC_EUC_JP; + case PG_UTF8: + return GRN_ENC_UTF8; + case PG_LATIN1: + return GRN_ENC_LATIN1; + case PG_KOI8R: + return GRN_ENC_KOI8R; + default: + elog(WARNING, + "groonga: use default encoding instead of '%s'", + GetDatabaseEncodingName()); + return GRN_ENC_DEFAULT; + } +} + +static void +GrnEnsureDatabase(void) +{ + char path[MAXPGPATH]; + grn_obj *db; + + 
GRN_CTX_SET_ENCODING(ctx, GrnGetEncoding()); + join_path_components(path, + GetDatabasePath(MyDatabaseId, DEFAULTTABLESPACE_OID), + GrnDatabaseBasename); + + db = grn_db_open(ctx, path); + if (db) + return; + + db = grn_db_create(ctx, path, NULL); + if (!db) + ereport(ERROR, + (errcode(ERRCODE_IO_ERROR), + errmsg("groonga: failed to create database: <%s>: %s", + path, ctx->errbuf))); +} + +static void +GrnOnProcExit(int code, Datum arg) +{ + grn_obj *db; + + GRN_OBJ_FIN(ctx, &inspectBuffer); + GRN_OBJ_FIN(ctx, &buffer); + + db = grn_ctx_db(ctx); + if (db) + grn_obj_close(ctx, db); + + grn_ctx_fin(ctx); + grn_fin(); +} + +void +_PG_init(void) +{ + if (grn_init() != GRN_SUCCESS) + ereport(ERROR, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("groonga: failed to initialize Groonga"))); + if (grn_ctx_init(ctx, 0)) + ereport(ERROR, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("groonga: failed to initialize Groonga context"))); + + on_proc_exit(GrnOnProcExit, 0); + + GRN_VOID_INIT(&buffer); + GRN_TEXT_INIT(&inspectBuffer, 0); + + GrnEnsureDatabase(); +} + +static int +GrnRCToPgErrorCode(grn_rc rc) +{ + int errorCode = ERRCODE_SYSTEM_ERROR; + + /* TODO: Fill me. 
*/ + switch (rc) + { + case GRN_NO_SUCH_FILE_OR_DIRECTORY: + errorCode = ERRCODE_IO_ERROR; + break; + case GRN_INPUT_OUTPUT_ERROR: + errorCode = ERRCODE_IO_ERROR; + break; + case GRN_INVALID_ARGUMENT: + errorCode = ERRCODE_INVALID_PARAMETER_VALUE; + break; + default: + break; + } + + return errorCode; +} + +static grn_bool +GrnCheck(const char *message) +{ + if (ctx->rc == GRN_SUCCESS) + return GRN_TRUE; + + ereport(ERROR, + (errcode(GrnRCToPgErrorCode(ctx->rc)), + errmsg("groonga: %s: %s", message, ctx->errbuf))); + return GRN_FALSE; +} + +/* + * Support functions and type-specific routines + */ + +static grn_builtin_type +GrnGetType(Relation index, AttrNumber n) +{ + FmgrInfo *function; + TupleDesc desc = RelationGetDescr(index); + Datum type; + + function = index_getprocinfo(index, n + 1, GrnTypeOfProc); + type = FunctionCall2(function, + ObjectIdGetDatum(desc->attrs[n]->atttypid), + Int32GetDatum(desc->attrs[n]->atttypmod)); + return (grn_builtin_type) DatumGetInt32(type); +} + +static void +GrnSetValue(Relation index, AttrNumber n, grn_obj *buffer, Datum value) +{ + FmgrInfo *function; + + function = index_getprocinfo(index, n + 1, GrnSetValueProc); + FunctionCall3(function, + PointerGetDatum(ctx), PointerGetDatum(buffer), + value); +} + +static void +GrnGetValue(Relation index, AttrNumber n, Datum value, grn_obj *buffer) +{ + FmgrInfo *function; + + function = index_getprocinfo(index, n + 1, GrnGetValueProc); + FunctionCall3(function, + value, + PointerGetDatum(ctx), PointerGetDatum(buffer)); +} + +static grn_obj * +GrnLookup(const char *name, int errorLevel) +{ + grn_obj *object = grn_ctx_get(ctx, name, strlen(name)); + if (!object) + ereport(errorLevel, + (errcode(ERRCODE_INVALID_NAME), + errmsg("groonga: object isn't found: <%s>", name))); + return object; +} + +static grn_obj * +GrnLookupIDsTable(Relation index, int errorLevel) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + + snprintf(name, sizeof(name), GrnIDsTableNameFormat, index->rd_node.relNode); + return 
GrnLookup(name, errorLevel); +} + +static grn_obj * +GrnLookupIndexColumn(Relation index, int errorLevel) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + + snprintf(name, sizeof(name), + GrnLexiconNameFormat ".%s", + index->rd_node.relNode, GrnIndexColumnName); + return GrnLookup(name, errorLevel); +} + +static grn_obj * +GrnCreateTable(const char *name, + grn_obj_flags flags, + grn_obj *type) +{ + grn_obj *table; + + table = grn_table_create(ctx, + name, strlen(name), NULL, + GRN_OBJ_PERSISTENT | flags, + type, + NULL); + GrnCheck("groonga: failed to create table"); + + return table; +} + +static grn_obj * +GrnCreateColumn(grn_obj *table, + const char *name, + grn_obj_flags flags, + grn_obj *type) +{ + grn_obj *column; + + column = grn_column_create(ctx, table, + name, strlen(name), NULL, + GRN_OBJ_PERSISTENT | flags, + type); + GrnCheck("groonga: failed to create column"); + + return column; +} + +/** + * GrnCreate + * + * @param ctx + * @param index + */ +static void +GrnCreate(Relation index, grn_obj **idsTable, + grn_obj **lexicon, grn_obj **indexColumn) +{ + char idsTableName[GRN_TABLE_MAX_KEY_SIZE]; + char lexiconName[GRN_TABLE_MAX_KEY_SIZE]; + grn_id typeID = GRN_ID_NIL; + int i; + TupleDesc desc; + Oid relNode = index->rd_node.relNode; + + desc = RelationGetDescr(index); + + snprintf(idsTableName, sizeof(idsTableName), + GrnIDsTableNameFormat, relNode); + *idsTable = GrnCreateTable(idsTableName, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_UINT64)); + + for (i = 0; i < desc->natts; i++) + { + grn_id attributeTypeID; + + attributeTypeID = GrnGetType(index, i); + if (typeID == GRN_ID_NIL) + typeID = attributeTypeID; + + if (attributeTypeID != typeID) + { + /* TODO: Show details */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("groonga: must be the same type columns " + "for multiple column index"))); + } + } + + switch (typeID) + { + case GRN_DB_TEXT: + case GRN_DB_LONG_TEXT: + typeID = GRN_DB_SHORT_TEXT; + break; + } + + snprintf(lexiconName, 
sizeof(lexiconName), GrnLexiconNameFormat, relNode); + *lexicon = GrnCreateTable(lexiconName, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, typeID)); + if (typeID == GRN_DB_SHORT_TEXT) + { + grn_obj_set_info(ctx, *lexicon, GRN_INFO_NORMALIZER, + GrnLookup("NormalizerAuto", WARNING)); + grn_obj_set_info(ctx, *lexicon, GRN_INFO_DEFAULT_TOKENIZER, + grn_ctx_at(ctx, GRN_DB_BIGRAM)); + } + + { + grn_obj_flags flags = GRN_OBJ_COLUMN_INDEX; + if (typeID == GRN_DB_SHORT_TEXT) + flags |= GRN_OBJ_WITH_POSITION; + if (desc->natts > 1) + flags |= GRN_OBJ_WITH_SECTION; + *indexColumn = GrnCreateColumn(*lexicon, + GrnIndexColumnName, + flags, + *idsTable); + } +} + +static grn_id +CtidToUInt64(ItemPointer ctid) +{ + BlockNumber blockNumber; + OffsetNumber offsetNumber; + + blockNumber = ItemPointerGetBlockNumber(ctid); + offsetNumber = ItemPointerGetOffsetNumber(ctid); + return (blockNumber << 16 | offsetNumber); +} + +static ItemPointerData +UInt64ToCtid(uint64 key) +{ + ItemPointerData ctid; + ItemPointerSet(&ctid, (key >> 16) & 0xFFFFFFFF, key & 0xFFFF); + return ctid; +} + +static void +GrnLock(Relation index, LOCKMODE mode) +{ + const RelFileNode *rnode = &index->rd_node; + LockDatabaseObject(rnode->spcNode, + rnode->dbNode, + rnode->relNode, + mode); +} + +static void +GrnUnlock(Relation index, LOCKMODE mode) +{ + const RelFileNode *rnode = &index->rd_node; + UnlockDatabaseObject(rnode->spcNode, + rnode->dbNode, + rnode->relNode, + mode); +} + +/** + * pgroonga.contains(doc text, key text) : bool + */ +Datum +pgroonga_contains_text(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errmsg("groonga: 'text %%%% text' requires index."))); + PG_RETURN_BOOL(false); +} + +/** + * pgroonga.contains(doc bpchar, key bpchar) : bool + */ +Datum +pgroonga_contains_bpchar(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errmsg("groonga: 'bpchar %%%% bpchar' requires index."))); + PG_RETURN_BOOL(false); +} + +static void +GrnInsert(grn_ctx *ctx, + Relation index, + grn_obj *idsTable, + grn_obj *indexColumn, 
+ Datum *values, + bool *isnull, + ItemPointer ht_ctid) +{ + TupleDesc desc = RelationGetDescr(index); + uint64 key = CtidToUInt64(ht_ctid); + grn_id id; + int i; + + id = grn_table_add(ctx, idsTable, &key, sizeof(uint64), NULL); + + for (i = 0; i < desc->natts; i++) + { + unsigned int sectionID = i + 1; + + if (isnull[i]) + continue; + + index_getprocinfo(index, i, GrnGetValueProc); + grn_obj_reinit(ctx, &buffer, GrnGetType(index, i), 0); + GrnSetValue(index, i, &buffer, values[i]); + grn_column_index_update(ctx, indexColumn, id, sectionID, NULL, &buffer); + if (!GrnCheck("groonga: failed to update index")) { + continue; + } + } +} + +/** + * pgroonga.insert() -- aminsert + */ +Datum +pgroonga_insert(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + Datum *values = (Datum *) PG_GETARG_POINTER(1); + bool *isnull = (bool *) PG_GETARG_POINTER(2); + ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); +#ifdef NOT_USED + Relation heap = (Relation) PG_GETARG_POINTER(4); + IndexUniqueCheck checkUnique = PG_GETARG_INT32(5); +#endif + grn_obj *idsTable = GrnLookupIDsTable(index, ERROR); + grn_obj *indexColumn = GrnLookupIndexColumn(index, ERROR); + + GrnLock(index, ExclusiveLock); + GrnInsert(ctx, index, idsTable, indexColumn, values, isnull, ht_ctid); + GrnUnlock(index, ExclusiveLock); + + PG_RETURN_BOOL(true); +} + +static void +GrnScanOpaqueInit(GrnScanOpaque so, Relation index) +{ + so->idsTable = GrnLookupIDsTable(index, ERROR); + so->searched = NULL; + so->sorted = NULL; + so->targetTable = NULL; + so->cursor = NULL; + so->keyAccessor = NULL; + so->currentID = GRN_ID_NIL; +} + +static void +GrnScanOpaqueReinit(GrnScanOpaque so) +{ + so->currentID = GRN_ID_NIL; + if (so->keyAccessor) + { + grn_obj_unlink(ctx, so->keyAccessor); + so->keyAccessor = NULL; + } + if (so->cursor) + { + grn_table_cursor_close(ctx, so->cursor); + so->cursor = NULL; + } + if (so->sorted) + { + grn_obj_unlink(ctx, so->sorted); + so->sorted = NULL; + } + if 
(so->searched) + { + grn_obj_unlink(ctx, so->searched); + so->searched = NULL; + } +} + +/** + * pgroonga.beginscan() -- ambeginscan + */ +Datum +pgroonga_beginscan(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); + IndexScanDesc scan; + GrnScanOpaque so; + + scan = RelationGetIndexScan(index, nkeys, norderbys); + + so = (GrnScanOpaque) palloc(sizeof(GrnScanOpaqueData)); + GrnScanOpaqueInit(so, index); + + scan->opaque = so; + + PG_RETURN_POINTER(scan); +} + +static void +GrnSearch(IndexScanDesc scan) +{ + Relation index = scan->indexRelation; + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + grn_obj *indexColumn; + grn_obj *matchColumns, *matchColumnsVariable; + grn_obj *expression, *expressionVariable; + int i, nExpressions = 0; + + if (scan->numberOfKeys == 0) + return; + + GRN_EXPR_CREATE_FOR_QUERY(ctx, so->idsTable, + matchColumns, matchColumnsVariable); + indexColumn = GrnLookupIndexColumn(index, ERROR); + grn_expr_append_obj(ctx, matchColumns, indexColumn, GRN_OP_PUSH, 1); + + GRN_EXPR_CREATE_FOR_QUERY(ctx, so->idsTable, + expression, expressionVariable); + + for (i = 0; i < scan->numberOfKeys; i++) + { + ScanKey key = &(scan->keyData[i]); + grn_bool isValidStrategy = GRN_TRUE; + + /* NULL key is not supported */ + if (key->sk_flags & SK_ISNULL) + continue; + + grn_obj_reinit(ctx, &buffer, GrnGetType(index, key->sk_attno - 1), 0); + GrnGetValue(index, key->sk_attno - 1, key->sk_argument, &buffer); + + grn_expr_append_obj(ctx, expression, matchColumns, GRN_OP_PUSH, 1); + grn_expr_append_obj(ctx, expression, &buffer, GRN_OP_PUSH, 1); + + switch (key->sk_strategy) + { + case GrnLessStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_LESS, 2); + break; + case GrnLessEqualStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_LESS_EQUAL, 2); + break; + case GrnEqualStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_EQUAL, 2); + break; + case 
GrnGreaterEqualStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_GREATER_EQUAL, 2); + break; + case GrnGreaterStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_GREATER, 2); + break; + case GrnNotEqualStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_NOT_EQUAL, 2); + break; + case GrnContainStrategyNumber: + grn_expr_append_op(ctx, expression, GRN_OP_MATCH, 2); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unexpected strategy number: %d", key->sk_strategy))); + isValidStrategy = GRN_FALSE; + break; + } + + if (!isValidStrategy) + continue; + + if (nExpressions > 0) + grn_expr_append_op(ctx, expression, GRN_OP_AND, 2); + nExpressions++; + } + + so->searched = grn_table_create(ctx, NULL, 0, NULL, + GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_WITH_SUBREC, + so->idsTable, 0); + grn_table_select(ctx, so->idsTable, expression, so->searched, GRN_OP_OR); + grn_obj_unlink(ctx, expression); + grn_obj_unlink(ctx, matchColumns); +} + +static void +GrnSort(IndexScanDesc scan) +{ + /* TODO */ +} + +static void +GrnOpenCursor(IndexScanDesc scan, ScanDirection dir) +{ + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + grn_obj *table; + int offset = 0; + int limit = -1; + int flags = 0; + + table = so->sorted; + if (!table) + table = so->searched; + if (!table) + table = so->idsTable; + + if (dir == BackwardScanDirection) + flags |= GRN_CURSOR_DESCENDING; + else + flags |= GRN_CURSOR_ASCENDING; + + so->cursor = grn_table_cursor_open(ctx, table, + NULL, 0, NULL, 0, + offset, limit, flags); + so->keyAccessor = grn_obj_column(ctx, table, + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN); +} + +static void +GrnEnsureCursorOpened(IndexScanDesc scan, ScanDirection dir) +{ + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + + if (so->cursor) + return; + + GrnSearch(scan); + GrnSort(scan); + GrnOpenCursor(scan, dir); +} + + +/** + * pgroonga.gettuple() -- amgettuple + */ +Datum +pgroonga_gettuple(PG_FUNCTION_ARGS) +{ + 
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + + scan->xs_recheck = false; + + GrnEnsureCursorOpened(scan, dir); + + if (scan->kill_prior_tuple && so->currentID != GRN_ID_NIL) + { + grn_obj key; + GRN_UINT64_INIT(&key, 0); + GrnLock(scan->indexRelation, ExclusiveLock); + grn_obj_get_value(ctx, so->keyAccessor, so->currentID, &key); + grn_table_delete(ctx, so->idsTable, + GRN_BULK_HEAD(&key), GRN_BULK_VSIZE(&key)); + GrnUnlock(scan->indexRelation, ExclusiveLock); + GRN_OBJ_FIN(ctx, &key); + } + + so->currentID = grn_table_cursor_next(ctx, so->cursor); + if (so->currentID == GRN_ID_NIL) + { + PG_RETURN_BOOL(false); + } + else + { + grn_obj key; + + GRN_UINT64_INIT(&key, 0); + grn_obj_get_value(ctx, so->keyAccessor, so->currentID, &key); + scan->xs_ctup.t_self = UInt64ToCtid(GRN_UINT64_VALUE(&key)); + GRN_OBJ_FIN(ctx, &key); + + PG_RETURN_BOOL(true); + } + +} + +/** + * pgroonga.getbitmap() -- amgetbitmap + */ +Datum +pgroonga_getbitmap(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + int64 nRecords = 0; + grn_id id; + grn_obj key; + + GrnEnsureCursorOpened(scan, ForwardScanDirection); + + GRN_UINT64_INIT(&key, 0); + while ((id = grn_table_cursor_next(ctx, so->cursor)) != GRN_ID_NIL) { + ItemPointerData ctid; + GRN_BULK_REWIND(&key); + grn_obj_get_value(ctx, so->keyAccessor, id, &key); + ctid = UInt64ToCtid(GRN_UINT64_VALUE(&key)); + tbm_add_tuples(tbm, &ctid, 1, false); + nRecords++; + } + GRN_OBJ_FIN(ctx, &key); + + PG_RETURN_INT64(nRecords); +} + +/** + * pgroonga.rescan() -- amrescan + * + * この段階ではスキャンキーがまだ与えられていない場合がある。 + * まだ検索を行わなず、後から gettuple または getbitmap で検索する。 + */ +Datum +pgroonga_rescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + ScanKey keys = (ScanKey) 
PG_GETARG_POINTER(1); +#ifdef NOT_USED + int nkeys = PG_GETARG_INT32(2); + ScanKey orderbys = (ScanKey) PG_GETARG_POINTER(3); + int norderbys = PG_GETARG_INT32(4); +#endif + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + + GrnScanOpaqueReinit(so); + + if (keys && scan->numberOfKeys > 0) + memmove(scan->keyData, keys, scan->numberOfKeys * sizeof(ScanKeyData)); + + PG_RETURN_VOID(); +} + +/** + * pgroonga.endscan() -- amendscan + */ +Datum +pgroonga_endscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + GrnScanOpaque so = (GrnScanOpaque) scan->opaque; + + GrnScanOpaqueReinit(so); + pfree(so); + + PG_RETURN_VOID(); +} + +static void +GrnBuildCallback(Relation index, + HeapTuple htup, + Datum *values, + bool *isnull, + bool tupleIsAlive, + void *state) +{ + GrnBuildState bs = (GrnBuildState) state; + TupleDesc desc = RelationGetDescr(index); + uint64 key = CtidToUInt64(&htup->t_self); + grn_id id; + int i; + + id = grn_table_add(ctx, bs->idsTable, &key, sizeof(uint64), NULL); + for (i = 0; i < desc->natts; i++) + { + unsigned int sectionID = i + 1; + + if (isnull[i]) + continue; + + index_getprocinfo(index, i, GrnGetValueProc); + grn_obj_reinit(ctx, &buffer, GrnGetType(index, i), 0); + GrnSetValue(index, i, &buffer, values[i]); + grn_ii_buffer_append(ctx, bs->buffer, id, sectionID, &buffer); + if (!GrnCheck("groonga: failed to append data to index")) { + continue; + } + } + + bs->nIndexedTuples++; +} + +/** + * pgroonga.build() -- ambuild + */ +Datum +pgroonga_build(PG_FUNCTION_ARGS) +{ + Relation heap = (Relation) PG_GETARG_POINTER(0); + Relation index = (Relation) PG_GETARG_POINTER(1); + IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); + IndexBuildResult *result; + double nHeapTuples = 0.0; + GrnBuildStateData bs; + grn_obj *lexicon = NULL; + grn_obj *indexColumn = NULL; + + if (indexInfo->ii_Unique) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("groonga: unique index isn't supported"))); + + 
bs.idsTable = NULL; + bs.buffer = NULL; + bs.nIndexedTuples = 0.0; + + PG_TRY(); + { + GrnCreate(index, &(bs.idsTable), &lexicon, &indexColumn); + { + unsigned long long int updateBufferSize = 10; + bs.buffer = grn_ii_buffer_open(ctx, (grn_ii *)indexColumn, + updateBufferSize); + } + nHeapTuples = IndexBuildHeapScan(heap, index, indexInfo, true, + GrnBuildCallback, &bs); + grn_ii_buffer_commit(ctx, bs.buffer); + grn_ii_buffer_close(ctx, bs.buffer); + } + PG_CATCH(); + { + if (bs.buffer) + grn_ii_buffer_close(ctx, bs.buffer); + if (indexColumn) + grn_obj_remove(ctx, indexColumn); + if (lexicon) + grn_obj_remove(ctx, lexicon); + if (bs.idsTable) + grn_obj_remove(ctx, bs.idsTable); + PG_RE_THROW(); + } + PG_END_TRY(); + + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + result->heap_tuples = nHeapTuples; + result->index_tuples = bs.nIndexedTuples; + + PG_RETURN_POINTER(result); +} + +static IndexBulkDeleteResult * +GrnBulkDeleteResult(IndexVacuumInfo *info, grn_obj *idsTable) +{ + IndexBulkDeleteResult *stats; + + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + stats->num_pages = (BlockNumber) 1; /* TODO: sizeof index / BLCKSZ */ + + /* table might be NULL if index is corrupted */ + if (idsTable) + stats->num_index_tuples = grn_table_size(ctx, idsTable); + else + stats->num_index_tuples = 0; + + return stats; +} + +/** + * pgroonga.bulkdelete() -- ambulkdelete + */ +Datum +pgroonga_bulkdelete(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2); + void *callback_state = (void *) PG_GETARG_POINTER(3); + Relation index = info->index; + grn_obj *idsTable; + grn_table_cursor *cursor; + double nRemovedTuples; + + idsTable = GrnLookupIDsTable(index, WARNING); + + if (!stats) + stats = GrnBulkDeleteResult(info, idsTable); + + if 
(!idsTable || !callback) + PG_RETURN_POINTER(stats); + + nRemovedTuples = 0; + + cursor = grn_table_cursor_open(ctx, idsTable, NULL, 0, NULL, 0, 0, -1, 0); + if (!cursor) + ereport(ERROR, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("groonga: failed to open cursor: %s", ctx->errbuf))); + + PG_TRY(); + { + while (grn_table_cursor_next(ctx, cursor) != GRN_ID_NIL) + { + ItemPointerData ctid; + uint64 key; + + CHECK_FOR_INTERRUPTS(); + + grn_table_cursor_get_key(ctx, cursor, (void **)&key); + ctid = UInt64ToCtid(key); + if (callback(&ctid, callback_state)) + { + GrnLock(index, ExclusiveLock); + grn_table_cursor_delete(ctx, cursor); + GrnUnlock(index, ExclusiveLock); + + nRemovedTuples += 1; + } + } + grn_table_cursor_close(ctx, cursor); + } + PG_CATCH(); + { + grn_table_cursor_close(ctx, cursor); + PG_RE_THROW(); + } + PG_END_TRY(); + + stats->tuples_removed = nRemovedTuples; + + PG_RETURN_POINTER(stats); +} + +/** + * pgroonga.vacuumcleanup() -- amvacuumcleanup + */ +Datum +pgroonga_vacuumcleanup(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); + + if (!stats) + stats = GrnBulkDeleteResult(info, + GrnLookupIDsTable(info->index, WARNING)); + + PG_RETURN_POINTER(stats); +} + +/** + * pgroonga.costestimate() -- amcostestimate + */ +Datum +pgroonga_costestimate(PG_FUNCTION_ARGS) +{ + /* + * We cannot use genericcostestimate because it is a static funciton. + * Use gistcostestimate instead, which just calls genericcostestimate. 
+ */ + return gistcostestimate(fcinfo); +} + +/** + * pgroonga.options() -- amoptions + */ +Datum +pgroonga_options(PG_FUNCTION_ARGS) +{ + return (Datum) 0; +} Added: pgroonga.control (+3 -0) 100644 =================================================================== --- /dev/null +++ pgroonga.control 2015-01-12 23:39:34 +0900 (0c7640e) @@ -0,0 +1,3 @@ +default_version = '0.2.0' +comment = 'CJK-ready fast full-text search index based on Groonga' +module_pathname = '$libdir/pgroonga' Added: pgroonga.h (+79 -0) 100644 =================================================================== --- /dev/null +++ pgroonga.h 2015-01-12 23:39:34 +0900 (0f458e7) @@ -0,0 +1,79 @@ +/* + * IDENTIFICATION + * pgroonga.h + */ + +#ifndef PGROONGA_H +#define PGROONGA_H + +#include <postgres.h> +#include <fmgr.h> + +#ifndef PGDLLEXPORT +# define PGDLLEXPORT +#endif + +/* Groonga strategy types */ +#define GrnLessStrategyNumber 1 /* operator < */ +#define GrnLessEqualStrategyNumber 2 /* operator <= */ +#define GrnEqualStrategyNumber 3 /* operator = */ +#define GrnGreaterEqualStrategyNumber 4 /* operator >= */ +#define GrnGreaterStrategyNumber 5 /* operator > */ +#define GrnNotEqualStrategyNumber 6 /* operator <> (! 
in Groonga) */ +#define GrnContainStrategyNumber 7 /* operator %% (@ in Groonga) */ +#define GrnQueryStrategyNumber 8 /* operator @@ (Groonga query) */ + +/* Groonga support functions */ +#define GrnTypeOfProc 1 +#define GrnGetValueProc 2 +#define GrnSetValueProc 3 + +/* file and table names */ +#define GrnDatabaseBasename "grn" +#define GrnIDsTableNameFormat "IDs%u" +#define GrnLexiconNameFormat "Lexicon%u" +#define GrnIndexColumnName "index" + +/* in pgroonga.c */ +extern void PGDLLEXPORT _PG_init(void); + +extern Datum PGDLLEXPORT pgroonga_contains_text(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_contains_bpchar(PG_FUNCTION_ARGS); + +extern Datum PGDLLEXPORT pgroonga_insert(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_beginscan(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_gettuple(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_getbitmap(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_rescan(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_endscan(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_build(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_bulkdelete(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_vacuumcleanup(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_costestimate(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_options(PG_FUNCTION_ARGS); + +/* in groonga_types.c */ +extern Datum PGDLLEXPORT pgroonga_typeof(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_text(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_bpchar(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_bool(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_int2(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_int4(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_int8(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_float4(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_float8(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT 
pgroonga_get_timestamp(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_get_timestamptz(PG_FUNCTION_ARGS); + +extern Datum PGDLLEXPORT pgroonga_set_text(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_bpchar(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_bool(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_int2(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_int4(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_int8(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_float4(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_float8(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_timestamp(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_set_timestamptz(PG_FUNCTION_ARGS); + +#endif /* PPGROONGA_H */ Added: pgroonga_types.c (+323 -0) 100644 =================================================================== --- /dev/null +++ pgroonga_types.c 2015-01-12 23:39:34 +0900 (045d318) @@ -0,0 +1,323 @@ +/* + * IDENTIFICATION + * gproonga_types.c + */ + +#include "pgroonga.h" + +#include <catalog/pg_type.h> + +#include <groonga.h> + +static int +bpchar_size(const BpChar *arg) +{ + char *s = VARDATA_ANY(arg); + int i; + int len; + + len = VARSIZE_ANY_EXHDR(arg); + for (i = len - 1; i >= 0; i--) + { + if (s[i] != ' ') + break; + } + return i + 1; +} + +PG_FUNCTION_INFO_V1(pgroonga_typeof); +PG_FUNCTION_INFO_V1(pgroonga_get_text); +PG_FUNCTION_INFO_V1(pgroonga_get_bpchar); +PG_FUNCTION_INFO_V1(pgroonga_get_bool); +PG_FUNCTION_INFO_V1(pgroonga_get_int2); +PG_FUNCTION_INFO_V1(pgroonga_get_int4); +PG_FUNCTION_INFO_V1(pgroonga_get_int8); +PG_FUNCTION_INFO_V1(pgroonga_get_float4); +PG_FUNCTION_INFO_V1(pgroonga_get_float8); +PG_FUNCTION_INFO_V1(pgroonga_get_timestamp); +PG_FUNCTION_INFO_V1(pgroonga_get_timestamptz); +PG_FUNCTION_INFO_V1(pgroonga_set_text); +PG_FUNCTION_INFO_V1(pgroonga_set_bpchar); +PG_FUNCTION_INFO_V1(pgroonga_set_bool); +PG_FUNCTION_INFO_V1(pgroonga_set_int2); 
+PG_FUNCTION_INFO_V1(pgroonga_set_int4); +PG_FUNCTION_INFO_V1(pgroonga_set_int8); +PG_FUNCTION_INFO_V1(pgroonga_set_float4); +PG_FUNCTION_INFO_V1(pgroonga_set_float8); +PG_FUNCTION_INFO_V1(pgroonga_set_timestamp); +PG_FUNCTION_INFO_V1(pgroonga_set_timestamptz); + +/** + * pgroonga_typeof -- map a postgres' built-in type to a Groonga's type + * + * Raises ERROR if no corresponding types found. + */ +Datum +pgroonga_typeof(PG_FUNCTION_ARGS) +{ + Oid typid = PG_GETARG_OID(0); + int typmod = PG_GETARG_INT32(1); + int32 maxlen; + + /* TODO: support array and record types. */ + switch (typid) + { + case BOOLOID: + return GRN_DB_BOOL; + case INT2OID: + return GRN_DB_INT16; + case INT4OID: + return GRN_DB_INT32; + case INT8OID: + return GRN_DB_INT64; + case FLOAT4OID: + case FLOAT8OID: + return GRN_DB_FLOAT; + case TIMESTAMPOID: + case TIMESTAMPTZOID: +#ifdef HAVE_INT64_TIMESTAMP + return GRN_DB_INT64; /* FIXME: use GRN_DB_TIME instead */ +#else + return GRN_DB_FLOAT; +#endif + case TEXTOID: + case XMLOID: + return GRN_DB_LONG_TEXT; + case BPCHAROID: + case VARCHAROID: + maxlen = type_maximum_size(typid, typmod); + if (maxlen >= 0) + { + if (maxlen < 4096) + return GRN_DB_SHORT_TEXT; /* 4KB */ + if (maxlen < 64 * 1024) + return GRN_DB_TEXT; /* 64KB */ + } + return GRN_DB_LONG_TEXT; +#ifdef NOT_USED + case POINTOID: + return GRN_DB_TOKYO_GEO_POINT or GRN_DB_WGS84_GEO_POINT; +#endif + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Groonga: unsupported type: %u", typid))); + return GRN_DB_VOID; /* keep compiler quiet */ + } +} + +Datum +pgroonga_get_text(PG_FUNCTION_ARGS) +{ + text *var = PG_GETARG_TEXT_PP(0); + grn_ctx *ctx = (grn_ctx *)PG_GETARG_POINTER(1); + grn_obj *buffer = (grn_obj *)PG_GETARG_POINTER(2); + + GRN_TEXT_PUT(ctx, buffer, VARDATA_ANY(var), VARSIZE_ANY_EXHDR(var)); + + PG_RETURN_VOID(); +} + +Datum +pgroonga_get_bpchar(PG_FUNCTION_ARGS) +{ + BpChar *var = PG_GETARG_BPCHAR_PP(0); + grn_ctx *ctx = (grn_ctx 
*)PG_GETARG_POINTER(1); + grn_obj *buffer = (grn_obj *)PG_GETARG_POINTER(2); + + GRN_TEXT_PUT(ctx, buffer, VARDATA_ANY(var), bpchar_size(var)); + + PG_RETURN_VOID(); +} + +Datum +pgroonga_get_bool(PG_FUNCTION_ARGS) +{ + bool var = PG_GETARG_BOOL(0); + int *len = (int *) PG_GETARG_POINTER(1); + const char *ret = (var ? "true" : "false"); + + *len = strlen(ret); + + PG_RETURN_POINTER(ret); +} + +Datum +pgroonga_get_int2(PG_FUNCTION_ARGS) +{ + int16 var = PG_GETARG_INT16(0); + int *len = (int *) PG_GETARG_POINTER(1); + char *ret = (char *) palloc(8); + + *len = snprintf(ret, 8, "%d", var); + + PG_RETURN_POINTER(ret); +} + +Datum +pgroonga_get_int4(PG_FUNCTION_ARGS) +{ + int32 var = PG_GETARG_INT32(0); + int *len = (int *) PG_GETARG_POINTER(1); + char *ret = (char *) palloc(12); + + *len = snprintf(ret, 12, "%d", var); + + PG_RETURN_POINTER(ret); +} + +Datum +pgroonga_get_int8(PG_FUNCTION_ARGS) +{ + int64 var = PG_GETARG_INT64(0); + int *len = (int *) PG_GETARG_POINTER(1); + char *ret = (char *) palloc(24); + + *len = snprintf(ret, 24, INT64_FORMAT, var); + + PG_RETURN_POINTER(ret); +} + +Datum +pgroonga_get_float4(PG_FUNCTION_ARGS) +{ + float4 var = PG_GETARG_FLOAT4(0); + int *len = (int *) PG_GETARG_POINTER(1); + char *ret = (char *) palloc(32); + + *len = snprintf(ret, 32, "%f", var); + + PG_RETURN_POINTER(ret); +} + +Datum +pgroonga_get_float8(PG_FUNCTION_ARGS) +{ + float8 var = PG_GETARG_FLOAT8(0); + int *len = (int *) PG_GETARG_POINTER(1); + char *ret = (char *) palloc(32); + + *len = snprintf(ret, 32, "%f", var); + + PG_RETURN_POINTER(ret); +} + +Datum +pgroonga_get_timestamp(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT64_TIMESTAMP + return pgroonga_get_int8(fcinfo); +#else + return pgroonga_get_float8(fcinfo); +#endif +} + +Datum +pgroonga_get_timestamptz(PG_FUNCTION_ARGS) +{ + return pgroonga_get_timestamp(fcinfo); +} + +Datum +pgroonga_set_text(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); 
+ text *var = PG_GETARG_TEXT_PP(2); + + GRN_TEXT_SET(ctx, obj, VARDATA_ANY(var), VARSIZE_ANY_EXHDR(var)); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_bpchar(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + BpChar *var = PG_GETARG_BPCHAR_PP(2); + + GRN_TEXT_SET(ctx, obj, VARDATA_ANY(var), bpchar_size(var)); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_bool(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + bool var = PG_GETARG_BOOL(2); + + GRN_BOOL_SET(ctx, obj, var); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_int2(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + int16 var = PG_GETARG_INT16(2); + + GRN_INT16_SET(ctx, obj, var); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_int4(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + int32 var = PG_GETARG_INT32(2); + + GRN_INT32_SET(ctx, obj, var); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_int8(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + int64 var = PG_GETARG_INT64(2); + + GRN_INT64_SET(ctx, obj, var); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_float4(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + float8 var = (float8) PG_GETARG_FLOAT4(2); + + GRN_FLOAT_SET(ctx, obj, var); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_float8(PG_FUNCTION_ARGS) +{ + grn_ctx *ctx = (grn_ctx *) PG_GETARG_POINTER(0); + grn_obj *obj = (grn_obj *) PG_GETARG_POINTER(1); + float8 var = PG_GETARG_FLOAT8(2); + + GRN_FLOAT_SET(ctx, obj, var); + PG_RETURN_VOID(); +} + +Datum +pgroonga_set_timestamp(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_INT64_TIMESTAMP + return pgroonga_set_int8(fcinfo); 
+#else + return pgroonga_set_float8(fcinfo); +#endif +} + +Datum +pgroonga_set_timestamptz(PG_FUNCTION_ARGS) +{ + return pgroonga_set_timestamp(fcinfo); +} -------------- next part -------------- HTMLの添付ファイルを保管しました...Download