Kouhei Sutou
null+****@clear*****
Mon Jan 25 00:14:57 JST 2016
Kouhei Sutou 2016-01-25 00:14:57 +0900 (Mon, 25 Jan 2016) New Revision: f9974b9e5f3d3f445046eb464c876b700c2dc7a8 https://github.com/pgroonga/pgroonga/commit/f9974b9e5f3d3f445046eb464c876b700c2dc7a8 Message: jsonb: support sequential scan It requires Groonga 5.1.2 or later. (5.1.2 isn't released yet.) Added files: expected/jsonb/value/boolean/seqscan.out sql/jsonb/value/boolean/seqscan.sql Modified files: .travis.yml Makefile src/pgrn_jsonb.c Modified: .travis.yml (+2 -2) =================================================================== --- .travis.yml 2016-01-24 22:52:06 +0900 (452e4b7) +++ .travis.yml 2016-01-25 00:14:57 +0900 (f6eb206) @@ -7,8 +7,8 @@ compiler: addons: postgresql: "9.4" sudo: required -# env: -# - GROONGA_MASTER=yes +env: + - GROONGA_MASTER=yes install: - curl --silent --location https://github.com/groonga/groonga/raw/master/data/travis/setup.sh | sh - sudo apt-get update -qq Modified: Makefile (+1 -1) =================================================================== --- Makefile 2016-01-24 22:52:06 +0900 (d909937) +++ Makefile 2016-01-25 00:14:57 +0900 (2fea1cf) @@ -1,4 +1,4 @@ -REQUIRED_GROONGA_VERSION = 5.0.8 +REQUIRED_GROONGA_VERSION = 5.1.2 GROONGA_PKG = "groonga >= $(REQUIRED_GROONGA_VERSION)" MODULE_big = pgroonga Added: expected/jsonb/value/boolean/seqscan.out (+22 -0) 100644 =================================================================== --- /dev/null +++ expected/jsonb/value/boolean/seqscan.out 2016-01-25 00:14:57 +0900 (cbd6d8b) @@ -0,0 +1,22 @@ +CREATE TABLE fruits ( + id int, + items jsonb +); +INSERT INTO fruits VALUES (1, '{"apple": true}'); +INSERT INTO fruits VALUES (2, '{"banana": false}'); +INSERT INTO fruits VALUES (3, '{"peach": true}'); +CREATE INDEX pgroonga_index ON fruits USING pgroonga (items); +SET enable_seqscan = on; +SET enable_indexscan = off; +SET enable_bitmapscan = off; +SELECT id, items + FROM fruits + WHERE items @@ 'boolean == true' + ORDER BY id; + id | items +----+----------------- + 1 | {"apple": 
true} + 3 | {"peach": true} +(2 rows) + +DROP TABLE fruits; Added: sql/jsonb/value/boolean/seqscan.sql (+21 -0) 100644 =================================================================== --- /dev/null +++ sql/jsonb/value/boolean/seqscan.sql 2016-01-25 00:14:57 +0900 (0b665f7) @@ -0,0 +1,21 @@ +CREATE TABLE fruits ( + id int, + items jsonb +); + +INSERT INTO fruits VALUES (1, '{"apple": true}'); +INSERT INTO fruits VALUES (2, '{"banana": false}'); +INSERT INTO fruits VALUES (3, '{"peach": true}'); + +CREATE INDEX pgroonga_index ON fruits USING pgroonga (items); + +SET enable_seqscan = on; +SET enable_indexscan = off; +SET enable_bitmapscan = off; + +SELECT id, items + FROM fruits + WHERE items @@ 'boolean == true' + ORDER BY id; + +DROP TABLE fruits; Modified: src/pgrn_jsonb.c (+576 -525) =================================================================== --- src/pgrn_jsonb.c 2016-01-24 22:52:06 +0900 (6db7a5d) +++ src/pgrn_jsonb.c 2016-01-25 00:14:57 +0900 (faf4052) @@ -29,6 +29,26 @@ typedef struct grn_obj *valuesTable; } PGrnJSONBCreateData; +typedef struct PGrnJSONBInsertData +{ + grn_obj *pathsTable; + grn_obj *valuesTable; + grn_obj *pathColumn; + grn_obj *pathsColumn; + grn_obj *stringColumn; + grn_obj *numberColumn; + grn_obj *booleanColumn; + grn_obj *sizeColumn; + grn_obj *typeColumn; + grn_obj *valueIDs; + grn_obj key; + grn_obj components; + grn_obj path; + grn_obj pathIDs; + grn_obj value; + grn_obj type; +} PGrnJSONBInsertData; + static grn_ctx *ctx = &PGrnContext; static struct PGrnBuffers *buffers = &PGrnBuffers; @@ -41,6 +61,32 @@ static grn_obj *tmpTypesTable = NULL; static grn_obj *tmpValuesTable = NULL; static grn_obj * +PGrnLookupJSONPathsTable(Relation index, + unsigned int nthAttribute, + int errorLevel) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + + snprintf(name, sizeof(name), + PGrnJSONPathsTableNameFormat, + index->rd_node.relNode, nthAttribute); + return PGrnLookup(name, errorLevel); +} + +static grn_obj * +PGrnLookupJSONValuesTable(Relation 
index, + unsigned int nthAttribute, + int errorLevel) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + + snprintf(name, sizeof(name), + PGrnJSONValuesTableNameFormat, + index->rd_node.relNode, nthAttribute); + return PGrnLookup(name, errorLevel); +} + +static grn_obj * PGrnJSONBCreatePathsTable(const char *name) { return PGrnCreateTable(name, @@ -89,6 +135,10 @@ PGrnJSONBCreateDataColumns(PGrnJSONBCreateData *jsonbData) 0, grn_ctx_at(ctx, GRN_DB_FLOAT)); PGrnCreateColumn(jsonbData->valuesTable, + "boolean", + 0, + grn_ctx_at(ctx, GRN_DB_BOOL)); + PGrnCreateColumn(jsonbData->valuesTable, "size", 0, grn_ctx_at(ctx, GRN_DB_UINT32)); @@ -97,36 +147,7 @@ PGrnJSONBCreateDataColumns(PGrnJSONBCreateData *jsonbData) 0, jsonbData->typesTable); } -#endif - -void -PGrnInitializeJSONB(void) -{ -#ifdef JSONBOID - PGrnJSONBCreateData data; - - tmpPathsTable = PGrnJSONBCreatePathsTable(NULL); - tmpTypesTable = PGrnJSONBCreateTypesTable(NULL); - tmpValuesTable = PGrnJSONBCreateValuesTable(NULL); - - data.pathsTable = tmpPathsTable; - data.typesTable = tmpTypesTable; - data.valuesTable = tmpValuesTable; - PGrnJSONBCreateDataColumns(&data); -#endif -} - -void -PGrnFinalizeJSONB(void) -{ -#ifdef JSONBOID - grn_obj_remove(ctx, tmpValuesTable); - grn_obj_remove(ctx, tmpTypesTable); - grn_obj_remove(ctx, tmpPathsTable); -#endif -} -#ifdef JSONBOID static void PGrnJSONGeneratePath(grn_obj *components, unsigned int start, @@ -206,126 +227,438 @@ PGrnJSONIteratorTokenName(JsonbIteratorToken token) return names[token]; } -static grn_obj * -PGrnLookupJSONPathsTable(Relation index, - unsigned int nthAttribute, - int errorLevel) +static void +PGrnJSONBInsertDataInit(PGrnJSONBInsertData *data) { - char name[GRN_TABLE_MAX_KEY_SIZE]; + data->pathColumn = + PGrnLookupColumn(data->valuesTable, "path", ERROR); + data->pathsColumn = + PGrnLookupColumn(data->valuesTable, "paths", ERROR); + data->stringColumn = + PGrnLookupColumn(data->valuesTable, "string", ERROR); + data->numberColumn = + 
PGrnLookupColumn(data->valuesTable, "number", ERROR); + data->booleanColumn = + PGrnLookupColumn(data->valuesTable, "boolean", ERROR); + data->sizeColumn = + PGrnLookupColumn(data->valuesTable, "size", ERROR); + data->typeColumn = + PGrnLookupColumn(data->valuesTable, "type", ERROR); - snprintf(name, sizeof(name), - PGrnJSONPathsTableNameFormat, - index->rd_node.relNode, nthAttribute); - return PGrnLookup(name, errorLevel); + GRN_TEXT_INIT(&(data->key), 0); + GRN_TEXT_INIT(&(data->components), GRN_OBJ_VECTOR); + GRN_TEXT_INIT(&(data->path), 0); + GRN_RECORD_INIT(&(data->pathIDs), GRN_OBJ_VECTOR, + grn_obj_id(ctx, data->pathsTable)); + GRN_VOID_INIT(&(data->value)); + GRN_TEXT_INIT(&(data->type), GRN_OBJ_DO_SHALLOW_COPY); } -static grn_obj * -PGrnLookupJSONValuesTable(Relation index, - unsigned int nthAttribute, - int errorLevel) +static void +PGrnJSONBInsertDataFin(PGrnJSONBInsertData *data) { - char name[GRN_TABLE_MAX_KEY_SIZE]; - - snprintf(name, sizeof(name), - PGrnJSONValuesTableNameFormat, - index->rd_node.relNode, nthAttribute); - return PGrnLookup(name, errorLevel); + GRN_OBJ_FIN(ctx, &(data->type)); + GRN_OBJ_FIN(ctx, &(data->value)); + GRN_OBJ_FIN(ctx, &(data->pathIDs)); + GRN_OBJ_FIN(ctx, &(data->path)); + GRN_OBJ_FIN(ctx, &(data->components)); + GRN_OBJ_FIN(ctx, &(data->key)); } -static void -PGrnJSONBCreateTables(PGrnCreateData *data, - PGrnJSONBCreateData *jsonbData) +static uint64_t +PGrnJSONBInsertGenerateKey(PGrnJSONBInsertData *data, + bool haveValue, + const char *typeName) { - { - char jsonPathsTableName[GRN_TABLE_MAX_KEY_SIZE]; - snprintf(jsonPathsTableName, sizeof(jsonPathsTableName), - PGrnJSONPathsTableNameFormat, - data->relNode, data->i); - jsonbData->pathsTable = PGrnJSONBCreatePathsTable(jsonPathsTableName); - GRN_PTR_PUT(ctx, data->supplementaryTables, jsonbData->pathsTable); - } + unsigned int i, n; + + GRN_BULK_REWIND(&(data->key)); + GRN_TEXT_PUTS(ctx, &(data->key), "."); + n = grn_vector_size(ctx, &(data->components)); + for (i = 0; 
i < n; i++) { - char jsonTypesTableName[GRN_TABLE_MAX_KEY_SIZE]; - snprintf(jsonTypesTableName, sizeof(jsonTypesTableName), - PGrnJSONTypesTableNameFormat, - data->relNode, data->i); - jsonbData->typesTable = PGrnJSONBCreateTypesTable(jsonTypesTableName); - GRN_PTR_PUT(ctx, data->supplementaryTables, jsonbData->typesTable); + const char *component; + unsigned int componentSize; + grn_id domain; + + componentSize = grn_vector_get_element(ctx, + &(data->components), + i, + &component, + NULL, + &domain); + if (domain == GRN_DB_UINT32) + { + GRN_TEXT_PUTS(ctx, &(data->key), "[]"); + } + else + { + GRN_TEXT_PUTS(ctx, &(data->key), "["); + grn_text_esc(ctx, &(data->key), component, componentSize); + GRN_TEXT_PUTS(ctx, &(data->key), "]"); + } } + GRN_TEXT_PUTS(ctx, &(data->key), "|"); + GRN_TEXT_PUTS(ctx, &(data->key), typeName); + + if (haveValue) { - char jsonValuesTableName[GRN_TABLE_MAX_KEY_SIZE]; - snprintf(jsonValuesTableName, sizeof(jsonValuesTableName), - PGrnJSONValuesTableNameFormat, - data->relNode, data->i); - jsonbData->valuesTable = PGrnJSONBCreateValuesTable(jsonValuesTableName); - GRN_PTR_PUT(ctx, data->supplementaryTables, jsonbData->valuesTable); + GRN_TEXT_PUTS(ctx, &(data->key), "|"); + grn_obj_cast(ctx, &(data->value), &(data->key), GRN_FALSE); } + + return XXH64(GRN_TEXT_VALUE(&data->key), + GRN_TEXT_LEN(&data->key), + 0); } static void -PGrnJSONBCreateFullTextSearchIndexColumn(PGrnCreateData *data, - PGrnJSONBCreateData *jsonbData) +PGrnJSONBInsertAddPath(PGrnJSONBInsertData *data, + unsigned int start, + unsigned int flags) { - const char *tokenizerName = PGRN_DEFAULT_TOKENIZER; - const char *normalizerName = PGRN_DEFAULT_NORMALIZER; - char lexiconName[GRN_TABLE_MAX_KEY_SIZE]; - grn_obj *lexicon; + grn_id pathID; - PGrnApplyOptionValues(data->index, &tokenizerName, &normalizerName); + GRN_BULK_REWIND(&(data->path)); + PGrnJSONGeneratePath(&(data->components), + start, + flags, + &(data->path)); - if (PGrnIsNoneValue(tokenizerName)) + if 
(GRN_TEXT_LEN(&(data->path)) >= GRN_TABLE_MAX_KEY_SIZE) return; - snprintf(lexiconName, sizeof(lexiconName), - PGrnJSONValueLexiconNameFormat, - "FullTextSearch", data->relNode, data->i); - lexicon = PGrnCreateTable(lexiconName, - GRN_OBJ_TABLE_PAT_KEY, - grn_ctx_at(ctx, GRN_DB_SHORT_TEXT)); - GRN_PTR_PUT(ctx, data->lexicons, lexicon); + pathID = grn_table_add(ctx, data->pathsTable, + GRN_TEXT_VALUE(&(data->path)), + GRN_TEXT_LEN(&(data->path)), + NULL); + if (pathID == GRN_ID_NIL) + return; - grn_obj_set_info(ctx, lexicon, GRN_INFO_DEFAULT_TOKENIZER, - PGrnLookup(tokenizerName, ERROR)); - if (!PGrnIsNoneValue(normalizerName)) { - grn_obj_set_info(ctx, lexicon, GRN_INFO_NORMALIZER, - PGrnLookup(normalizerName, ERROR)); + unsigned int i, n; + + n = GRN_BULK_VSIZE(&(data->pathIDs)) / sizeof(grn_id); + for (i = 0; i < n; i++) + { + if (GRN_RECORD_VALUE_AT(&(data->pathIDs), i) == pathID) + return; + } } - PGrnCreateColumn(lexicon, - PGrnIndexColumnName, - GRN_OBJ_COLUMN_INDEX | GRN_OBJ_WITH_POSITION, - jsonbData->valuesTable); + GRN_RECORD_PUT(ctx, &(data->pathIDs), pathID); } static void -PGrnJSONBCreateIndexColumn(PGrnCreateData *data, - PGrnJSONBCreateData *jsonbData, - const char *typeName, - grn_obj_flags tableType, - grn_obj *type) +PGrnJSONBInsertGenerateSubPathsRecursive(PGrnJSONBInsertData *data, + unsigned int parentStart) { - char lexiconName[GRN_TABLE_MAX_KEY_SIZE]; - grn_obj *lexicon; - - snprintf(lexiconName, sizeof(lexiconName), - PGrnJSONValueLexiconNameFormat, - typeName, data->relNode, data->i); - lexicon = PGrnCreateTable(lexiconName, tableType, type); - GRN_PTR_PUT(ctx, data->lexicons, lexicon); - PGrnCreateColumn(lexicon, - PGrnIndexColumnName, - GRN_OBJ_COLUMN_INDEX, - jsonbData->valuesTable); -} + if (parentStart == grn_vector_size(ctx, &(data->components))) + return; -static void -PGrnJSONBCreateIndexColumns(PGrnCreateData *data, - PGrnJSONBCreateData *jsonbData) -{ - PGrnCreateColumn(jsonbData->valuesTable, + PGrnJSONBInsertAddPath(data, + 
parentStart, + PGRN_JSON_GENERATE_PATH_USE_DOT_STYLE); + PGrnJSONBInsertAddPath(data, + parentStart, + 0); + PGrnJSONBInsertAddPath(data, + parentStart, + PGRN_JSON_GENERATE_PATH_INCLUDE_ARRAY); + + PGrnJSONBInsertGenerateSubPathsRecursive(data, parentStart + 1); +} + +static void +PGrnJSONBInsertGeneratePaths(PGrnJSONBInsertData *data) +{ + GRN_BULK_REWIND(&(data->pathIDs)); + + PGrnJSONBInsertAddPath(data, + 0, + PGRN_JSON_GENERATE_PATH_IS_ABSOLUTE | + PGRN_JSON_GENERATE_PATH_USE_DOT_STYLE); + PGrnJSONBInsertAddPath(data, + 0, + PGRN_JSON_GENERATE_PATH_IS_ABSOLUTE); + PGrnJSONBInsertAddPath(data, + 0, + PGRN_JSON_GENERATE_PATH_IS_ABSOLUTE | + PGRN_JSON_GENERATE_PATH_INCLUDE_ARRAY); + + PGrnJSONBInsertGenerateSubPathsRecursive(data, 0); +} + +static void +PGrnJSONBInsertValueSet(PGrnJSONBInsertData *data, + grn_obj *column, + const char *typeName) +{ + uint64_t key; + grn_id valueID; + int added; + + key = PGrnJSONBInsertGenerateKey(data, column != NULL, typeName); + valueID = grn_table_add(ctx, data->valuesTable, + &key, sizeof(uint64_t), + &added); + GRN_RECORD_PUT(ctx, data->valueIDs, valueID); + if (!added) + return; + + GRN_BULK_REWIND(&(data->path)); + PGrnJSONGenerateCompletePath(&(data->components), &(data->path)); + if (GRN_TEXT_LEN(&(data->path)) < GRN_TABLE_MAX_KEY_SIZE) + grn_obj_set_value(ctx, data->pathColumn, valueID, + &(data->path), GRN_OBJ_SET); + + PGrnJSONBInsertGeneratePaths(data); + grn_obj_set_value(ctx, data->pathsColumn, valueID, + &(data->pathIDs), GRN_OBJ_SET); + + if (column) + grn_obj_set_value(ctx, column, valueID, &(data->value), GRN_OBJ_SET); + + GRN_TEXT_SETS(ctx, &(data->type), typeName); + grn_obj_set_value(ctx, data->typeColumn, valueID, + &(data->type), GRN_OBJ_SET); +} + +static void PGrnJSONBInsertContainer(JsonbIterator **iter, + PGrnJSONBInsertData *data); + +static void +PGrnJSONBInsertValue(JsonbIterator **iter, + JsonbValue *value, + PGrnJSONBInsertData *data) +{ + switch (value->type) + { + case jbvNull: + 
PGrnJSONBInsertValueSet(data, NULL, "null"); + break; + case jbvString: + grn_obj_reinit(ctx, &(data->value), GRN_DB_LONG_TEXT, + GRN_OBJ_DO_SHALLOW_COPY); + GRN_TEXT_SET(ctx, &(data->value), + value->val.string.val, + value->val.string.len); + PGrnJSONBInsertValueSet(data, data->stringColumn, "string"); + break; + case jbvNumeric: + { + Datum numericInString = + DirectFunctionCall1(numeric_out, + NumericGetDatum(value->val.numeric)); + const char *numericInCString = DatumGetCString(numericInString); + grn_obj_reinit(ctx, &(data->value), GRN_DB_TEXT, + GRN_OBJ_DO_SHALLOW_COPY); + GRN_TEXT_SETS(ctx, &(data->value), numericInCString); + PGrnJSONBInsertValueSet(data, data->numberColumn, "number"); + break; + } + case jbvBool: + grn_obj_reinit(ctx, &(data->value), GRN_DB_BOOL, 0); + GRN_BOOL_SET(ctx, &(data->value), value->val.boolean); + PGrnJSONBInsertValueSet(data, data->booleanColumn, "boolean"); + break; + case jbvArray: + PGrnJSONBInsertContainer(iter, data); + break; + case jbvObject: + PGrnJSONBInsertContainer(iter, data); + break; + case jbvBinary: + PGrnJSONBInsertContainer(iter, data); + break; + } +} + +static void +PGrnJSONBInsertContainer(JsonbIterator **iter, PGrnJSONBInsertData *data) +{ + JsonbIteratorToken token; + JsonbValue value; + + while ((token = JsonbIteratorNext(iter, &value, false)) != WJB_DONE) { + switch (token) + { + case WJB_KEY: + grn_vector_add_element(ctx, &(data->components), + value.val.string.val, + value.val.string.len, + 0, + GRN_DB_SHORT_TEXT); + break; + case WJB_VALUE: + PGrnJSONBInsertValue(iter, &value, data); + { + const char *component; + grn_vector_pop_element(ctx, &(data->components), &component, + NULL, NULL); + } + break; + case WJB_ELEM: + PGrnJSONBInsertValue(iter, &value, data); + break; + case WJB_BEGIN_ARRAY: + { + uint32_t nElements = value.val.array.nElems; + grn_vector_add_element(ctx, &(data->components), + (const char *)&nElements, + sizeof(uint32_t), + 0, + GRN_DB_UINT32); + PGrnJSONBInsertValueSet(data, 
NULL, "array"); + break; + } + case WJB_END_ARRAY: + { + const char *component; + grn_vector_pop_element(ctx, &(data->components), &component, + NULL, NULL); + break; + } + case WJB_BEGIN_OBJECT: + PGrnJSONBInsertValueSet(data, NULL, "object"); + break; + case WJB_END_OBJECT: + break; + default: + ereport(ERROR, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("pgroonga: jsonb iterator returns invalid token: %d", + token))); + break; + } + } +} +#endif + +void +PGrnInitializeJSONB(void) +{ +#ifdef JSONBOID + PGrnJSONBCreateData data; + + tmpPathsTable = PGrnJSONBCreatePathsTable(NULL); + tmpTypesTable = PGrnJSONBCreateTypesTable(NULL); + tmpValuesTable = PGrnJSONBCreateValuesTable(NULL); + + data.pathsTable = tmpPathsTable; + data.typesTable = tmpTypesTable; + data.valuesTable = tmpValuesTable; + PGrnJSONBCreateDataColumns(&data); +#endif +} + +void +PGrnFinalizeJSONB(void) +{ +#ifdef JSONBOID + grn_obj_remove(ctx, tmpValuesTable); + grn_obj_remove(ctx, tmpTypesTable); + grn_obj_remove(ctx, tmpPathsTable); +#endif +} + +#ifdef JSONBOID +static void +PGrnJSONBCreateTables(PGrnCreateData *data, + PGrnJSONBCreateData *jsonbData) +{ + { + char jsonPathsTableName[GRN_TABLE_MAX_KEY_SIZE]; + snprintf(jsonPathsTableName, sizeof(jsonPathsTableName), + PGrnJSONPathsTableNameFormat, + data->relNode, data->i); + jsonbData->pathsTable = PGrnJSONBCreatePathsTable(jsonPathsTableName); + GRN_PTR_PUT(ctx, data->supplementaryTables, jsonbData->pathsTable); + } + + { + char jsonTypesTableName[GRN_TABLE_MAX_KEY_SIZE]; + snprintf(jsonTypesTableName, sizeof(jsonTypesTableName), + PGrnJSONTypesTableNameFormat, + data->relNode, data->i); + jsonbData->typesTable = PGrnJSONBCreateTypesTable(jsonTypesTableName); + GRN_PTR_PUT(ctx, data->supplementaryTables, jsonbData->typesTable); + } + + { + char jsonValuesTableName[GRN_TABLE_MAX_KEY_SIZE]; + snprintf(jsonValuesTableName, sizeof(jsonValuesTableName), + PGrnJSONValuesTableNameFormat, + data->relNode, data->i); + jsonbData->valuesTable = 
PGrnJSONBCreateValuesTable(jsonValuesTableName); + GRN_PTR_PUT(ctx, data->supplementaryTables, jsonbData->valuesTable); + } +} + +static void +PGrnJSONBCreateFullTextSearchIndexColumn(PGrnCreateData *data, + PGrnJSONBCreateData *jsonbData) +{ + const char *tokenizerName = PGRN_DEFAULT_TOKENIZER; + const char *normalizerName = PGRN_DEFAULT_NORMALIZER; + char lexiconName[GRN_TABLE_MAX_KEY_SIZE]; + grn_obj *lexicon; + + PGrnApplyOptionValues(data->index, &tokenizerName, &normalizerName); + + if (PGrnIsNoneValue(tokenizerName)) + return; + + snprintf(lexiconName, sizeof(lexiconName), + PGrnJSONValueLexiconNameFormat, + "FullTextSearch", data->relNode, data->i); + lexicon = PGrnCreateTable(lexiconName, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_SHORT_TEXT)); + GRN_PTR_PUT(ctx, data->lexicons, lexicon); + + grn_obj_set_info(ctx, lexicon, GRN_INFO_DEFAULT_TOKENIZER, + PGrnLookup(tokenizerName, ERROR)); + if (!PGrnIsNoneValue(normalizerName)) + { + grn_obj_set_info(ctx, lexicon, GRN_INFO_NORMALIZER, + PGrnLookup(normalizerName, ERROR)); + } + + PGrnCreateColumn(lexicon, + PGrnIndexColumnName, + GRN_OBJ_COLUMN_INDEX | GRN_OBJ_WITH_POSITION, + jsonbData->valuesTable); +} + +static void +PGrnJSONBCreateIndexColumn(PGrnCreateData *data, + PGrnJSONBCreateData *jsonbData, + const char *typeName, + grn_obj_flags tableType, + grn_obj *type) +{ + char lexiconName[GRN_TABLE_MAX_KEY_SIZE]; + grn_obj *lexicon; + + snprintf(lexiconName, sizeof(lexiconName), + PGrnJSONValueLexiconNameFormat, + typeName, data->relNode, data->i); + lexicon = PGrnCreateTable(lexiconName, tableType, type); + GRN_PTR_PUT(ctx, data->lexicons, lexicon); + PGrnCreateColumn(lexicon, + PGrnIndexColumnName, + GRN_OBJ_COLUMN_INDEX, + jsonbData->valuesTable); +} + +static void +PGrnJSONBCreateIndexColumns(PGrnCreateData *data, + PGrnJSONBCreateData *jsonbData) +{ + PGrnCreateColumn(jsonbData->valuesTable, PGrnIndexColumnName, GRN_OBJ_COLUMN_INDEX, data->sourcesTable); @@ -426,443 +759,155 @@ 
PGrnJSONBValueSetSource(Relation index, return; } - source = PGrnLookupColumn(jsonValuesTable, columnName, ERROR); - PGrnIndexColumnSetSource(indexColumn, source); - - grn_obj_unlink(ctx, source); - grn_obj_unlink(ctx, indexColumn); -} - -static void -PGrnJSONBSetSources(Relation index, - grn_obj *jsonValuesTable, - unsigned int nthAttribute) -{ - grn_obj *jsonPathsTable; - - jsonPathsTable = PGrnLookupJSONPathsTable(index, nthAttribute, ERROR); - - { - grn_obj *source; - grn_obj *indexColumn; - - GRN_BULK_REWIND(&(buffers->sourceIDs)); - - source = PGrnLookupColumn(jsonValuesTable, "path", ERROR); - GRN_RECORD_PUT(ctx, &(buffers->sourceIDs), grn_obj_id(ctx, source)); - grn_obj_unlink(ctx, source); - - source = PGrnLookupColumn(jsonValuesTable, "paths", ERROR); - GRN_RECORD_PUT(ctx, &(buffers->sourceIDs), grn_obj_id(ctx, source)); - grn_obj_unlink(ctx, source); - - indexColumn = PGrnLookupColumn(jsonPathsTable, PGrnIndexColumnName, - ERROR); - grn_obj_set_info(ctx, indexColumn, GRN_INFO_SOURCE, - &(buffers->sourceIDs)); - grn_obj_unlink(ctx, indexColumn); - } - - PGrnJSONBValueSetSource(index, jsonValuesTable, "string", "String", - nthAttribute, true); - PGrnJSONBValueSetSource(index, jsonValuesTable, "number", "Number", - nthAttribute, true); - PGrnJSONBValueSetSource(index, jsonValuesTable, "boolean", "Boolean", - nthAttribute, true); - PGrnJSONBValueSetSource(index, jsonValuesTable, "size", "Size", - nthAttribute, true); - PGrnJSONBValueSetSource(index, jsonValuesTable, "string", "FullTextSearch", - nthAttribute, false); - - grn_obj_unlink(ctx, jsonPathsTable); -} -#endif - -grn_obj * -PGrnJSONBSetSource(Relation index, unsigned int i) -{ -#ifdef JSONBOID - grn_obj *jsonValuesTable; - grn_obj *indexColumn; - - jsonValuesTable = PGrnLookupJSONValuesTable(index, i, ERROR); - PGrnJSONBSetSources(index, jsonValuesTable, i); - indexColumn = PGrnLookupColumn(jsonValuesTable, - PGrnIndexColumnName, - ERROR); - - return indexColumn; -#else - return NULL; -#endif -} - 
-#ifdef JSONBOID -/** - * pgroonga.match_jsonb(jsonb, query) : bool - */ -Datum -pgroonga_match_jsonb(PG_FUNCTION_ARGS) -{ -#ifdef NOT_USED - Jsonb *jsonb = PG_GETARG_JSONB(0); - text *query = PG_GETARG_TEXT_PP(1); -#endif - - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("pgroonga: operator @@ is available only in index scans"))); - - PG_RETURN_BOOL(false); -} - -typedef struct PGrnJSONBInsertData -{ - grn_obj *jsonPathsTable; - grn_obj *jsonValuesTable; - grn_obj *pathColumn; - grn_obj *pathsColumn; - grn_obj *stringColumn; - grn_obj *numberColumn; - grn_obj *booleanColumn; - grn_obj *sizeColumn; - grn_obj *typeColumn; - grn_obj *valueIDs; - grn_obj key; - grn_obj components; - grn_obj path; - grn_obj pathIDs; - grn_obj value; - grn_obj type; -} PGrnJSONBInsertData; - -static void -PGrnJSONBInsertDataInit(PGrnJSONBInsertData *data, - Relation index, - unsigned int nthValue, - grn_obj *valueIDs) -{ - data->jsonPathsTable = PGrnLookupJSONPathsTable(index, nthValue, ERROR); - data->jsonValuesTable = PGrnLookupJSONValuesTable(index, nthValue, ERROR); - - data->pathColumn = - PGrnLookupColumn(data->jsonValuesTable, "path", ERROR); - data->pathsColumn = - PGrnLookupColumn(data->jsonValuesTable, "paths", ERROR); - data->stringColumn = - PGrnLookupColumn(data->jsonValuesTable, "string", ERROR); - data->numberColumn = - PGrnLookupColumn(data->jsonValuesTable, "number", ERROR); - data->booleanColumn = - PGrnLookupColumn(data->jsonValuesTable, "boolean", ERROR); - data->sizeColumn = - PGrnLookupColumn(data->jsonValuesTable, "size", ERROR); - data->typeColumn = - PGrnLookupColumn(data->jsonValuesTable, "type", ERROR); - - data->valueIDs = valueIDs; - grn_obj_reinit(ctx, data->valueIDs, - grn_obj_id(ctx, data->jsonValuesTable), - GRN_OBJ_VECTOR); - - GRN_TEXT_INIT(&(data->key), 0); - GRN_TEXT_INIT(&(data->components), GRN_OBJ_VECTOR); - GRN_TEXT_INIT(&(data->path), 0); - GRN_RECORD_INIT(&(data->pathIDs), GRN_OBJ_VECTOR, - grn_obj_id(ctx, 
data->jsonPathsTable)); - GRN_VOID_INIT(&(data->value)); - GRN_TEXT_INIT(&(data->type), GRN_OBJ_DO_SHALLOW_COPY); -} - -static void -PGrnJSONBInsertDataFin(PGrnJSONBInsertData *data) -{ - GRN_OBJ_FIN(ctx, &(data->type)); - GRN_OBJ_FIN(ctx, &(data->value)); - GRN_OBJ_FIN(ctx, &(data->pathIDs)); - GRN_OBJ_FIN(ctx, &(data->path)); - GRN_OBJ_FIN(ctx, &(data->components)); - GRN_OBJ_FIN(ctx, &(data->key)); - - grn_obj_unlink(ctx, data->typeColumn); - grn_obj_unlink(ctx, data->sizeColumn); - grn_obj_unlink(ctx, data->booleanColumn); - grn_obj_unlink(ctx, data->numberColumn); - grn_obj_unlink(ctx, data->stringColumn); - grn_obj_unlink(ctx, data->pathsColumn); - grn_obj_unlink(ctx, data->pathColumn); - grn_obj_unlink(ctx, data->jsonValuesTable); - grn_obj_unlink(ctx, data->jsonPathsTable); -} - -static uint64_t -PGrnJSONBInsertGenerateKey(PGrnJSONBInsertData *data, - bool haveValue, - const char *typeName) -{ - unsigned int i, n; - - GRN_BULK_REWIND(&(data->key)); - - GRN_TEXT_PUTS(ctx, &(data->key), "."); - n = grn_vector_size(ctx, &(data->components)); - for (i = 0; i < n; i++) - { - const char *component; - unsigned int componentSize; - grn_id domain; - - componentSize = grn_vector_get_element(ctx, - &(data->components), - i, - &component, - NULL, - &domain); - if (domain == GRN_DB_UINT32) - { - GRN_TEXT_PUTS(ctx, &(data->key), "[]"); - } - else - { - GRN_TEXT_PUTS(ctx, &(data->key), "["); - grn_text_esc(ctx, &(data->key), component, componentSize); - GRN_TEXT_PUTS(ctx, &(data->key), "]"); - } - } - - GRN_TEXT_PUTS(ctx, &(data->key), "|"); - GRN_TEXT_PUTS(ctx, &(data->key), typeName); - - if (haveValue) - { - GRN_TEXT_PUTS(ctx, &(data->key), "|"); - grn_obj_cast(ctx, &(data->value), &(data->key), GRN_FALSE); - } + source = PGrnLookupColumn(jsonValuesTable, columnName, ERROR); + PGrnIndexColumnSetSource(indexColumn, source); - return XXH64(GRN_TEXT_VALUE(&data->key), - GRN_TEXT_LEN(&data->key), - 0); + grn_obj_unlink(ctx, source); + grn_obj_unlink(ctx, indexColumn); } 
static void -PGrnJSONBInsertAddPath(PGrnJSONBInsertData *data, - unsigned int start, - unsigned int flags) +PGrnJSONBSetSources(Relation index, + grn_obj *jsonValuesTable, + unsigned int nthAttribute) { - grn_id pathID; + grn_obj *jsonPathsTable; - GRN_BULK_REWIND(&(data->path)); - PGrnJSONGeneratePath(&(data->components), - start, - flags, - &(data->path)); + jsonPathsTable = PGrnLookupJSONPathsTable(index, nthAttribute, ERROR); - if (GRN_TEXT_LEN(&(data->path)) >= GRN_TABLE_MAX_KEY_SIZE) - return; + { + grn_obj *source; + grn_obj *indexColumn; - pathID = grn_table_add(ctx, data->jsonPathsTable, - GRN_TEXT_VALUE(&(data->path)), - GRN_TEXT_LEN(&(data->path)), - NULL); - if (pathID == GRN_ID_NIL) - return; + GRN_BULK_REWIND(&(buffers->sourceIDs)); - { - unsigned int i, n; + source = PGrnLookupColumn(jsonValuesTable, "path", ERROR); + GRN_RECORD_PUT(ctx, &(buffers->sourceIDs), grn_obj_id(ctx, source)); + grn_obj_unlink(ctx, source); - n = GRN_BULK_VSIZE(&(data->pathIDs)) / sizeof(grn_id); - for (i = 0; i < n; i++) - { - if (GRN_RECORD_VALUE_AT(&(data->pathIDs), i) == pathID) - return; - } + source = PGrnLookupColumn(jsonValuesTable, "paths", ERROR); + GRN_RECORD_PUT(ctx, &(buffers->sourceIDs), grn_obj_id(ctx, source)); + grn_obj_unlink(ctx, source); + + indexColumn = PGrnLookupColumn(jsonPathsTable, PGrnIndexColumnName, + ERROR); + grn_obj_set_info(ctx, indexColumn, GRN_INFO_SOURCE, + &(buffers->sourceIDs)); + grn_obj_unlink(ctx, indexColumn); } - GRN_RECORD_PUT(ctx, &(data->pathIDs), pathID); + PGrnJSONBValueSetSource(index, jsonValuesTable, "string", "String", + nthAttribute, true); + PGrnJSONBValueSetSource(index, jsonValuesTable, "number", "Number", + nthAttribute, true); + PGrnJSONBValueSetSource(index, jsonValuesTable, "boolean", "Boolean", + nthAttribute, true); + PGrnJSONBValueSetSource(index, jsonValuesTable, "size", "Size", + nthAttribute, true); + PGrnJSONBValueSetSource(index, jsonValuesTable, "string", "FullTextSearch", + nthAttribute, false); + + 
grn_obj_unlink(ctx, jsonPathsTable); } +#endif -static void -PGrnJSONBInsertGenerateSubPathsRecursive(PGrnJSONBInsertData *data, - unsigned int parentStart) +grn_obj * +PGrnJSONBSetSource(Relation index, unsigned int i) { - if (parentStart == grn_vector_size(ctx, &(data->components))) - return; +#ifdef JSONBOID + grn_obj *jsonValuesTable; + grn_obj *indexColumn; - PGrnJSONBInsertAddPath(data, - parentStart, - PGRN_JSON_GENERATE_PATH_USE_DOT_STYLE); - PGrnJSONBInsertAddPath(data, - parentStart, - 0); - PGrnJSONBInsertAddPath(data, - parentStart, - PGRN_JSON_GENERATE_PATH_INCLUDE_ARRAY); + jsonValuesTable = PGrnLookupJSONValuesTable(index, i, ERROR); + PGrnJSONBSetSources(index, jsonValuesTable, i); + indexColumn = PGrnLookupColumn(jsonValuesTable, + PGrnIndexColumnName, + ERROR); - PGrnJSONBInsertGenerateSubPathsRecursive(data, parentStart + 1); + return indexColumn; +#else + return NULL; +#endif } +#ifdef JSONBOID static void -PGrnJSONBInsertGeneratePaths(PGrnJSONBInsertData *data) +PGrnJSONBDeleteValues(grn_obj *valuesTable, grn_obj *valueIDs) { - GRN_BULK_REWIND(&(data->pathIDs)); - - PGrnJSONBInsertAddPath(data, - 0, - PGRN_JSON_GENERATE_PATH_IS_ABSOLUTE | - PGRN_JSON_GENERATE_PATH_USE_DOT_STYLE); - PGrnJSONBInsertAddPath(data, - 0, - PGRN_JSON_GENERATE_PATH_IS_ABSOLUTE); - PGrnJSONBInsertAddPath(data, - 0, - PGRN_JSON_GENERATE_PATH_IS_ABSOLUTE | - PGRN_JSON_GENERATE_PATH_INCLUDE_ARRAY); + int i, n; - PGrnJSONBInsertGenerateSubPathsRecursive(data, 0); + n = GRN_BULK_VSIZE(valueIDs) / sizeof(grn_id); + for (i = 0; i < n; i++) + { + grn_id id; + id = GRN_RECORD_VALUE_AT(valueIDs, i); + grn_table_delete_by_id(ctx, valuesTable, id); + } } -static void -PGrnJSONBInsertValueSet(PGrnJSONBInsertData *data, - grn_obj *column, - const char *typeName) +/** + * pgroonga.match_jsonb(jsonb, query) : bool + */ +Datum +pgroonga_match_jsonb(PG_FUNCTION_ARGS) { - uint64_t key; - grn_id valueID; - int added; - - key = PGrnJSONBInsertGenerateKey(data, column != NULL, typeName); - 
valueID = grn_table_add(ctx, data->jsonValuesTable, - &key, sizeof(uint64_t), - &added); - GRN_RECORD_PUT(ctx, data->valueIDs, valueID); - if (!added) - return; - - GRN_BULK_REWIND(&(data->path)); - PGrnJSONGenerateCompletePath(&(data->components), &(data->path)); - if (GRN_TEXT_LEN(&(data->path)) < GRN_TABLE_MAX_KEY_SIZE) - grn_obj_set_value(ctx, data->pathColumn, valueID, - &(data->path), GRN_OBJ_SET); - - PGrnJSONBInsertGeneratePaths(data); - grn_obj_set_value(ctx, data->pathsColumn, valueID, - &(data->pathIDs), GRN_OBJ_SET); - - if (column) - grn_obj_set_value(ctx, column, valueID, &(data->value), GRN_OBJ_SET); - - GRN_TEXT_SETS(ctx, &(data->type), typeName); - grn_obj_set_value(ctx, data->typeColumn, valueID, - &(data->type), GRN_OBJ_SET); -} + Jsonb *jsonb = PG_GETARG_JSONB(0); + text *query = PG_GETARG_TEXT_PP(1); + grn_obj valueIDs; + PGrnJSONBInsertData data; + JsonbIterator *iter; + grn_obj *filter = NULL; + grn_obj *dummy_variable = NULL; + grn_obj *result = NULL; + bool matched = false; -static void PGrnJSONBInsert(JsonbIterator **iter, PGrnJSONBInsertData *data); + data.pathsTable = tmpPathsTable; + data.valuesTable = tmpValuesTable; + GRN_PTR_INIT(&valueIDs, GRN_OBJ_VECTOR, grn_obj_id(ctx, data.valuesTable)); + data.valueIDs = &valueIDs; + PGrnJSONBInsertDataInit(&data); + iter = JsonbIteratorInit(&(jsonb->root)); + PGrnJSONBInsertContainer(&iter, &data); + PGrnJSONBInsertDataFin(&data); -static void -PGrnJSONBInsertValue(JsonbIterator **iter, - JsonbValue *value, - PGrnJSONBInsertData *data) -{ - switch (value->type) - { - case jbvNull: - PGrnJSONBInsertValueSet(data, NULL, "null"); - break; - case jbvString: - grn_obj_reinit(ctx, &(data->value), GRN_DB_LONG_TEXT, - GRN_OBJ_DO_SHALLOW_COPY); - GRN_TEXT_SET(ctx, &(data->value), - value->val.string.val, - value->val.string.len); - PGrnJSONBInsertValueSet(data, data->stringColumn, "string"); - break; - case jbvNumeric: + PG_TRY(); { - Datum numericInString = - DirectFunctionCall1(numeric_out, - 
NumericGetDatum(value->val.numeric)); - const char *numericInCString = DatumGetCString(numericInString); - grn_obj_reinit(ctx, &(data->value), GRN_DB_TEXT, - GRN_OBJ_DO_SHALLOW_COPY); - GRN_TEXT_SETS(ctx, &(data->value), numericInCString); - PGrnJSONBInsertValueSet(data, data->numberColumn, "number"); - break; + GRN_EXPR_CREATE_FOR_QUERY(ctx, tmpValuesTable, filter, dummy_variable); + PGrnCheck("pgroonga: match_jsonb: failed to create expression object"); + grn_expr_parse(ctx, filter, + VARDATA_ANY(query), + VARSIZE_ANY_EXHDR(query), + NULL, GRN_OP_MATCH, GRN_OP_AND, + GRN_EXPR_SYNTAX_SCRIPT); + PGrnCheck("pgroonga: match_jsonb: failed to parse query"); + result = grn_table_create(ctx, NULL, 0, NULL, + GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, + tmpValuesTable, NULL); + PGrnCheck("pgroonga: match_jsonb: failed to create result table"); + grn_table_select(ctx, tmpValuesTable, filter, result, GRN_OP_OR); + PGrnCheck("pgroonga: match_jsonb: failed to select"); } - case jbvBool: - grn_obj_reinit(ctx, &(data->value), GRN_DB_BOOL, 0); - GRN_BOOL_SET(ctx, &(data->value), value->val.boolean); - PGrnJSONBInsertValueSet(data, data->booleanColumn, "boolean"); - break; - case jbvArray: - PGrnJSONBInsert(iter, data); - break; - case jbvObject: - PGrnJSONBInsert(iter, data); - break; - case jbvBinary: - PGrnJSONBInsert(iter, data); - break; + PG_CATCH(); + { + if (result) + grn_obj_close(ctx, result); + if (filter) + grn_obj_close(ctx, filter); + PGrnJSONBDeleteValues(tmpValuesTable, &valueIDs); + GRN_OBJ_FIN(ctx, &valueIDs); + PG_RE_THROW(); } -} + PG_END_TRY(); -static void -PGrnJSONBInsert(JsonbIterator **iter, PGrnJSONBInsertData *data) -{ - JsonbIteratorToken token; - JsonbValue value; + matched = grn_table_size(ctx, result) > 0; - while ((token = JsonbIteratorNext(iter, &value, false)) != WJB_DONE) { - switch (token) - { - case WJB_KEY: - grn_vector_add_element(ctx, &(data->components), - value.val.string.val, - value.val.string.len, - 0, - GRN_DB_SHORT_TEXT); - break; - 
case WJB_VALUE: - PGrnJSONBInsertValue(iter, &value, data); - { - const char *component; - grn_vector_pop_element(ctx, &(data->components), &component, - NULL, NULL); - } - break; - case WJB_ELEM: - PGrnJSONBInsertValue(iter, &value, data); - break; - case WJB_BEGIN_ARRAY: - { - uint32_t nElements = value.val.array.nElems; - grn_vector_add_element(ctx, &(data->components), - (const char *)&nElements, - sizeof(uint32_t), - 0, - GRN_DB_UINT32); - PGrnJSONBInsertValueSet(data, NULL, "array"); - break; - } - case WJB_END_ARRAY: - { - const char *component; - grn_vector_pop_element(ctx, &(data->components), &component, - NULL, NULL); - break; - } - case WJB_BEGIN_OBJECT: - PGrnJSONBInsertValueSet(data, NULL, "object"); - break; - case WJB_END_OBJECT: - break; - default: - ereport(ERROR, - (errcode(ERRCODE_SYSTEM_ERROR), - errmsg("pgroonga: jsonb iterator returns invalid token: %d", - token))); - break; - } - } + grn_obj_close(ctx, filter); + grn_obj_close(ctx, result); + + PGrnJSONBDeleteValues(tmpValuesTable, &valueIDs); + GRN_OBJ_FIN(ctx, &valueIDs); + + PG_RETURN_BOOL(matched); } #endif @@ -877,10 +922,16 @@ PGrnJSONBInsert(Relation index, Jsonb *jsonb; JsonbIterator *iter; - PGrnJSONBInsertDataInit(&data, index, nthValue, valueIDs); + data.pathsTable = PGrnLookupJSONPathsTable(index, nthValue, ERROR); + data.valuesTable = PGrnLookupJSONValuesTable(index, nthValue, ERROR); + data.valueIDs = valueIDs; + grn_obj_reinit(ctx, data.valueIDs, + grn_obj_id(ctx, data.valuesTable), + GRN_OBJ_VECTOR); + PGrnJSONBInsertDataInit(&data); jsonb = DatumGetJsonb(values[nthValue]); iter = JsonbIteratorInit(&(jsonb->root)); - PGrnJSONBInsert(&iter, &data); + PGrnJSONBInsertContainer(&iter, &data); PGrnJSONBInsertDataFin(&data); #endif } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download