Kouhei Sutou
null+****@clear*****
Fri Feb 3 14:16:38 JST 2017
Kouhei Sutou 2017-02-03 14:16:38 +0900 (Fri, 03 Feb 2017) New Revision: 24e94927a6f689eed4c2b37a792977ee46c22d8b https://github.com/groonga/groonga/commit/24e94927a6f689eed4c2b37a792977ee46c22d8b Message: Move load related code to load.c Added files: lib/grn_load.h lib/load.c Modified files: lib/db.c lib/grn_db.h lib/proc.c lib/sources.am lib/util.c Modified: lib/db.c (+0 -1133) =================================================================== --- lib/db.c 2017-02-03 14:15:00 +0900 (8c92436) +++ lib/db.c 2017-02-03 14:16:38 +0900 (7943e6f) @@ -13672,1139 +13672,6 @@ exit : GRN_API_RETURN(buf); } -/* grn_load */ - -static grn_obj * -values_add(grn_ctx *ctx, grn_loader *loader) -{ - grn_obj *res; - uint32_t curr_size = loader->values_size * sizeof(grn_obj); - if (curr_size < GRN_TEXT_LEN(&loader->values)) { - res = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + curr_size); - res->header.domain = GRN_DB_TEXT; - GRN_BULK_REWIND(res); - } else { - if (grn_bulk_space(ctx, &loader->values, sizeof(grn_obj))) { return NULL; } - res = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + curr_size); - GRN_TEXT_INIT(res, 0); - } - loader->values_size++; - loader->last = res; - return res; -} - -static grn_obj * -values_next(grn_ctx *ctx, grn_obj *value) -{ - if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET || - value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) { - value += GRN_UINT32_VALUE(value); - } - return value + 1; -} - -static int -values_len(grn_ctx *ctx, grn_obj *head, grn_obj *tail) -{ - int len; - for (len = 0; head < tail; head = values_next(ctx, head), len++) ; - return len; -} - -static grn_id -loader_add(grn_ctx *ctx, grn_obj *key) -{ - int added = 0; - grn_loader *loader = &ctx->impl->loader; - grn_id id = grn_table_add_by_key(ctx, loader->table, key, &added); - if (!added && loader->ifexists) { - grn_obj *v = grn_expr_get_var_by_offset(ctx, loader->ifexists, 0); - grn_obj *result; - GRN_RECORD_SET(ctx, v, id); - result = grn_expr_exec(ctx, loader->ifexists, 
0); - if (!grn_obj_is_true(ctx, result)) { - id = 0; - } - } - return id; -} - -static void -set_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *vector) -{ - int n = GRN_UINT32_VALUE(vector); - grn_obj buf, *v = vector + 1; - grn_id range_id; - grn_obj *range; - - range_id = DB_OBJ(column)->range; - range = grn_ctx_at(ctx, range_id); - if (GRN_OBJ_TABLEP(range)) { - GRN_RECORD_INIT(&buf, GRN_OBJ_VECTOR, range_id); - while (n--) { - grn_bool cast_failed = GRN_FALSE; - grn_obj record, *element = v; - if (range_id != element->header.domain) { - GRN_RECORD_INIT(&record, 0, range_id); - if (grn_obj_cast(ctx, element, &record, GRN_TRUE)) { - cast_failed = GRN_TRUE; - ERR_CAST(column, range, element); - } - element = &record; - } - if (!cast_failed) { - GRN_UINT32_PUT(ctx, &buf, GRN_RECORD_VALUE(element)); - } - if (element == &record) { GRN_OBJ_FIN(ctx, element); } - v = values_next(ctx, v); - } - } else { - if (((struct _grn_type *)range)->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) { - GRN_TEXT_INIT(&buf, GRN_OBJ_VECTOR); - while (n--) { - if (v->header.domain == GRN_DB_TEXT) { - grn_bool cast_failed = GRN_FALSE; - grn_obj casted_element, *element = v; - if (range_id != element->header.domain) { - GRN_OBJ_INIT(&casted_element, GRN_BULK, 0, range_id); - if (grn_obj_cast(ctx, element, &casted_element, GRN_TRUE)) { - cast_failed = GRN_TRUE; - ERR_CAST(column, range, element); - } - element = &casted_element; - } - if (!cast_failed) { - grn_vector_add_element(ctx, &buf, - GRN_TEXT_VALUE(element), - GRN_TEXT_LEN(element), 0, - element->header.domain); - } - if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); } - } else { - ERR(GRN_INVALID_ARGUMENT, "bad syntax."); - } - v = values_next(ctx, v); - } - } else { - grn_id value_size = ((grn_db_obj *)range)->range; - GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, range_id); - while (n--) { - grn_bool cast_failed = GRN_FALSE; - grn_obj casted_element, *element = v; - if (range_id != element->header.domain) { - 
GRN_OBJ_INIT(&casted_element, GRN_BULK, 0, range_id); - if (grn_obj_cast(ctx, element, &casted_element, GRN_TRUE)) { - cast_failed = GRN_TRUE; - ERR_CAST(column, range, element); - } - element = &casted_element; - } - if (!cast_failed) { - grn_bulk_write(ctx, &buf, GRN_TEXT_VALUE(element), value_size); - } - if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); } - v = values_next(ctx, v); - } - } - } - grn_obj_set_value(ctx, column, id, &buf, GRN_OBJ_SET); - GRN_OBJ_FIN(ctx, &buf); -} - -static void -set_weight_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *index_value) -{ - if (!GRN_OBJ_WEIGHT_VECTOR_COLUMNP(column)) { - char column_name[GRN_TABLE_MAX_KEY_SIZE]; - int column_name_size; - column_name_size = grn_obj_name(ctx, column, column_name, - GRN_TABLE_MAX_KEY_SIZE); - ERR(GRN_INVALID_ARGUMENT, - "<%.*s>: columns except weight vector column don't support object value", - column_name_size, column_name); - return; - } - - { - unsigned int i, n; - grn_obj vector; - grn_obj weight_buffer; - - n = GRN_UINT32_VALUE(index_value); - GRN_TEXT_INIT(&vector, GRN_OBJ_VECTOR); - GRN_UINT32_INIT(&weight_buffer, 0); - for (i = 0; i < n; i += 2) { - grn_rc rc; - grn_obj *key, *weight; - - key = index_value + 1 + i; - weight = key + 1; - - GRN_BULK_REWIND(&weight_buffer); - rc = grn_obj_cast(ctx, weight, &weight_buffer, GRN_TRUE); - if (rc != GRN_SUCCESS) { - grn_obj *range; - range = grn_ctx_at(ctx, weight_buffer.header.domain); - ERR_CAST(column, range, weight); - grn_obj_unlink(ctx, range); - break; - } - grn_vector_add_element(ctx, &vector, - GRN_BULK_HEAD(key), GRN_BULK_VSIZE(key), - GRN_UINT32_VALUE(&weight_buffer), - key->header.domain); - } - grn_obj_set_value(ctx, column, id, &vector, GRN_OBJ_SET); - GRN_OBJ_FIN(ctx, &vector); - } -} - -static inline int -name_equal(const char *p, unsigned int size, const char *name) -{ - if (strlen(name) != size) { return 0; } - if (*p != GRN_DB_PSEUDO_COLUMN_PREFIX) { return 0; } - return !memcmp(p + 1, name + 1, 
size - 1); -} - -static void -report_set_column_value_failure(grn_ctx *ctx, - grn_obj *key, - const char *column_name, - unsigned int column_name_size, - grn_obj *column_value) -{ - grn_obj key_inspected, column_value_inspected; - - GRN_TEXT_INIT(&key_inspected, 0); - GRN_TEXT_INIT(&column_value_inspected, 0); - limited_size_inspect(ctx, &key_inspected, key); - limited_size_inspect(ctx, &column_value_inspected, column_value); - GRN_LOG(ctx, GRN_LOG_ERROR, - "[table][load] failed to set column value: %s: " - "key: <%.*s>, column: <%.*s>, value: <%.*s>", - ctx->errbuf, - (int)GRN_TEXT_LEN(&key_inspected), - GRN_TEXT_VALUE(&key_inspected), - column_name_size, - column_name, - (int)GRN_TEXT_LEN(&column_value_inspected), - GRN_TEXT_VALUE(&column_value_inspected)); - GRN_OBJ_FIN(ctx, &key_inspected); - GRN_OBJ_FIN(ctx, &column_value_inspected); -} - -static void -grn_loader_save_error(grn_ctx *ctx, grn_loader *loader) -{ - loader->rc = ctx->rc; - grn_strcpy(loader->errbuf, GRN_CTX_MSGSIZE, ctx->errbuf); -} - -static grn_id -parse_id_value(grn_ctx *ctx, grn_obj *value) -{ - switch (value->header.type) { - case GRN_DB_UINT32 : - return GRN_UINT32_VALUE(value); - case GRN_DB_INT32 : - return GRN_INT32_VALUE(value); - default : - { - grn_id id = GRN_ID_NIL; - grn_obj casted_value; - GRN_UINT32_INIT(&casted_value, 0); - if (grn_obj_cast(ctx, value, &casted_value, GRN_FALSE) != GRN_SUCCESS) { - grn_obj inspected; - GRN_TEXT_INIT(&inspected, 0); - grn_inspect(ctx, &inspected, value); - ERR(GRN_INVALID_ARGUMENT, - "<%s>: failed to cast to <UInt32>: <%.*s>", - GRN_COLUMN_NAME_ID, - (int)GRN_TEXT_LEN(&inspected), - GRN_TEXT_VALUE(&inspected)); - GRN_OBJ_FIN(ctx, &inspected); - } else { - id = GRN_UINT32_VALUE(&casted_value); - } - GRN_OBJ_FIN(ctx, &casted_value); - return id; - } - } -} - -static void -bracket_close(grn_ctx *ctx, grn_loader *loader) -{ - grn_id id = GRN_ID_NIL; - grn_obj *value, *value_end, *id_value = NULL, *key_value = NULL; - grn_obj *col, **cols; /* Columns 
except _id and _key. */ - uint32_t i, begin; - uint32_t ncols; /* Number of columns except _id and _key. */ - uint32_t nvalues; /* Number of values in brackets. */ - uint32_t depth; - - cols = (grn_obj **)GRN_BULK_HEAD(&loader->columns); - ncols = GRN_BULK_VSIZE(&loader->columns) / sizeof(grn_obj *); - GRN_UINT32_POP(&loader->level, begin); - value = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin; - value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size; - GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET); - GRN_UINT32_SET(ctx, value, loader->values_size - begin - 1); - value++; - depth = GRN_BULK_VSIZE(&loader->level); - if (depth > sizeof(uint32_t) * loader->emit_level) { - return; - } - if (depth == 0 || !loader->table || - loader->columns_status == GRN_LOADER_COLUMNS_BROKEN) { - goto exit; - } - nvalues = values_len(ctx, value, value_end); - - if (loader->columns_status == GRN_LOADER_COLUMNS_UNSET) { - /* - * Target columns and _id or _key are not specified yet and values are - * handled as column names and "_id" or "_key". 
- */ - for (i = 0; i < nvalues; i++) { - const char *col_name; - unsigned int col_name_size; - if (value->header.domain != GRN_DB_TEXT) { - grn_obj buffer; - GRN_TEXT_INIT(&buffer, 0); - grn_inspect(ctx, &buffer, value); - ERR(GRN_INVALID_ARGUMENT, - "column name must be string: <%.*s>", - (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer)); - grn_loader_save_error(ctx, loader); - GRN_OBJ_FIN(ctx, &buffer); - loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; - goto exit; - } - col_name = GRN_TEXT_VALUE(value); - col_name_size = GRN_TEXT_LEN(value); - col = grn_obj_column(ctx, loader->table, col_name, col_name_size); - if (!col) { - ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>", - col_name_size, col_name); - grn_loader_save_error(ctx, loader); - loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; - goto exit; - } - if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_ID)) { - grn_obj_unlink(ctx, col); - if (loader->id_offset != -1 || loader->key_offset != -1) { - /* _id and _key must not appear more than once. */ - if (loader->id_offset != -1) { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_ID, i, - GRN_COLUMN_NAME_ID, loader->id_offset); - } else { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_ID, i, - GRN_COLUMN_NAME_KEY, loader->key_offset); - } - grn_loader_save_error(ctx, loader); - loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; - goto exit; - } - loader->id_offset = i; - } else if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_KEY)) { - grn_obj_unlink(ctx, col); - if (loader->id_offset != -1 || loader->key_offset != -1) { - /* _id and _key must not appear more than once. 
*/ - if (loader->id_offset != -1) { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_KEY, i, - GRN_COLUMN_NAME_ID, loader->id_offset); - } else { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_KEY, i, - GRN_COLUMN_NAME_KEY, loader->key_offset); - } - grn_loader_save_error(ctx, loader); - loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; - goto exit; - } - loader->key_offset = i; - } else { - GRN_PTR_PUT(ctx, &loader->columns, col); - } - value++; - } - switch (loader->table->header.type) { - case GRN_TABLE_HASH_KEY : - case GRN_TABLE_PAT_KEY : - case GRN_TABLE_DAT_KEY : - if (loader->id_offset == -1 && loader->key_offset == -1) { - ERR(GRN_INVALID_ARGUMENT, "missing id or key column"); - grn_loader_save_error(ctx, loader); - loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; - goto exit; - } - break; - } - loader->columns_status = GRN_LOADER_COLUMNS_SET; - goto exit; - } - - /* Target columns and _id or _key are already specified. */ - if (!nvalues) { - /* - * Accept empty arrays because a dump command may output a load command - * which contains empty arrays for a table with deleted records. 
- */ - id = grn_table_add(ctx, loader->table, NULL, 0, NULL); - } else { - uint32_t expected_nvalues = ncols; - if (loader->id_offset != -1 || loader->key_offset != -1) { - expected_nvalues++; - } - if (nvalues != expected_nvalues) { - ERR(GRN_INVALID_ARGUMENT, - "unexpected #values: expected:%u, actual:%u", - expected_nvalues, nvalues); - goto exit; - } - if (loader->id_offset != -1) { - id_value = value + loader->id_offset; - id = parse_id_value(ctx, id_value); - if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) { - id = grn_table_add(ctx, loader->table, NULL, 0, NULL); - } - } else if (loader->key_offset != -1) { - key_value = value + loader->key_offset; - id = loader_add(ctx, key_value); - } else { - id = grn_table_add(ctx, loader->table, NULL, 0, NULL); - } - } - if (id == GRN_ID_NIL) { - /* Target record is not available. */ - goto exit; - } - - for (i = 0; i < nvalues; i++, value = values_next(ctx, value)) { - if (i == loader->id_offset || i == loader->key_offset) { - /* Skip _id and _key, because it's already used to get id. 
*/ - continue; - } - col = *cols; - if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) { - set_vector(ctx, col, id, value); - } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) { - set_weight_vector(ctx, col, id, value); - } else { - grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET); - } - if (ctx->rc != GRN_SUCCESS) { - char column_name[GRN_TABLE_MAX_KEY_SIZE]; - unsigned int column_name_size; - grn_loader_save_error(ctx, loader); - column_name_size = grn_obj_name(ctx, col, column_name, - GRN_TABLE_MAX_KEY_SIZE); - report_set_column_value_failure(ctx, key_value, - column_name, column_name_size, - value); - ERRCLR(ctx); - } - cols++; - } - if (loader->each) { - grn_obj *v = grn_expr_get_var_by_offset(ctx, loader->each, 0); - GRN_RECORD_SET(ctx, v, id); - grn_expr_exec(ctx, loader->each, 0); - } - loader->nrecords++; -exit: - if (depth > 0 && loader->output_ids) { - GRN_UINT32_PUT(ctx, &(loader->ids), id); - } - loader->values_size = begin; -} - -static void -brace_close(grn_ctx *ctx, grn_loader *loader) -{ - grn_id id = GRN_ID_NIL; - grn_obj *value, *value_begin, *value_end; - grn_obj *id_value = NULL, *key_value = NULL; - uint32_t begin; - - GRN_UINT32_POP(&loader->level, begin); - value_begin = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin; - value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size; - GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACE); - GRN_UINT32_SET(ctx, value_begin, loader->values_size - begin - 1); - value_begin++; - if (GRN_BULK_VSIZE(&loader->level) > sizeof(uint32_t) * loader->emit_level) { - return; - } - if (!loader->table) { - goto exit; - } - - /* Scan values to find _id or _key. 
*/ - for (value = value_begin; value + 1 < value_end; - value = values_next(ctx, value)) { - const char *name = GRN_TEXT_VALUE(value); - unsigned int name_size = GRN_TEXT_LEN(value); - if (value->header.domain != GRN_DB_TEXT) { - grn_obj buffer; - GRN_TEXT_INIT(&buffer, 0); - grn_inspect(ctx, &buffer, value); - GRN_LOG(ctx, GRN_LOG_ERROR, - "column name must be string: <%.*s>", - (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer)); - GRN_OBJ_FIN(ctx, &buffer); - goto exit; - } - value++; - if (name_equal(name, name_size, GRN_COLUMN_NAME_ID)) { - if (id_value || key_value) { - if (loader->table->header.type == GRN_TABLE_NO_KEY) { - GRN_LOG(ctx, GRN_LOG_ERROR, "duplicated '_id' column"); - goto exit; - } else { - GRN_LOG(ctx, GRN_LOG_ERROR, - "duplicated key columns: %s and %s", - id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY, - GRN_COLUMN_NAME_ID); - goto exit; - } - } - id_value = value; - } else if (name_equal(name, name_size, GRN_COLUMN_NAME_KEY)) { - if (id_value || key_value) { - GRN_LOG(ctx, GRN_LOG_ERROR, - "duplicated key columns: %s and %s", - id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY, - GRN_COLUMN_NAME_KEY); - goto exit; - } - key_value = value; - } - } - - switch (loader->table->header.type) { - case GRN_TABLE_HASH_KEY : - case GRN_TABLE_PAT_KEY : - case GRN_TABLE_DAT_KEY : - /* The target table requires _id or _key. */ - if (!id_value && !key_value) { - GRN_LOG(ctx, GRN_LOG_ERROR, "neither _key nor _id is assigned"); - goto exit; - } - break; - default : - /* The target table does not have _key. 
*/ - if (key_value) { - GRN_LOG(ctx, GRN_LOG_ERROR, "nonexistent key value"); - goto exit; - } - break; - } - - if (id_value) { - id = parse_id_value(ctx, id_value); - if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) { - if (ctx->rc == GRN_SUCCESS) { - id = grn_table_add(ctx, loader->table, NULL, 0, NULL); - } - } - } else if (key_value) { - id = loader_add(ctx, key_value); - } else { - id = grn_table_add(ctx, loader->table, NULL, 0, NULL); - } - if (id == GRN_ID_NIL) { - /* Target record is not available. */ - goto exit; - } - - for (value = value_begin; value + 1 < value_end; - value = values_next(ctx, value)) { - grn_obj *col; - const char *name = GRN_TEXT_VALUE(value); - unsigned int name_size = GRN_TEXT_LEN(value); - value++; - if (value == id_value || value == key_value) { - /* Skip _id and _key, because it's already used to get id. */ - continue; - } - col = grn_obj_column(ctx, loader->table, name, name_size); - if (!col) { - GRN_LOG(ctx, GRN_LOG_ERROR, "invalid column('%.*s')", - (int)name_size, name); - /* Automatic column creation is disabled. 
*/ - /* - if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) { - grn_obj *v = value + 1; - col = grn_column_create(ctx, loader->table, name, name_size, - NULL, GRN_OBJ_PERSISTENT|GRN_OBJ_COLUMN_VECTOR, - grn_ctx_at(ctx, v->header.domain)); - } else { - col = grn_column_create(ctx, loader->table, name, name_size, - NULL, GRN_OBJ_PERSISTENT, - grn_ctx_at(ctx, value->header.domain)); - } - */ - } else { - if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) { - set_vector(ctx, col, id, value); - } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) { - set_weight_vector(ctx, col, id, value); - } else { - grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET); - } - if (ctx->rc != GRN_SUCCESS) { - grn_loader_save_error(ctx, loader); - report_set_column_value_failure(ctx, key_value, - name, name_size, value); - ERRCLR(ctx); - } - grn_obj_unlink(ctx, col); - } - } - if (loader->each) { - value = grn_expr_get_var_by_offset(ctx, loader->each, 0); - GRN_RECORD_SET(ctx, value, id); - grn_expr_exec(ctx, loader->each, 0); - } - loader->nrecords++; -exit: - if (loader->output_ids) { - GRN_UINT32_PUT(ctx, &(loader->ids), id); - } - loader->values_size = begin; -} - -#define JSON_READ_OPEN_BRACKET() do {\ - GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\ - values_add(ctx, loader);\ - loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACKET;\ - loader->stat = GRN_LOADER_TOKEN;\ - str++;\ -} while (0) - -#define JSON_READ_OPEN_BRACE() do {\ - GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\ - values_add(ctx, loader);\ - loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACE;\ - loader->stat = GRN_LOADER_TOKEN;\ - str++;\ -} while (0) - -static void -json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len) -{ - const char *const beg = str; - char c; - int len; - const char *se = str + str_len; - while (str < se) { - c = *str; - switch (loader->stat) { - case GRN_LOADER_BEGIN : - if ((len = grn_isspace(str, ctx->encoding))) { 
- str += len; - continue; - } - switch (c) { - case '[' : - JSON_READ_OPEN_BRACKET(); - break; - case '{' : - JSON_READ_OPEN_BRACE(); - break; - default : - ERR(GRN_INVALID_ARGUMENT, - "JSON must start with '[' or '{': <%.*s>", str_len, beg); - loader->stat = GRN_LOADER_END; - break; - } - break; - case GRN_LOADER_TOKEN : - if ((len = grn_isspace(str, ctx->encoding))) { - str += len; - continue; - } - switch (c) { - case '"' : - loader->stat = GRN_LOADER_STRING; - values_add(ctx, loader); - str++; - break; - case '[' : - JSON_READ_OPEN_BRACKET(); - break; - case '{' : - JSON_READ_OPEN_BRACE(); - break; - case ':' : - str++; - break; - case ',' : - str++; - break; - case ']' : - bracket_close(ctx, loader); - loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; - if (ctx->rc == GRN_CANCEL) { - loader->stat = GRN_LOADER_END; - } - str++; - break; - case '}' : - brace_close(ctx, loader); - loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; - if (ctx->rc == GRN_CANCEL) { - loader->stat = GRN_LOADER_END; - } - str++; - break; - case '+' : case '-' : case '0' : case '1' : case '2' : case '3' : - case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : - loader->stat = GRN_LOADER_NUMBER; - values_add(ctx, loader); - break; - default : - if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) { - loader->stat = GRN_LOADER_SYMBOL; - values_add(ctx, loader); - } else { - if ((len = grn_charlen(ctx, str, se))) { - GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c); - GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg); - GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^"); - str += len; - } else { - GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c); - GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg); - str = se; - } - } - break; - } - break; - case GRN_LOADER_SYMBOL : - if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || - 
('0' <= c && c <= '9') || ('_' == c)) { - GRN_TEXT_PUTC(ctx, loader->last, c); - str++; - } else { - char *v = GRN_TEXT_VALUE(loader->last); - switch (*v) { - case 'n' : - if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) { - loader->last->header.domain = GRN_DB_VOID; - GRN_BULK_REWIND(loader->last); - } - break; - case 't' : - if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) { - loader->last->header.domain = GRN_DB_BOOL; - GRN_BOOL_SET(ctx, loader->last, GRN_TRUE); - } - break; - case 'f' : - if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) { - loader->last->header.domain = GRN_DB_BOOL; - GRN_BOOL_SET(ctx, loader->last, GRN_FALSE); - } - break; - default : - break; - } - loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; - } - break; - case GRN_LOADER_NUMBER : - switch (c) { - case '+' : case '-' : case '.' : case 'e' : case 'E' : - case '0' : case '1' : case '2' : case '3' : case '4' : - case '5' : case '6' : case '7' : case '8' : case '9' : - GRN_TEXT_PUTC(ctx, loader->last, c); - str++; - break; - default : - { - const char *cur, *str = GRN_BULK_HEAD(loader->last); - const char *str_end = GRN_BULK_CURR(loader->last); - int64_t i = grn_atoll(str, str_end, &cur); - if (cur == str_end) { - loader->last->header.domain = GRN_DB_INT64; - GRN_INT64_SET(ctx, loader->last, i); - } else if (cur != str) { - double d; - char *end; - grn_obj buf; - GRN_TEXT_INIT(&buf, 0); - GRN_TEXT_PUT(ctx, &buf, str, GRN_BULK_VSIZE(loader->last)); - GRN_TEXT_PUTC(ctx, &buf, '\0'); - errno = 0; - d = strtod(GRN_TEXT_VALUE(&buf), &end); - if (!errno && end + 1 == GRN_BULK_CURR(&buf)) { - loader->last->header.domain = GRN_DB_FLOAT; - GRN_FLOAT_SET(ctx, loader->last, d); - } - GRN_OBJ_FIN(ctx, &buf); - } - } - loader->stat = GRN_BULK_VSIZE(&loader->level) ? 
GRN_LOADER_TOKEN : GRN_LOADER_END; - break; - } - break; - case GRN_LOADER_STRING : - switch (c) { - case '\\' : - loader->stat = GRN_LOADER_STRING_ESC; - str++; - break; - case '"' : - str++; - loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; - /* - *(GRN_BULK_CURR(loader->last)) = '\0'; - GRN_LOG(ctx, GRN_LOG_ALERT, "read str(%s)", GRN_TEXT_VALUE(loader->last)); - */ - break; - default : - if ((len = grn_charlen(ctx, str, se))) { - GRN_TEXT_PUT(ctx, loader->last, str, len); - str += len; - } else { - GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c); - GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg); - str = se; - } - break; - } - break; - case GRN_LOADER_STRING_ESC : - switch (c) { - case 'b' : - GRN_TEXT_PUTC(ctx, loader->last, '\b'); - loader->stat = GRN_LOADER_STRING; - break; - case 'f' : - GRN_TEXT_PUTC(ctx, loader->last, '\f'); - loader->stat = GRN_LOADER_STRING; - break; - case 'n' : - GRN_TEXT_PUTC(ctx, loader->last, '\n'); - loader->stat = GRN_LOADER_STRING; - break; - case 'r' : - GRN_TEXT_PUTC(ctx, loader->last, '\r'); - loader->stat = GRN_LOADER_STRING; - break; - case 't' : - GRN_TEXT_PUTC(ctx, loader->last, '\t'); - loader->stat = GRN_LOADER_STRING; - break; - case 'u' : - loader->stat = GRN_LOADER_UNICODE0; - break; - default : - GRN_TEXT_PUTC(ctx, loader->last, c); - loader->stat = GRN_LOADER_STRING; - break; - } - str++; - break; - case GRN_LOADER_UNICODE0 : - switch (c) { - case '0' : case '1' : case '2' : case '3' : case '4' : - case '5' : case '6' : case '7' : case '8' : case '9' : - loader->unichar = (c - '0') * 0x1000; - break; - case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : - loader->unichar = (c - 'a' + 10) * 0x1000; - break; - case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : - loader->unichar = (c - 'A' + 10) * 0x1000; - break; - default : - ;// todo : error - } - loader->stat = GRN_LOADER_UNICODE1; - str++; - break; - case 
GRN_LOADER_UNICODE1 : - switch (c) { - case '0' : case '1' : case '2' : case '3' : case '4' : - case '5' : case '6' : case '7' : case '8' : case '9' : - loader->unichar += (c - '0') * 0x100; - break; - case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : - loader->unichar += (c - 'a' + 10) * 0x100; - break; - case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : - loader->unichar += (c - 'A' + 10) * 0x100; - break; - default : - ;// todo : error - } - loader->stat = GRN_LOADER_UNICODE2; - str++; - break; - case GRN_LOADER_UNICODE2 : - switch (c) { - case '0' : case '1' : case '2' : case '3' : case '4' : - case '5' : case '6' : case '7' : case '8' : case '9' : - loader->unichar += (c - '0') * 0x10; - break; - case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : - loader->unichar += (c - 'a' + 10) * 0x10; - break; - case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : - loader->unichar += (c - 'A' + 10) * 0x10; - break; - default : - ;// todo : error - } - loader->stat = GRN_LOADER_UNICODE3; - str++; - break; - case GRN_LOADER_UNICODE3 : - switch (c) { - case '0' : case '1' : case '2' : case '3' : case '4' : - case '5' : case '6' : case '7' : case '8' : case '9' : - loader->unichar += (c - '0'); - break; - case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : - loader->unichar += (c - 'a' + 10); - break; - case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : - loader->unichar += (c - 'A' + 10); - break; - default : - ;// todo : error - } - { - uint32_t u = loader->unichar; - if (u < 0x80) { - GRN_TEXT_PUTC(ctx, loader->last, u); - } else { - if (u < 0x800) { - GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x1f) | 0xc0); - } else { - GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0); - GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80); - } - GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80); - } - } - loader->stat = GRN_LOADER_STRING; - str++; - break; - case GRN_LOADER_END : - str 
= se; - break; - } - } -} - -#undef JSON_READ_OPEN_BRACKET -#undef JSON_READ_OPEN_BRACE - -/* - * grn_loader_parse_columns parses a columns parameter. - * Columns except _id and _key are appended to loader->columns. - * If it contains _id or _key, loader->id_offset or loader->key_offset is set. - */ -static grn_rc -grn_loader_parse_columns(grn_ctx *ctx, grn_loader *loader, - const char *str, unsigned int str_size) -{ - const char *ptr = str, *ptr_end = ptr + str_size, *rest; - const char *tokens[256], *token_end; - while (ptr < ptr_end) { - int i, n = tokenize(ptr, ptr_end - ptr, tokens, 256, &rest); - for (i = 0; i < n; i++) { - grn_obj *column; - token_end = tokens[i]; - while (ptr < token_end && (' ' == *ptr || ',' == *ptr)) { - ptr++; - } - column = grn_obj_column(ctx, loader->table, ptr, token_end - ptr); - if (!column) { - ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>", - (int)(token_end - ptr), ptr); - return ctx->rc; - } - if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_ID)) { - grn_obj_unlink(ctx, column); - if (loader->id_offset != -1 || loader->key_offset != -1) { - /* _id and _key must not appear more than once. */ - if (loader->id_offset != -1) { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_ID, i, - GRN_COLUMN_NAME_ID, loader->id_offset); - } else { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_ID, i, - GRN_COLUMN_NAME_KEY, loader->key_offset); - } - return ctx->rc; - } - loader->id_offset = i; - } else if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_KEY)) { - grn_obj_unlink(ctx, column); - if (loader->id_offset != -1 || loader->key_offset != -1) { - /* _id and _key must not appear more than once. 
*/ - if (loader->id_offset != -1) { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_KEY, i, - GRN_COLUMN_NAME_ID, loader->id_offset); - } else { - ERR(GRN_INVALID_ARGUMENT, - "duplicated id and key columns: <%s> at %d and <%s> at %d", - GRN_COLUMN_NAME_KEY, i, - GRN_COLUMN_NAME_KEY, loader->key_offset); - } - return ctx->rc; - } - loader->key_offset = i; - } else { - GRN_PTR_PUT(ctx, &loader->columns, column); - } - ptr = token_end; - } - ptr = rest; - } - switch (loader->table->header.type) { - case GRN_TABLE_HASH_KEY : - case GRN_TABLE_PAT_KEY : - case GRN_TABLE_DAT_KEY : - if (loader->id_offset == -1 && loader->key_offset == -1) { - ERR(GRN_INVALID_ARGUMENT, "missing id or key column"); - return ctx->rc; - } - break; - } - return ctx->rc; -} - -static grn_com_addr *addr; - -void -grn_load_(grn_ctx *ctx, grn_content_type input_type, - const char *table, unsigned int table_len, - const char *columns, unsigned int columns_len, - const char *values, unsigned int values_len, - const char *ifexists, unsigned int ifexists_len, - const char *each, unsigned int each_len, - grn_obj *output_ids, - uint32_t emit_level) -{ - grn_loader *loader = &ctx->impl->loader; - loader->emit_level = emit_level; - if (ctx->impl->edge) { - grn_edge *edge = grn_edges_add_communicator(ctx, addr); - grn_obj *msg = grn_msg_open(ctx, edge->com, &ctx->impl->edge->send_old); - /* build msg */ - grn_edge_dispatch(ctx, edge, msg); - } - if (table && table_len) { - grn_ctx_loader_clear(ctx); - loader->input_type = input_type; - if (grn_db_check_name(ctx, table, table_len)) { - GRN_DB_CHECK_NAME_ERR("[table][load]", table, table_len); - loader->stat = GRN_LOADER_END; - return; - } - loader->table = grn_ctx_get(ctx, table, table_len); - if (!loader->table) { - ERR(GRN_INVALID_ARGUMENT, "nonexistent table: <%.*s>", table_len, table); - loader->stat = GRN_LOADER_END; - return; - } - if (columns && columns_len) { - grn_rc rc = 
grn_loader_parse_columns(ctx, loader, columns, columns_len); - if (rc != GRN_SUCCESS) { - loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; - loader->stat = GRN_LOADER_END; - return; - } - loader->columns_status = GRN_LOADER_COLUMNS_SET; - } - if (ifexists && ifexists_len) { - grn_obj *v; - GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->ifexists, v); - if (loader->ifexists && v) { - grn_expr_parse(ctx, loader->ifexists, ifexists, ifexists_len, - NULL, GRN_OP_EQUAL, GRN_OP_AND, - GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE); - } - } - if (each && each_len) { - grn_obj *v; - GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->each, v); - if (loader->each && v) { - grn_expr_parse(ctx, loader->each, each, each_len, - NULL, GRN_OP_EQUAL, GRN_OP_AND, - GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE); - } - } - if (output_ids && GRN_TEXT_LEN(output_ids) > 0) { - loader->output_ids = - grn_proc_option_value_bool(ctx, output_ids, GRN_FALSE); - } - } else { - if (!loader->table) { - ERR(GRN_INVALID_ARGUMENT, "mandatory \"table\" parameter is absent"); - loader->stat = GRN_LOADER_END; - return; - } - input_type = loader->input_type; - } - switch (input_type) { - case GRN_CONTENT_JSON : - json_read(ctx, loader, values, values_len); - break; - case GRN_CONTENT_NONE : - case GRN_CONTENT_TSV : - case GRN_CONTENT_XML : - case GRN_CONTENT_MSGPACK : - case GRN_CONTENT_GROONGA_COMMAND_LIST : - ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "unsupported input_type"); - loader->stat = GRN_LOADER_END; - // todo - break; - } -} - -grn_rc -grn_load(grn_ctx *ctx, grn_content_type input_type, - const char *table, unsigned int table_len, - const char *columns, unsigned int columns_len, - const char *values, unsigned int values_len, - const char *ifexists, unsigned int ifexists_len, - const char *each, unsigned int each_len) -{ - if (!ctx || !ctx->impl) { - ERR(GRN_INVALID_ARGUMENT, "db not initialized"); - return ctx->rc; - } - GRN_API_ENTER; - grn_load_(ctx, input_type, table, table_len, - 
columns, columns_len, values, values_len, - ifexists, ifexists_len, each, each_len, - NULL, - 1); - GRN_API_RETURN(ctx->rc); -} - static void grn_db_recover_database(grn_ctx *ctx, grn_obj *db) { Modified: lib/grn_db.h (+0 -12) =================================================================== --- lib/grn_db.h 2017-02-03 14:15:00 +0900 (973ad81) +++ lib/grn_db.h 2017-02-03 14:16:38 +0900 (8f8599d) @@ -38,9 +38,6 @@ extern "C" { #define GRN_N_RESERVED_TYPES 256 -#define GRN_JSON_LOAD_OPEN_BRACKET 0x40000000 -#define GRN_JSON_LOAD_OPEN_BRACE 0x40000001 - typedef struct _grn_db grn_db; typedef struct _grn_proc grn_proc; @@ -413,15 +410,6 @@ GRN_API grn_rc grn_expr_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *expr); grn_hash *grn_expr_get_vars(grn_ctx *ctx, grn_obj *expr, unsigned int *nvars); grn_obj *grn_expr_open(grn_ctx *ctx, grn_obj_spec *spec, const uint8_t *p, const uint8_t *pe); -GRN_API void grn_load_(grn_ctx *ctx, grn_content_type input_type, - const char *table, unsigned int table_len, - const char *columns, unsigned int columns_len, - const char *values, unsigned int values_len, - const char *ifexists, unsigned int ifexists_len, - const char *each, unsigned int each_len, - grn_obj *output_ids, - uint32_t emit_level); - GRN_API grn_rc grn_table_group_with_range_gap(grn_ctx *ctx, grn_obj *table, grn_table_sort_key *group_key, grn_obj *result_set, Added: lib/grn_load.h (+41 -0) 100644 =================================================================== --- /dev/null +++ lib/grn_load.h 2017-02-03 14:16:38 +0900 (125e1dd) @@ -0,0 +1,41 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2009-2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#pragma once + +#include "grn.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GRN_JSON_LOAD_OPEN_BRACKET 0x40000000 +#define GRN_JSON_LOAD_OPEN_BRACE 0x40000001 + +GRN_API void grn_load_(grn_ctx *ctx, grn_content_type input_type, + const char *table, unsigned int table_len, + const char *columns, unsigned int columns_len, + const char *values, unsigned int values_len, + const char *ifexists, unsigned int ifexists_len, + const char *each, unsigned int each_len, + grn_obj *output_ids, + uint32_t emit_level); + +#ifdef __cplusplus +} +#endif Added: lib/load.c (+1154 -0) 100644 =================================================================== --- /dev/null +++ lib/load.c 2017-02-03 14:16:38 +0900 (919a1f0) @@ -0,0 +1,1154 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2009-2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "grn_load.h" +#include "grn_ctx_impl.h" +#include "grn_db.h" +#include "grn_util.h" +#include "grn_proc.h" + +static grn_obj * +values_add(grn_ctx *ctx, grn_loader *loader) +{ + grn_obj *res; + uint32_t curr_size = loader->values_size * sizeof(grn_obj); + if (curr_size < GRN_TEXT_LEN(&loader->values)) { + res = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + curr_size); + res->header.domain = GRN_DB_TEXT; + GRN_BULK_REWIND(res); + } else { + if (grn_bulk_space(ctx, &loader->values, sizeof(grn_obj))) { return NULL; } + res = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + curr_size); + GRN_TEXT_INIT(res, 0); + } + loader->values_size++; + loader->last = res; + return res; +} + +static grn_obj * +values_next(grn_ctx *ctx, grn_obj *value) +{ + if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET || + value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) { + value += GRN_UINT32_VALUE(value); + } + return value + 1; +} + +static int +values_len(grn_ctx *ctx, grn_obj *head, grn_obj *tail) +{ + int len; + for (len = 0; head < tail; head = values_next(ctx, head), len++) ; + return len; +} + +static grn_id +loader_add(grn_ctx *ctx, grn_obj *key) +{ + int added = 0; + grn_loader *loader = &ctx->impl->loader; + grn_id id = grn_table_add_by_key(ctx, loader->table, key, &added); + if (!added && loader->ifexists) { + grn_obj *v = grn_expr_get_var_by_offset(ctx, loader->ifexists, 0); + grn_obj *result; + GRN_RECORD_SET(ctx, v, id); + result = grn_expr_exec(ctx, loader->ifexists, 0); + if (!grn_obj_is_true(ctx, result)) { + id = 0; + } + } + return id; +} + +static void +set_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *vector) +{ + int n = GRN_UINT32_VALUE(vector); + grn_obj buf, *v = vector + 1; + grn_id range_id; + grn_obj *range; + + 
range_id = DB_OBJ(column)->range; + range = grn_ctx_at(ctx, range_id); + if (grn_obj_is_table(ctx, range)) { + GRN_RECORD_INIT(&buf, GRN_OBJ_VECTOR, range_id); + while (n--) { + grn_bool cast_failed = GRN_FALSE; + grn_obj record, *element = v; + if (range_id != element->header.domain) { + GRN_RECORD_INIT(&record, 0, range_id); + if (grn_obj_cast(ctx, element, &record, GRN_TRUE)) { + cast_failed = GRN_TRUE; + ERR_CAST(column, range, element); + } + element = &record; + } + if (!cast_failed) { + GRN_UINT32_PUT(ctx, &buf, GRN_RECORD_VALUE(element)); + } + if (element == &record) { GRN_OBJ_FIN(ctx, element); } + v = values_next(ctx, v); + } + } else { + if (((struct _grn_type *)range)->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) { + GRN_TEXT_INIT(&buf, GRN_OBJ_VECTOR); + while (n--) { + if (v->header.domain == GRN_DB_TEXT) { + grn_bool cast_failed = GRN_FALSE; + grn_obj casted_element, *element = v; + if (range_id != element->header.domain) { + GRN_OBJ_INIT(&casted_element, GRN_BULK, 0, range_id); + if (grn_obj_cast(ctx, element, &casted_element, GRN_TRUE)) { + cast_failed = GRN_TRUE; + ERR_CAST(column, range, element); + } + element = &casted_element; + } + if (!cast_failed) { + grn_vector_add_element(ctx, &buf, + GRN_TEXT_VALUE(element), + GRN_TEXT_LEN(element), 0, + element->header.domain); + } + if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); } + } else { + ERR(GRN_INVALID_ARGUMENT, "bad syntax."); + } + v = values_next(ctx, v); + } + } else { + grn_id value_size = ((grn_db_obj *)range)->range; + GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, range_id); + while (n--) { + grn_bool cast_failed = GRN_FALSE; + grn_obj casted_element, *element = v; + if (range_id != element->header.domain) { + GRN_OBJ_INIT(&casted_element, GRN_BULK, 0, range_id); + if (grn_obj_cast(ctx, element, &casted_element, GRN_TRUE)) { + cast_failed = GRN_TRUE; + ERR_CAST(column, range, element); + } + element = &casted_element; + } + if (!cast_failed) { + grn_bulk_write(ctx, &buf, 
GRN_TEXT_VALUE(element), value_size); + } + if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); } + v = values_next(ctx, v); + } + } + } + grn_obj_set_value(ctx, column, id, &buf, GRN_OBJ_SET); + GRN_OBJ_FIN(ctx, &buf); +} + +static void +set_weight_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *index_value) +{ + if (!grn_obj_is_weight_vector_column(ctx, column)) { + char column_name[GRN_TABLE_MAX_KEY_SIZE]; + int column_name_size; + column_name_size = grn_obj_name(ctx, column, column_name, + GRN_TABLE_MAX_KEY_SIZE); + ERR(GRN_INVALID_ARGUMENT, + "<%.*s>: columns except weight vector column don't support object value", + column_name_size, column_name); + return; + } + + { + unsigned int i, n; + grn_obj vector; + grn_obj weight_buffer; + + n = GRN_UINT32_VALUE(index_value); + GRN_TEXT_INIT(&vector, GRN_OBJ_VECTOR); + GRN_UINT32_INIT(&weight_buffer, 0); + for (i = 0; i < n; i += 2) { + grn_rc rc; + grn_obj *key, *weight; + + key = index_value + 1 + i; + weight = key + 1; + + GRN_BULK_REWIND(&weight_buffer); + rc = grn_obj_cast(ctx, weight, &weight_buffer, GRN_TRUE); + if (rc != GRN_SUCCESS) { + grn_obj *range; + range = grn_ctx_at(ctx, weight_buffer.header.domain); + ERR_CAST(column, range, weight); + grn_obj_unlink(ctx, range); + break; + } + grn_vector_add_element(ctx, &vector, + GRN_BULK_HEAD(key), GRN_BULK_VSIZE(key), + GRN_UINT32_VALUE(&weight_buffer), + key->header.domain); + } + grn_obj_set_value(ctx, column, id, &vector, GRN_OBJ_SET); + GRN_OBJ_FIN(ctx, &vector); + } +} + +static inline int +name_equal(const char *p, unsigned int size, const char *name) +{ + if (strlen(name) != size) { return 0; } + if (*p != GRN_DB_PSEUDO_COLUMN_PREFIX) { return 0; } + return !memcmp(p + 1, name + 1, size - 1); +} + +static void +report_set_column_value_failure(grn_ctx *ctx, + grn_obj *key, + const char *column_name, + unsigned int column_name_size, + grn_obj *column_value) +{ + grn_obj key_inspected, column_value_inspected; + + 
GRN_TEXT_INIT(&key_inspected, 0); + GRN_TEXT_INIT(&column_value_inspected, 0); + grn_inspect_limited(ctx, &key_inspected, key); + grn_inspect_limited(ctx, &column_value_inspected, column_value); + GRN_LOG(ctx, GRN_LOG_ERROR, + "[table][load] failed to set column value: %s: " + "key: <%.*s>, column: <%.*s>, value: <%.*s>", + ctx->errbuf, + (int)GRN_TEXT_LEN(&key_inspected), + GRN_TEXT_VALUE(&key_inspected), + column_name_size, + column_name, + (int)GRN_TEXT_LEN(&column_value_inspected), + GRN_TEXT_VALUE(&column_value_inspected)); + GRN_OBJ_FIN(ctx, &key_inspected); + GRN_OBJ_FIN(ctx, &column_value_inspected); +} + +static void +grn_loader_save_error(grn_ctx *ctx, grn_loader *loader) +{ + loader->rc = ctx->rc; + grn_strcpy(loader->errbuf, GRN_CTX_MSGSIZE, ctx->errbuf); +} + +static grn_id +parse_id_value(grn_ctx *ctx, grn_obj *value) +{ + switch (value->header.type) { + case GRN_DB_UINT32 : + return GRN_UINT32_VALUE(value); + case GRN_DB_INT32 : + return GRN_INT32_VALUE(value); + default : + { + grn_id id = GRN_ID_NIL; + grn_obj casted_value; + GRN_UINT32_INIT(&casted_value, 0); + if (grn_obj_cast(ctx, value, &casted_value, GRN_FALSE) != GRN_SUCCESS) { + grn_obj inspected; + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, value); + ERR(GRN_INVALID_ARGUMENT, + "<%s>: failed to cast to <UInt32>: <%.*s>", + GRN_COLUMN_NAME_ID, + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + } else { + id = GRN_UINT32_VALUE(&casted_value); + } + GRN_OBJ_FIN(ctx, &casted_value); + return id; + } + } +} + +static void +bracket_close(grn_ctx *ctx, grn_loader *loader) +{ + grn_id id = GRN_ID_NIL; + grn_obj *value, *value_end, *id_value = NULL, *key_value = NULL; + grn_obj *col, **cols; /* Columns except _id and _key. */ + uint32_t i, begin; + uint32_t ncols; /* Number of columns except _id and _key. */ + uint32_t nvalues; /* Number of values in brackets. 
*/ + uint32_t depth; + + cols = (grn_obj **)GRN_BULK_HEAD(&loader->columns); + ncols = GRN_BULK_VSIZE(&loader->columns) / sizeof(grn_obj *); + GRN_UINT32_POP(&loader->level, begin); + value = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin; + value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size; + GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET); + GRN_UINT32_SET(ctx, value, loader->values_size - begin - 1); + value++; + depth = GRN_BULK_VSIZE(&loader->level); + if (depth > sizeof(uint32_t) * loader->emit_level) { + return; + } + if (depth == 0 || !loader->table || + loader->columns_status == GRN_LOADER_COLUMNS_BROKEN) { + goto exit; + } + nvalues = values_len(ctx, value, value_end); + + if (loader->columns_status == GRN_LOADER_COLUMNS_UNSET) { + /* + * Target columns and _id or _key are not specified yet and values are + * handled as column names and "_id" or "_key". + */ + for (i = 0; i < nvalues; i++) { + const char *col_name; + unsigned int col_name_size; + if (value->header.domain != GRN_DB_TEXT) { + grn_obj buffer; + GRN_TEXT_INIT(&buffer, 0); + grn_inspect(ctx, &buffer, value); + ERR(GRN_INVALID_ARGUMENT, + "column name must be string: <%.*s>", + (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer)); + grn_loader_save_error(ctx, loader); + GRN_OBJ_FIN(ctx, &buffer); + loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; + goto exit; + } + col_name = GRN_TEXT_VALUE(value); + col_name_size = GRN_TEXT_LEN(value); + col = grn_obj_column(ctx, loader->table, col_name, col_name_size); + if (!col) { + ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>", + col_name_size, col_name); + grn_loader_save_error(ctx, loader); + loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; + goto exit; + } + if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_ID)) { + grn_obj_unlink(ctx, col); + if (loader->id_offset != -1 || loader->key_offset != -1) { + /* _id and _key must not appear more than once. 
*/ + if (loader->id_offset != -1) { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_ID, i, + GRN_COLUMN_NAME_ID, loader->id_offset); + } else { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_ID, i, + GRN_COLUMN_NAME_KEY, loader->key_offset); + } + grn_loader_save_error(ctx, loader); + loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; + goto exit; + } + loader->id_offset = i; + } else if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_KEY)) { + grn_obj_unlink(ctx, col); + if (loader->id_offset != -1 || loader->key_offset != -1) { + /* _id and _key must not appear more than once. */ + if (loader->id_offset != -1) { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_KEY, i, + GRN_COLUMN_NAME_ID, loader->id_offset); + } else { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_KEY, i, + GRN_COLUMN_NAME_KEY, loader->key_offset); + } + grn_loader_save_error(ctx, loader); + loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; + goto exit; + } + loader->key_offset = i; + } else { + GRN_PTR_PUT(ctx, &loader->columns, col); + } + value++; + } + switch (loader->table->header.type) { + case GRN_TABLE_HASH_KEY : + case GRN_TABLE_PAT_KEY : + case GRN_TABLE_DAT_KEY : + if (loader->id_offset == -1 && loader->key_offset == -1) { + ERR(GRN_INVALID_ARGUMENT, "missing id or key column"); + grn_loader_save_error(ctx, loader); + loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; + goto exit; + } + break; + } + loader->columns_status = GRN_LOADER_COLUMNS_SET; + goto exit; + } + + /* Target columns and _id or _key are already specified. */ + if (!nvalues) { + /* + * Accept empty arrays because a dump command may output a load command + * which contains empty arrays for a table with deleted records. 
+ */ + id = grn_table_add(ctx, loader->table, NULL, 0, NULL); + } else { + uint32_t expected_nvalues = ncols; + if (loader->id_offset != -1 || loader->key_offset != -1) { + expected_nvalues++; + } + if (nvalues != expected_nvalues) { + ERR(GRN_INVALID_ARGUMENT, + "unexpected #values: expected:%u, actual:%u", + expected_nvalues, nvalues); + goto exit; + } + if (loader->id_offset != -1) { + id_value = value + loader->id_offset; + id = parse_id_value(ctx, id_value); + if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) { + id = grn_table_add(ctx, loader->table, NULL, 0, NULL); + } + } else if (loader->key_offset != -1) { + key_value = value + loader->key_offset; + id = loader_add(ctx, key_value); + } else { + id = grn_table_add(ctx, loader->table, NULL, 0, NULL); + } + } + if (id == GRN_ID_NIL) { + /* Target record is not available. */ + goto exit; + } + + for (i = 0; i < nvalues; i++, value = values_next(ctx, value)) { + if (i == loader->id_offset || i == loader->key_offset) { + /* Skip _id and _key, because it's already used to get id. 
*/ + continue; + } + col = *cols; + if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) { + set_vector(ctx, col, id, value); + } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) { + set_weight_vector(ctx, col, id, value); + } else { + grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET); + } + if (ctx->rc != GRN_SUCCESS) { + char column_name[GRN_TABLE_MAX_KEY_SIZE]; + unsigned int column_name_size; + grn_loader_save_error(ctx, loader); + column_name_size = grn_obj_name(ctx, col, column_name, + GRN_TABLE_MAX_KEY_SIZE); + report_set_column_value_failure(ctx, key_value, + column_name, column_name_size, + value); + ERRCLR(ctx); + } + cols++; + } + if (loader->each) { + grn_obj *v = grn_expr_get_var_by_offset(ctx, loader->each, 0); + GRN_RECORD_SET(ctx, v, id); + grn_expr_exec(ctx, loader->each, 0); + } + loader->nrecords++; +exit: + if (depth > 0 && loader->output_ids) { + GRN_UINT32_PUT(ctx, &(loader->ids), id); + } + loader->values_size = begin; +} + +static void +brace_close(grn_ctx *ctx, grn_loader *loader) +{ + grn_id id = GRN_ID_NIL; + grn_obj *value, *value_begin, *value_end; + grn_obj *id_value = NULL, *key_value = NULL; + uint32_t begin; + + GRN_UINT32_POP(&loader->level, begin); + value_begin = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin; + value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size; + GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACE); + GRN_UINT32_SET(ctx, value_begin, loader->values_size - begin - 1); + value_begin++; + if (GRN_BULK_VSIZE(&loader->level) > sizeof(uint32_t) * loader->emit_level) { + return; + } + if (!loader->table) { + goto exit; + } + + /* Scan values to find _id or _key. 
*/ + for (value = value_begin; value + 1 < value_end; + value = values_next(ctx, value)) { + const char *name = GRN_TEXT_VALUE(value); + unsigned int name_size = GRN_TEXT_LEN(value); + if (value->header.domain != GRN_DB_TEXT) { + grn_obj buffer; + GRN_TEXT_INIT(&buffer, 0); + grn_inspect(ctx, &buffer, value); + GRN_LOG(ctx, GRN_LOG_ERROR, + "column name must be string: <%.*s>", + (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer)); + GRN_OBJ_FIN(ctx, &buffer); + goto exit; + } + value++; + if (name_equal(name, name_size, GRN_COLUMN_NAME_ID)) { + if (id_value || key_value) { + if (loader->table->header.type == GRN_TABLE_NO_KEY) { + GRN_LOG(ctx, GRN_LOG_ERROR, "duplicated '_id' column"); + goto exit; + } else { + GRN_LOG(ctx, GRN_LOG_ERROR, + "duplicated key columns: %s and %s", + id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_ID); + goto exit; + } + } + id_value = value; + } else if (name_equal(name, name_size, GRN_COLUMN_NAME_KEY)) { + if (id_value || key_value) { + GRN_LOG(ctx, GRN_LOG_ERROR, + "duplicated key columns: %s and %s", + id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY); + goto exit; + } + key_value = value; + } + } + + switch (loader->table->header.type) { + case GRN_TABLE_HASH_KEY : + case GRN_TABLE_PAT_KEY : + case GRN_TABLE_DAT_KEY : + /* The target table requires _id or _key. */ + if (!id_value && !key_value) { + GRN_LOG(ctx, GRN_LOG_ERROR, "neither _key nor _id is assigned"); + goto exit; + } + break; + default : + /* The target table does not have _key. 
*/ + if (key_value) { + GRN_LOG(ctx, GRN_LOG_ERROR, "nonexistent key value"); + goto exit; + } + break; + } + + if (id_value) { + id = parse_id_value(ctx, id_value); + if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) { + if (ctx->rc == GRN_SUCCESS) { + id = grn_table_add(ctx, loader->table, NULL, 0, NULL); + } + } + } else if (key_value) { + id = loader_add(ctx, key_value); + } else { + id = grn_table_add(ctx, loader->table, NULL, 0, NULL); + } + if (id == GRN_ID_NIL) { + /* Target record is not available. */ + goto exit; + } + + for (value = value_begin; value + 1 < value_end; + value = values_next(ctx, value)) { + grn_obj *col; + const char *name = GRN_TEXT_VALUE(value); + unsigned int name_size = GRN_TEXT_LEN(value); + value++; + if (value == id_value || value == key_value) { + /* Skip _id and _key, because it's already used to get id. */ + continue; + } + col = grn_obj_column(ctx, loader->table, name, name_size); + if (!col) { + GRN_LOG(ctx, GRN_LOG_ERROR, "invalid column('%.*s')", + (int)name_size, name); + /* Automatic column creation is disabled. 
*/ + /* + if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) { + grn_obj *v = value + 1; + col = grn_column_create(ctx, loader->table, name, name_size, + NULL, GRN_OBJ_PERSISTENT|GRN_OBJ_COLUMN_VECTOR, + grn_ctx_at(ctx, v->header.domain)); + } else { + col = grn_column_create(ctx, loader->table, name, name_size, + NULL, GRN_OBJ_PERSISTENT, + grn_ctx_at(ctx, value->header.domain)); + } + */ + } else { + if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) { + set_vector(ctx, col, id, value); + } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) { + set_weight_vector(ctx, col, id, value); + } else { + grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET); + } + if (ctx->rc != GRN_SUCCESS) { + grn_loader_save_error(ctx, loader); + report_set_column_value_failure(ctx, key_value, + name, name_size, value); + ERRCLR(ctx); + } + grn_obj_unlink(ctx, col); + } + } + if (loader->each) { + value = grn_expr_get_var_by_offset(ctx, loader->each, 0); + GRN_RECORD_SET(ctx, value, id); + grn_expr_exec(ctx, loader->each, 0); + } + loader->nrecords++; +exit: + if (loader->output_ids) { + GRN_UINT32_PUT(ctx, &(loader->ids), id); + } + loader->values_size = begin; +} + +#define JSON_READ_OPEN_BRACKET() do {\ + GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\ + values_add(ctx, loader);\ + loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACKET;\ + loader->stat = GRN_LOADER_TOKEN;\ + str++;\ +} while (0) + +#define JSON_READ_OPEN_BRACE() do {\ + GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\ + values_add(ctx, loader);\ + loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACE;\ + loader->stat = GRN_LOADER_TOKEN;\ + str++;\ +} while (0) + +static void +json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len) +{ + const char *const beg = str; + char c; + int len; + const char *se = str + str_len; + while (str < se) { + c = *str; + switch (loader->stat) { + case GRN_LOADER_BEGIN : + if ((len = grn_isspace(str, ctx->encoding))) { 
+ str += len; + continue; + } + switch (c) { + case '[' : + JSON_READ_OPEN_BRACKET(); + break; + case '{' : + JSON_READ_OPEN_BRACE(); + break; + default : + ERR(GRN_INVALID_ARGUMENT, + "JSON must start with '[' or '{': <%.*s>", str_len, beg); + loader->stat = GRN_LOADER_END; + break; + } + break; + case GRN_LOADER_TOKEN : + if ((len = grn_isspace(str, ctx->encoding))) { + str += len; + continue; + } + switch (c) { + case '"' : + loader->stat = GRN_LOADER_STRING; + values_add(ctx, loader); + str++; + break; + case '[' : + JSON_READ_OPEN_BRACKET(); + break; + case '{' : + JSON_READ_OPEN_BRACE(); + break; + case ':' : + str++; + break; + case ',' : + str++; + break; + case ']' : + bracket_close(ctx, loader); + loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; + if (ctx->rc == GRN_CANCEL) { + loader->stat = GRN_LOADER_END; + } + str++; + break; + case '}' : + brace_close(ctx, loader); + loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; + if (ctx->rc == GRN_CANCEL) { + loader->stat = GRN_LOADER_END; + } + str++; + break; + case '+' : case '-' : case '0' : case '1' : case '2' : case '3' : + case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : + loader->stat = GRN_LOADER_NUMBER; + values_add(ctx, loader); + break; + default : + if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) { + loader->stat = GRN_LOADER_SYMBOL; + values_add(ctx, loader); + } else { + if ((len = grn_charlen(ctx, str, se))) { + GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c); + GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg); + GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^"); + str += len; + } else { + GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c); + GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg); + str = se; + } + } + break; + } + break; + case GRN_LOADER_SYMBOL : + if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || + 
('0' <= c && c <= '9') || ('_' == c)) { + GRN_TEXT_PUTC(ctx, loader->last, c); + str++; + } else { + char *v = GRN_TEXT_VALUE(loader->last); + switch (*v) { + case 'n' : + if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) { + loader->last->header.domain = GRN_DB_VOID; + GRN_BULK_REWIND(loader->last); + } + break; + case 't' : + if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) { + loader->last->header.domain = GRN_DB_BOOL; + GRN_BOOL_SET(ctx, loader->last, GRN_TRUE); + } + break; + case 'f' : + if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) { + loader->last->header.domain = GRN_DB_BOOL; + GRN_BOOL_SET(ctx, loader->last, GRN_FALSE); + } + break; + default : + break; + } + loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; + } + break; + case GRN_LOADER_NUMBER : + switch (c) { + case '+' : case '-' : case '.' : case 'e' : case 'E' : + case '0' : case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : case '8' : case '9' : + GRN_TEXT_PUTC(ctx, loader->last, c); + str++; + break; + default : + { + const char *cur, *str = GRN_BULK_HEAD(loader->last); + const char *str_end = GRN_BULK_CURR(loader->last); + int64_t i = grn_atoll(str, str_end, &cur); + if (cur == str_end) { + loader->last->header.domain = GRN_DB_INT64; + GRN_INT64_SET(ctx, loader->last, i); + } else if (cur != str) { + double d; + char *end; + grn_obj buf; + GRN_TEXT_INIT(&buf, 0); + GRN_TEXT_PUT(ctx, &buf, str, GRN_BULK_VSIZE(loader->last)); + GRN_TEXT_PUTC(ctx, &buf, '\0'); + errno = 0; + d = strtod(GRN_TEXT_VALUE(&buf), &end); + if (!errno && end + 1 == GRN_BULK_CURR(&buf)) { + loader->last->header.domain = GRN_DB_FLOAT; + GRN_FLOAT_SET(ctx, loader->last, d); + } + GRN_OBJ_FIN(ctx, &buf); + } + } + loader->stat = GRN_BULK_VSIZE(&loader->level) ? 
GRN_LOADER_TOKEN : GRN_LOADER_END; + break; + } + break; + case GRN_LOADER_STRING : + switch (c) { + case '\\' : + loader->stat = GRN_LOADER_STRING_ESC; + str++; + break; + case '"' : + str++; + loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; + /* + *(GRN_BULK_CURR(loader->last)) = '\0'; + GRN_LOG(ctx, GRN_LOG_ALERT, "read str(%s)", GRN_TEXT_VALUE(loader->last)); + */ + break; + default : + if ((len = grn_charlen(ctx, str, se))) { + GRN_TEXT_PUT(ctx, loader->last, str, len); + str += len; + } else { + GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c); + GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg); + str = se; + } + break; + } + break; + case GRN_LOADER_STRING_ESC : + switch (c) { + case 'b' : + GRN_TEXT_PUTC(ctx, loader->last, '\b'); + loader->stat = GRN_LOADER_STRING; + break; + case 'f' : + GRN_TEXT_PUTC(ctx, loader->last, '\f'); + loader->stat = GRN_LOADER_STRING; + break; + case 'n' : + GRN_TEXT_PUTC(ctx, loader->last, '\n'); + loader->stat = GRN_LOADER_STRING; + break; + case 'r' : + GRN_TEXT_PUTC(ctx, loader->last, '\r'); + loader->stat = GRN_LOADER_STRING; + break; + case 't' : + GRN_TEXT_PUTC(ctx, loader->last, '\t'); + loader->stat = GRN_LOADER_STRING; + break; + case 'u' : + loader->stat = GRN_LOADER_UNICODE0; + break; + default : + GRN_TEXT_PUTC(ctx, loader->last, c); + loader->stat = GRN_LOADER_STRING; + break; + } + str++; + break; + case GRN_LOADER_UNICODE0 : + switch (c) { + case '0' : case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : case '8' : case '9' : + loader->unichar = (c - '0') * 0x1000; + break; + case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : + loader->unichar = (c - 'a' + 10) * 0x1000; + break; + case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : + loader->unichar = (c - 'A' + 10) * 0x1000; + break; + default : + ;// todo : error + } + loader->stat = GRN_LOADER_UNICODE1; + str++; + break; + case 
GRN_LOADER_UNICODE1 : + switch (c) { + case '0' : case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : case '8' : case '9' : + loader->unichar += (c - '0') * 0x100; + break; + case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : + loader->unichar += (c - 'a' + 10) * 0x100; + break; + case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : + loader->unichar += (c - 'A' + 10) * 0x100; + break; + default : + ;// todo : error + } + loader->stat = GRN_LOADER_UNICODE2; + str++; + break; + case GRN_LOADER_UNICODE2 : + switch (c) { + case '0' : case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : case '8' : case '9' : + loader->unichar += (c - '0') * 0x10; + break; + case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : + loader->unichar += (c - 'a' + 10) * 0x10; + break; + case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : + loader->unichar += (c - 'A' + 10) * 0x10; + break; + default : + ;// todo : error + } + loader->stat = GRN_LOADER_UNICODE3; + str++; + break; + case GRN_LOADER_UNICODE3 : + switch (c) { + case '0' : case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : case '8' : case '9' : + loader->unichar += (c - '0'); + break; + case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : + loader->unichar += (c - 'a' + 10); + break; + case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : + loader->unichar += (c - 'A' + 10); + break; + default : + ;// todo : error + } + { + uint32_t u = loader->unichar; + if (u < 0x80) { + GRN_TEXT_PUTC(ctx, loader->last, u); + } else { + if (u < 0x800) { + GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x1f) | 0xc0); + } else { + GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0); + GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80); + } + GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80); + } + } + loader->stat = GRN_LOADER_STRING; + str++; + break; + case GRN_LOADER_END : + str 
= se; + break; + } + } +} + +#undef JSON_READ_OPEN_BRACKET +#undef JSON_READ_OPEN_BRACE + +/* + * grn_loader_parse_columns parses a columns parameter. + * Columns except _id and _key are appended to loader->columns. + * If it contains _id or _key, loader->id_offset or loader->key_offset is set. + * + * Returns ctx->rc. GRN_INVALID_ARGUMENT is reported through ERR when a name + * does not resolve to a column, when _id or _key is given after either of them + * was already seen, or when a GRN_TABLE_HASH_KEY, GRN_TABLE_PAT_KEY or + * GRN_TABLE_DAT_KEY table gets neither _id nor _key. + */ +static grn_rc +grn_loader_parse_columns(grn_ctx *ctx, grn_loader *loader, + const char *str, unsigned int str_size) +{ + const char *ptr = str, *ptr_end = ptr + str_size, *rest; + const char *tokens[256], *token_end; + while (ptr < ptr_end) { + /* tokens[] holds 256 entries, so the list is consumed in passes and rest is where the next pass resumes. NOTE(review): i restarts at 0 on every pass, so an _id or _key position recorded in a later pass would be relative to that pass - confirm whether that matters. */ int i, n = grn_tokenize(ptr, ptr_end - ptr, tokens, 256, &rest); + for (i = 0; i < n; i++) { + grn_obj *column; + /* tokens[i] is used as the end of the i-th name; ptr is advanced past leading spaces and commas to reach its start. */ token_end = tokens[i]; + while (ptr < token_end && (' ' == *ptr || ',' == *ptr)) { + ptr++; + } + column = grn_obj_column(ctx, loader->table, ptr, token_end - ptr); + if (!column) { + ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>", + (int)(token_end - ptr), ptr); + return ctx->rc; + } + if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_ID)) { + /* Only the position of _id is recorded, so the column object is unlinked instead of being stored. */ grn_obj_unlink(ctx, column); + if (loader->id_offset != -1 || loader->key_offset != -1) { + /* _id and _key must not appear more than once. */ + if (loader->id_offset != -1) { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_ID, i, + GRN_COLUMN_NAME_ID, loader->id_offset); + } else { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_ID, i, + GRN_COLUMN_NAME_KEY, loader->key_offset); + } + return ctx->rc; + } + loader->id_offset = i; + } else if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_KEY)) { + /* Same handling as _id: only the position of _key is kept. */ grn_obj_unlink(ctx, column); + if (loader->id_offset != -1 || loader->key_offset != -1) { + /* _id and _key must not appear more than once. 
*/ + if (loader->id_offset != -1) { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_KEY, i, + GRN_COLUMN_NAME_ID, loader->id_offset); + } else { + ERR(GRN_INVALID_ARGUMENT, + "duplicated id and key columns: <%s> at %d and <%s> at %d", + GRN_COLUMN_NAME_KEY, i, + GRN_COLUMN_NAME_KEY, loader->key_offset); + } + return ctx->rc; + } + loader->key_offset = i; + } else { + GRN_PTR_PUT(ctx, &loader->columns, column); + } + ptr = token_end; + } + ptr = rest; + } + /* Tables of the three key types listed below need at least one of _id or _key. */ switch (loader->table->header.type) { + case GRN_TABLE_HASH_KEY : + case GRN_TABLE_PAT_KEY : + case GRN_TABLE_DAT_KEY : + if (loader->id_offset == -1 && loader->key_offset == -1) { + ERR(GRN_INVALID_ARGUMENT, "missing id or key column"); + return ctx->rc; + } + break; + } + return ctx->rc; +} + +/* NOTE(review): addr is not assigned anywhere in this chunk, yet grn_load_ passes it to grn_edges_add_communicator - confirm where it is set. */ static grn_com_addr *addr; + +/* + * grn_load_ stores emit_level in the context loader. When table is given, the + * loader is cleared with grn_ctx_loader_clear and set up again from table, + * columns, ifexists, each and output_ids. When table is NULL or empty, the + * loader->table and loader->input_type already stored are reused, and a missing + * loader->table is reported as GRN_INVALID_ARGUMENT. values is passed to + * json_read for GRN_CONTENT_JSON; the other listed input types report + * GRN_FUNCTION_NOT_IMPLEMENTED. Every error path sets loader->stat to + * GRN_LOADER_END before returning. + */ void +grn_load_(grn_ctx *ctx, grn_content_type input_type, + const char *table, unsigned int table_len, + const char *columns, unsigned int columns_len, + const char *values, unsigned int values_len, + const char *ifexists, unsigned int ifexists_len, + const char *each, unsigned int each_len, + grn_obj *output_ids, + uint32_t emit_level) +{ + grn_loader *loader = &ctx->impl->loader; + loader->emit_level = emit_level; + if (ctx->impl->edge) { + grn_edge *edge = grn_edges_add_communicator(ctx, addr); + grn_obj *msg = grn_msg_open(ctx, edge->com, &ctx->impl->edge->send_old); + /* build msg */ + grn_edge_dispatch(ctx, edge, msg); + } + if (table && table_len) { + grn_ctx_loader_clear(ctx); + loader->input_type = input_type; + if (grn_db_check_name(ctx, table, table_len)) { + GRN_DB_CHECK_NAME_ERR("[table][load]", table, table_len); + loader->stat = GRN_LOADER_END; + return; + } + loader->table = grn_ctx_get(ctx, table, table_len); + if (!loader->table) { + /* NOTE(review): table_len is unsigned int but the star precision of the format expects an int; grn_loader_parse_columns casts with (int) - consider doing the same here. */ ERR(GRN_INVALID_ARGUMENT, "nonexistent table: <%.*s>", table_len, table); + loader->stat = GRN_LOADER_END; + return; + } + if (columns && columns_len) { + grn_rc rc = 
grn_loader_parse_columns(ctx, loader, columns, columns_len); + /* A failed columns parse marks the column list broken and ends the load. */ if (rc != GRN_SUCCESS) { + loader->columns_status = GRN_LOADER_COLUMNS_BROKEN; + loader->stat = GRN_LOADER_END; + return; + } + loader->columns_status = GRN_LOADER_COLUMNS_SET; + } + /* ifexists and each are parsed as script-syntax expressions with updates allowed; the grn_expr_parse results are not checked here. */ if (ifexists && ifexists_len) { + grn_obj *v; + GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->ifexists, v); + if (loader->ifexists && v) { + grn_expr_parse(ctx, loader->ifexists, ifexists, ifexists_len, + NULL, GRN_OP_EQUAL, GRN_OP_AND, + GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE); + } + } + if (each && each_len) { + grn_obj *v; + GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->each, v); + if (loader->each && v) { + grn_expr_parse(ctx, loader->each, each, each_len, + NULL, GRN_OP_EQUAL, GRN_OP_AND, + GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE); + } + } + /* output_ids is only consulted when it holds a non-empty text value. */ if (output_ids && GRN_TEXT_LEN(output_ids) > 0) { + loader->output_ids = + grn_proc_option_value_bool(ctx, output_ids, GRN_FALSE); + } + } else { + /* No table name: reuse the table and input_type already stored in the loader. */ if (!loader->table) { + ERR(GRN_INVALID_ARGUMENT, "mandatory \"table\" parameter is absent"); + loader->stat = GRN_LOADER_END; + return; + } + input_type = loader->input_type; + } + /* Only GRN_CONTENT_JSON is consumed; the other listed types are rejected. */ switch (input_type) { + case GRN_CONTENT_JSON : + json_read(ctx, loader, values, values_len); + break; + case GRN_CONTENT_NONE : + case GRN_CONTENT_TSV : + case GRN_CONTENT_XML : + case GRN_CONTENT_MSGPACK : + case GRN_CONTENT_GROONGA_COMMAND_LIST : + ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "unsupported input_type"); + loader->stat = GRN_LOADER_END; + // todo + break; + } +} + +/* grn_load is the non-static wrapper around grn_load_: between GRN_API_ENTER and GRN_API_RETURN it forwards its arguments with output_ids NULL and emit_level 1, then returns ctx->rc. NOTE(review): the guard tests !ctx but then returns ctx->rc (and ERR presumably uses ctx as well), which looks like a NULL dereference when ctx is NULL - confirm and fix in the real source. */ grn_rc +grn_load(grn_ctx *ctx, grn_content_type input_type, + const char *table, unsigned int table_len, + const char *columns, unsigned int columns_len, + const char *values, unsigned int values_len, + const char *ifexists, unsigned int ifexists_len, + const char *each, unsigned int each_len) +{ + if (!ctx || !ctx->impl) { + ERR(GRN_INVALID_ARGUMENT, "db not initialized"); + return ctx->rc; + } + GRN_API_ENTER; + grn_load_(ctx, input_type, table, table_len, + 
columns, columns_len, values, values_len, + ifexists, ifexists_len, each, each_len, + /* output_ids */ NULL, + /* emit_level */ 1); + GRN_API_RETURN(ctx->rc); +} Modified: lib/proc.c (+1 -0) =================================================================== --- lib/proc.c 2017-02-03 14:15:00 +0900 (ac4e375) +++ lib/proc.c 2017-02-03 14:16:38 +0900 (d036cfe) @@ -26,6 +26,7 @@ #include "grn_geo.h" #include "grn_expr.h" #include "grn_cache.h" +#include "grn_load.h" #include <string.h> #include <stdlib.h> Modified: lib/sources.am (+2 -0) =================================================================== --- lib/sources.am 2017-02-03 14:15:00 +0900 (6576af3) +++ lib/sources.am 2017-02-03 14:16:38 +0900 (eed46d1) @@ -40,6 +40,8 @@ libgroonga_la_SOURCES = \ grn_index_column.h \ io.c \ grn_io.h \ + load.c \ + grn_load.h \ logger.c \ grn_logger.h \ mrb.c \ Modified: lib/util.c (+1 -0) =================================================================== --- lib/util.c 2017-02-03 14:15:00 +0900 (166cdfa) +++ lib/util.c 2017-02-03 14:16:38 +0900 (cc57e2f) @@ -22,6 +22,7 @@ #include "grn_util.h" #include "grn_string.h" #include "grn_expr.h" +#include "grn_load.h" #include <string.h> #include <stdio.h>