Kouhei Sutou
null+****@clear*****
Fri Feb 13 18:31:35 JST 2015
Kouhei Sutou 2015-02-13 18:31:35 +0900 (Fri, 13 Feb 2015) New Revision: 56cd8a96a4ac9cf045c69cc51886f49972d00c64 https://github.com/groonga/groonga/commit/56cd8a96a4ac9cf045c69cc51886f49972d00c64 Message: Use double instead of int32 for score internally No public query APIs are changed. The DB API is changed: the _score value type is changed from GRN_DB_INT32 to GRN_DB_FLOAT. This is a backward-incompatible change. If you're using the DB API, upgrade Groonga carefully. Modified files: lib/db.c lib/grn_db.h lib/grn_rset.h lib/ii.c lib/output.c plugins/suggest/suggest.c Modified: lib/db.c (+22 -20) =================================================================== --- lib/db.c 2015-02-13 18:00:26 +0900 (e9747f8) +++ lib/db.c 2015-02-13 18:31:35 +0900 (d99dfff) @@ -71,7 +71,7 @@ inline static grn_id grn_table_add_v_inline(grn_ctx *ctx, grn_obj *table, const void *key, int key_size, void **value, int *added); inline static void -grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score, +grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, double score, grn_rset_posinfo *pi, int dir); inline static grn_id grn_table_cursor_next_inline(grn_ctx *ctx, grn_table_cursor *tc); @@ -986,7 +986,7 @@ grn_table_get_subrecs(grn_ctx *ctx, grn_obj *table, grn_id id, } for (; count < limit; count++) { if (scorebuf) { - scorebuf[count] = *((int *)psubrec); + scorebuf[count] = *((double *)psubrec); } psubrec += GRN_RSET_SCORE_SIZE; if (subrecbuf) { @@ -2041,10 +2041,10 @@ grn_table_size(grn_ctx *ctx, grn_obj *table) } inline static void -subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir) +subrecs_push(byte *subrecs, int size, int n_subrecs, double score, void *body, int dir) { byte *v; - int *c2; + double *c2; int n = n_subrecs - 1, n2; while (n) { n2 = (n - 1) >> 1; @@ -2054,15 +2054,16 @@ subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int n = n2; } v = subrecs + n * (GRN_RSET_SCORE_SIZE + size); - 
*((int *)v) = score; + *((double *)v) = score; memcpy(v + GRN_RSET_SCORE_SIZE, body, size); } inline static void -subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir) +subrecs_replace_min(byte *subrecs, int size, int n_subrecs, double score, void *body, int dir) { byte *v; - int n = 0, n1, n2, *c1, *c2; + int n = 0, n1, n2; + double *c1, *c2; for (;;) { n1 = n * 2 + 1; n2 = n1 + 1; @@ -2093,7 +2094,7 @@ subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *bod } inline static void -grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score, +grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, double score, grn_rset_posinfo *pi, int dir) { if (DB_OBJ(table)->header.flags & GRN_OBJ_WITH_SUBREC) { @@ -2106,7 +2107,7 @@ grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score, if (pi) { byte *body = (byte *)pi + DB_OBJ(table)->subrec_offset; if (limit < n_subrecs) { - if (GRN_RSET_SUBRECS_CMP(score, *ri->subrecs, dir) > 0) { + if (GRN_RSET_SUBRECS_CMP(score, *((double *)(ri->subrecs)), dir) > 0) { subrecs_replace_min((byte *)ri->subrecs, subrec_size, limit, score, body, dir); } } else { @@ -2118,7 +2119,7 @@ grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score, } void -grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, int score, +grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, double score, grn_rset_posinfo *pi, int dir) { grn_table_add_subrec_inline(table, ri, score, pi, dir); @@ -3062,7 +3063,7 @@ grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query, inline static void grn_table_group_add_subrec(grn_ctx *ctx, grn_obj *table, - grn_rset_recinfo *ri, int score, + grn_rset_recinfo *ri, double score, grn_rset_posinfo *pi, int dir, grn_obj *calc_target, grn_obj *value_buffer) @@ -5148,6 +5149,8 @@ grn_obj_get_range_info(grn_ctx *ctx, grn_obj *obj, } break; case GRN_ACCESSOR_GET_SCORE : + *range_id = GRN_DB_FLOAT; + 
break; case GRN_ACCESSOR_GET_NSUBRECS : *range_id = GRN_DB_INT32; break; @@ -5677,7 +5680,7 @@ grn_accessor_get_value_(grn_ctx *ctx, grn_accessor *a, grn_id id, uint32_t *size case GRN_ACCESSOR_GET_SCORE : if ((value = grn_obj_get_value_(ctx, a->obj, id, size))) { value = (const char *)&((grn_rset_recinfo *)value)->score; - *size = sizeof(int); + *size = sizeof(double); } break; case GRN_ACCESSOR_GET_NSUBRECS : @@ -5800,11 +5803,11 @@ grn_accessor_get_value(grn_ctx *ctx, grn_accessor *a, grn_id id, grn_obj *value) case GRN_ACCESSOR_GET_SCORE : if (id) { grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs); - GRN_INT32_PUT(ctx, value, ri->score); + GRN_FLOAT_PUT(ctx, value, ri->score); } else { - GRN_INT32_PUT(ctx, value, 0); + GRN_FLOAT_PUT(ctx, value, 0.0); } - value->header.domain = GRN_DB_INT32; + value->header.domain = GRN_DB_FLOAT; break; case GRN_ACCESSOR_GET_NSUBRECS : if (id) { @@ -5925,15 +5928,14 @@ grn_accessor_set_value(grn_ctx *ctx, grn_accessor *a, grn_id id, } else { uint32_t size; if ((ri = (grn_rset_recinfo *) grn_obj_get_value_(ctx, a->obj, id, &size))) { - vp = &ri->score; // todo : flags support - if (value->header.domain == GRN_DB_INT32) { - memcpy(vp, GRN_BULK_HEAD(value), sizeof(int)); + if (value->header.domain == GRN_DB_FLOAT) { + ri->score = GRN_FLOAT_VALUE(value); } else { grn_obj buf; - GRN_INT32_INIT(&buf, 0); + GRN_FLOAT_INIT(&buf, 0); grn_obj_cast(ctx, value, &buf, GRN_FALSE); - memcpy(vp, GRN_BULK_HEAD(&buf), sizeof(int)); + ri->score = GRN_FLOAT_VALUE(&buf); GRN_OBJ_FIN(ctx, &buf); } } Modified: lib/grn_db.h (+1 -1) =================================================================== --- lib/grn_db.h 2015-02-13 18:00:26 +0900 (39eb010) +++ lib/grn_db.h 2015-02-13 18:31:35 +0900 (8213c99) @@ -83,7 +83,7 @@ int grn_table_get_key2(grn_ctx *ctx, grn_obj *table, grn_id id, grn_obj *bulk); grn_table_cursor *grn_table_cursor_open_by_id(grn_ctx *ctx, grn_obj *table, grn_id min, grn_id max, int flags); -void 
grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, int score, +void grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, double score, grn_rset_posinfo *pi, int dir); grn_obj *grn_obj_graft(grn_ctx *ctx, grn_obj *obj); Modified: lib/grn_rset.h (+3 -3) =================================================================== --- lib/grn_rset.h 2015-02-13 18:00:26 +0900 (f3effc5) +++ lib/grn_rset.h 2015-02-13 18:31:35 +0900 (4fa0d08) @@ -24,7 +24,7 @@ extern "C" { #endif typedef struct { - int score; + double score; int n_subrecs; int subrecs[1]; } grn_rset_recinfo; @@ -43,7 +43,7 @@ typedef struct { #define GRN_RSET_SUM_SIZE (sizeof(int64_t)) #define GRN_RSET_AVG_SIZE (sizeof(double)) -#define GRN_RSET_SCORE_SIZE (sizeof(int)) +#define GRN_RSET_SCORE_SIZE (sizeof(double)) #define GRN_RSET_N_SUBRECS(ri) ((ri)->n_subrecs & ~GRN_RSET_UTIL_BIT) @@ -51,7 +51,7 @@ typedef struct { (GRN_RSET_SCORE_SIZE + subrec_size) #define GRN_RSET_SUBRECS_CMP(a,b,dir) (((a) - (b))*(dir)) #define GRN_RSET_SUBRECS_NTH(subrecs,size,n) \ - ((int *)((byte *)subrecs + n * GRN_RSET_SUBREC_SIZE(size))) + ((double *)((byte *)subrecs + n * GRN_RSET_SUBREC_SIZE(size))) #define GRN_RSET_SUBRECS_COPY(subrecs,size,n,src) \ (memcpy(GRN_RSET_SUBRECS_NTH(subrecs, size, n), src, GRN_RSET_SUBREC_SIZE(size))) #define GRN_RSET_SUBRECS_SIZE(subrec_size,n) \ Modified: lib/ii.c (+3 -3) =================================================================== --- lib/ii.c 2015-02-13 18:00:26 +0900 (64e1195) +++ lib/ii.c 2015-02-13 18:31:35 +0900 (4b4c4e0) @@ -5540,7 +5540,7 @@ token_info_clear_offset(token_info **tis, uint32_t n) /* select */ inline static void -res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, uint32_t score, +res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, double score, grn_operator op) { grn_rset_recinfo *ri; @@ -5699,7 +5699,7 @@ typedef enum { grn_wv_constant } grn_wv_mode; -inline static int +inline static double get_weight(grn_ctx *ctx, grn_hash *s, grn_id rid, int sid, 
grn_wv_mode wvm, grn_select_optarg *optarg) { @@ -6045,7 +6045,7 @@ grn_ii_select_sequential_search_body(grn_ctx *ctx, 0); if (position != ONIG_MISMATCH) { grn_rset_posinfo info; - uint32_t score; + double score; info.rid = id; info.sid = i + 1; info.pos = 0; Modified: lib/output.c (+21 -4) =================================================================== --- lib/output.c 2015-02-13 18:00:26 +0900 (6d0f26a) +++ lib/output.c 2015-02-13 18:31:35 +0900 (20bd2a8) @@ -519,10 +519,10 @@ grn_text_atoj(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type, buf.header.domain = DB_OBJ(a->obj)->range; break; case GRN_ACCESSOR_GET_SCORE : - grn_obj_get_value(ctx, a->obj, id, &buf); { grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs); - GRN_INT32_PUT(ctx, &buf, ri->score); + int32_t int32_score = ri->score; + GRN_INT32_PUT(ctx, &buf, int32_score); } buf.header.domain = GRN_DB_INT32; break; @@ -1141,6 +1141,20 @@ count_used_n_codes(grn_ctx *ctx, grn_expr_code *start, grn_expr_code *target) return n_codes; } +static grn_bool +is_score_accessor(grn_ctx *ctx, grn_obj *obj) +{ + grn_accessor *a; + + if (obj->header.type != GRN_ACCESSOR) { + return GRN_FALSE; + } + + for (a = (grn_accessor *)obj; a->next; a = a->next) { + } + return a->action == GRN_ACCESSOR_GET_SCORE; +} + static inline void grn_output_table_column(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type, @@ -1148,13 +1162,16 @@ grn_output_table_column(grn_ctx *ctx, grn_obj *outbuf, { grn_output_array_open(ctx, outbuf, output_type, "COLUMN", 2); if (column) { - grn_id range_id; + grn_id range_id = GRN_ID_NIL; GRN_BULK_REWIND(buf); grn_column_name_(ctx, column, buf); grn_output_obj(ctx, outbuf, output_type, buf, NULL); if (column->header.type == GRN_COLUMN_INDEX) { range_id = GRN_DB_UINT32; - } else { + } else if (is_score_accessor(ctx, column)) { + range_id = GRN_DB_INT32; + } + if (range_id == GRN_ID_NIL) { range_id = grn_obj_get_range(ctx, column); } if (range_id == 
GRN_ID_NIL) { Modified: plugins/suggest/suggest.c (+9 -9) =================================================================== --- plugins/suggest/suggest.c 2015-02-13 18:00:26 +0900 (7c64539) +++ plugins/suggest/suggest.c 2015-02-13 18:31:35 +0900 (863ffcf) @@ -134,12 +134,12 @@ grn_parse_suggest_types(grn_obj *text) return types; } -static int32_t +static double cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id id, grn_obj *res, int query_type, int frequency_threshold, double conditional_probability_threshold) { - int32_t max_score = 0; + double max_score = 0.0; if (id) { grn_ii_cursor *c; grn_obj *co = grn_obj_column(ctx, items, CONST_STR_LEN("co")); @@ -198,7 +198,7 @@ cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id i boost >= 0) { grn_rset_recinfo *ri; void *value; - int32_t score = pfreq; + double score = pfreq; int added; if (max_score < score + boost) { max_score = score + boost; } /* put any formula if desired */ @@ -279,7 +279,7 @@ complete_add_item(grn_ctx *ctx, grn_id id, grn_obj *res, int frequency_threshold grn_obj_get_value(ctx, items_freq, id, item_freq); grn_obj_get_value(ctx, items_boost, id, item_boost); if (GRN_INT32_VALUE(item_boost) >= 0) { - int32_t score; + double score; score = 1 + GRN_INT32_VALUE(item_freq) + GRN_INT32_VALUE(item_boost); @@ -384,12 +384,12 @@ correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, if ((res = grn_table_create(ctx, NULL, 0, NULL, GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) { grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query)); - int32_t max_score; + double max_score; max_score = cooccurrence_search(ctx, items, items_boost, tid, res, CORRECT, frequency_threshold, conditional_probability_threshold); GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SCORE, - ":", "cooccur(%d)", max_score); + ":", "cooccur(%f)", max_score); if (GRN_TEXT_LEN(query) && ((similar_search_mode == GRN_SUGGEST_SEARCH_YES) || (similar_search_mode == 
GRN_SUGGEST_SEARCH_AUTO && @@ -423,7 +423,7 @@ correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj_get_value(ctx, items_freq2, *rp, &item_freq2); grn_obj_get_value(ctx, items_boost, *rp, &item_boost); if (GRN_INT32_VALUE(&item_boost) >= 0) { - int32_t score; + double score; grn_rset_recinfo *ri; score = 1 + (GRN_INT32_VALUE(&item_freq2) >> 4) + @@ -471,13 +471,13 @@ correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, if ((tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, 0))) { grn_id id; grn_obj score_value; - GRN_INT32_INIT(&score_value, 0); + GRN_FLOAT_INIT(&score_value, 0); while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) { GRN_RECORD_SET(ctx, var, id); grn_expr_exec(ctx, expr, 0); GRN_BULK_REWIND(&score_value); grn_obj_get_value(ctx, score, id, &score_value); - if (GRN_INT32_VALUE(&score_value) < frequency_threshold) { + if (GRN_FLOAT_VALUE(&score_value) < frequency_threshold) { grn_table_cursor_delete(ctx, tc); } } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download