[Groonga-commit] groonga/groonga at 56cd8a9 [master] Use double instead of int32 for score internally

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Feb 13 18:31:35 JST 2015


Kouhei Sutou	2015-02-13 18:31:35 +0900 (Fri, 13 Feb 2015)

  New Revision: 56cd8a96a4ac9cf045c69cc51886f49972d00c64
  https://github.com/groonga/groonga/commit/56cd8a96a4ac9cf045c69cc51886f49972d00c64

  Message:
    Use double instead of int32 for score internally
    
    No public query APIs are changed.
    
    The DB API is changed: the _score value type is now GRN_DB_FLOAT instead
    of GRN_DB_INT32.
    
    It's a backward incompatible change. If you're using the DB API, upgrade
    Groonga carefully.

  Modified files:
    lib/db.c
    lib/grn_db.h
    lib/grn_rset.h
    lib/ii.c
    lib/output.c
    plugins/suggest/suggest.c

  Modified: lib/db.c (+22 -20)
===================================================================
--- lib/db.c    2015-02-13 18:00:26 +0900 (e9747f8)
+++ lib/db.c    2015-02-13 18:31:35 +0900 (d99dfff)
@@ -71,7 +71,7 @@ inline static grn_id
 grn_table_add_v_inline(grn_ctx *ctx, grn_obj *table, const void *key, int key_size,
                        void **value, int *added);
 inline static void
-grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score,
+grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, double score,
                             grn_rset_posinfo *pi, int dir);
 inline static grn_id
 grn_table_cursor_next_inline(grn_ctx *ctx, grn_table_cursor *tc);
@@ -986,7 +986,7 @@ grn_table_get_subrecs(grn_ctx *ctx, grn_obj *table, grn_id id,
       }
       for (; count < limit; count++) {
         if (scorebuf) {
-          scorebuf[count] = *((int *)psubrec);
+          scorebuf[count] = *((double *)psubrec);
         }
         psubrec += GRN_RSET_SCORE_SIZE;
         if (subrecbuf) {
@@ -2041,10 +2041,10 @@ grn_table_size(grn_ctx *ctx, grn_obj *table)
 }
 
 inline static void
-subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir)
+subrecs_push(byte *subrecs, int size, int n_subrecs, double score, void *body, int dir)
 {
   byte *v;
-  int *c2;
+  double *c2;
   int n = n_subrecs - 1, n2;
   while (n) {
     n2 = (n - 1) >> 1;
@@ -2054,15 +2054,16 @@ subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int
     n = n2;
   }
   v = subrecs + n * (GRN_RSET_SCORE_SIZE + size);
-  *((int *)v) = score;
+  *((double *)v) = score;
   memcpy(v + GRN_RSET_SCORE_SIZE, body, size);
 }
 
 inline static void
-subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir)
+subrecs_replace_min(byte *subrecs, int size, int n_subrecs, double score, void *body, int dir)
 {
   byte *v;
-  int n = 0, n1, n2, *c1, *c2;
+  int n = 0, n1, n2;
+  double *c1, *c2;
   for (;;) {
     n1 = n * 2 + 1;
     n2 = n1 + 1;
@@ -2093,7 +2094,7 @@ subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *bod
 }
 
 inline static void
-grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score,
+grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, double score,
                             grn_rset_posinfo *pi, int dir)
 {
   if (DB_OBJ(table)->header.flags & GRN_OBJ_WITH_SUBREC) {
@@ -2106,7 +2107,7 @@ grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score,
       if (pi) {
         byte *body = (byte *)pi + DB_OBJ(table)->subrec_offset;
         if (limit < n_subrecs) {
-          if (GRN_RSET_SUBRECS_CMP(score, *ri->subrecs, dir) > 0) {
+          if (GRN_RSET_SUBRECS_CMP(score, *((double *)(ri->subrecs)), dir) > 0) {
             subrecs_replace_min((byte *)ri->subrecs, subrec_size, limit, score, body, dir);
           }
         } else {
@@ -2118,7 +2119,7 @@ grn_table_add_subrec_inline(grn_obj *table, grn_rset_recinfo *ri, int score,
 }
 
 void
-grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, int score,
+grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, double score,
                      grn_rset_posinfo *pi, int dir)
 {
   grn_table_add_subrec_inline(table, ri, score, pi, dir);
@@ -3062,7 +3063,7 @@ grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query,
 inline static void
 grn_table_group_add_subrec(grn_ctx *ctx,
                            grn_obj *table,
-                           grn_rset_recinfo *ri, int score,
+                           grn_rset_recinfo *ri, double score,
                            grn_rset_posinfo *pi, int dir,
                            grn_obj *calc_target,
                            grn_obj *value_buffer)
@@ -5148,6 +5149,8 @@ grn_obj_get_range_info(grn_ctx *ctx, grn_obj *obj,
         }
         break;
       case GRN_ACCESSOR_GET_SCORE :
+        *range_id = GRN_DB_FLOAT;
+        break;
       case GRN_ACCESSOR_GET_NSUBRECS :
         *range_id = GRN_DB_INT32;
         break;
@@ -5677,7 +5680,7 @@ grn_accessor_get_value_(grn_ctx *ctx, grn_accessor *a, grn_id id, uint32_t *size
     case GRN_ACCESSOR_GET_SCORE :
       if ((value = grn_obj_get_value_(ctx, a->obj, id, size))) {
         value = (const char *)&((grn_rset_recinfo *)value)->score;
-        *size = sizeof(int);
+        *size = sizeof(double);
       }
       break;
     case GRN_ACCESSOR_GET_NSUBRECS :
@@ -5800,11 +5803,11 @@ grn_accessor_get_value(grn_ctx *ctx, grn_accessor *a, grn_id id, grn_obj *value)
     case GRN_ACCESSOR_GET_SCORE :
       if (id) {
         grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
-        GRN_INT32_PUT(ctx, value, ri->score);
+        GRN_FLOAT_PUT(ctx, value, ri->score);
       } else {
-        GRN_INT32_PUT(ctx, value, 0);
+        GRN_FLOAT_PUT(ctx, value, 0.0);
       }
-      value->header.domain = GRN_DB_INT32;
+      value->header.domain = GRN_DB_FLOAT;
       break;
     case GRN_ACCESSOR_GET_NSUBRECS :
       if (id) {
@@ -5925,15 +5928,14 @@ grn_accessor_set_value(grn_ctx *ctx, grn_accessor *a, grn_id id,
           } else {
             uint32_t size;
             if ((ri = (grn_rset_recinfo *) grn_obj_get_value_(ctx, a->obj, id, &size))) {
-              vp = &ri->score;
               // todo : flags support
-              if (value->header.domain == GRN_DB_INT32) {
-                memcpy(vp, GRN_BULK_HEAD(value), sizeof(int));
+              if (value->header.domain == GRN_DB_FLOAT) {
+                ri->score = GRN_FLOAT_VALUE(value);
               } else {
                 grn_obj buf;
-                GRN_INT32_INIT(&buf, 0);
+                GRN_FLOAT_INIT(&buf, 0);
                 grn_obj_cast(ctx, value, &buf, GRN_FALSE);
-                memcpy(vp, GRN_BULK_HEAD(&buf), sizeof(int));
+                ri->score = GRN_FLOAT_VALUE(&buf);
                 GRN_OBJ_FIN(ctx, &buf);
               }
             }

  Modified: lib/grn_db.h (+1 -1)
===================================================================
--- lib/grn_db.h    2015-02-13 18:00:26 +0900 (39eb010)
+++ lib/grn_db.h    2015-02-13 18:31:35 +0900 (8213c99)
@@ -83,7 +83,7 @@ int grn_table_get_key2(grn_ctx *ctx, grn_obj *table, grn_id id, grn_obj *bulk);
 grn_table_cursor *grn_table_cursor_open_by_id(grn_ctx *ctx, grn_obj *table,
                                               grn_id min, grn_id max, int flags);
 
-void grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, int score,
+void grn_table_add_subrec(grn_obj *table, grn_rset_recinfo *ri, double score,
                           grn_rset_posinfo *pi, int dir);
 
 grn_obj *grn_obj_graft(grn_ctx *ctx, grn_obj *obj);

  Modified: lib/grn_rset.h (+3 -3)
===================================================================
--- lib/grn_rset.h    2015-02-13 18:00:26 +0900 (f3effc5)
+++ lib/grn_rset.h    2015-02-13 18:31:35 +0900 (4fa0d08)
@@ -24,7 +24,7 @@ extern "C" {
 #endif
 
 typedef struct {
-  int score;
+  double score;
   int n_subrecs;
   int subrecs[1];
 } grn_rset_recinfo;
@@ -43,7 +43,7 @@ typedef struct {
 #define GRN_RSET_SUM_SIZE       (sizeof(int64_t))
 #define GRN_RSET_AVG_SIZE       (sizeof(double))
 
-#define GRN_RSET_SCORE_SIZE (sizeof(int))
+#define GRN_RSET_SCORE_SIZE (sizeof(double))
 
 #define GRN_RSET_N_SUBRECS(ri) ((ri)->n_subrecs & ~GRN_RSET_UTIL_BIT)
 
@@ -51,7 +51,7 @@ typedef struct {
   (GRN_RSET_SCORE_SIZE + subrec_size)
 #define GRN_RSET_SUBRECS_CMP(a,b,dir) (((a) - (b))*(dir))
 #define GRN_RSET_SUBRECS_NTH(subrecs,size,n) \
-  ((int *)((byte *)subrecs + n * GRN_RSET_SUBREC_SIZE(size)))
+  ((double *)((byte *)subrecs + n * GRN_RSET_SUBREC_SIZE(size)))
 #define GRN_RSET_SUBRECS_COPY(subrecs,size,n,src) \
   (memcpy(GRN_RSET_SUBRECS_NTH(subrecs, size, n), src, GRN_RSET_SUBREC_SIZE(size)))
 #define GRN_RSET_SUBRECS_SIZE(subrec_size,n) \

  Modified: lib/ii.c (+3 -3)
===================================================================
--- lib/ii.c    2015-02-13 18:00:26 +0900 (64e1195)
+++ lib/ii.c    2015-02-13 18:31:35 +0900 (4b4c4e0)
@@ -5540,7 +5540,7 @@ token_info_clear_offset(token_info **tis, uint32_t n)
 /* select */
 
 inline static void
-res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, uint32_t score,
+res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, double score,
         grn_operator op)
 {
   grn_rset_recinfo *ri;
@@ -5699,7 +5699,7 @@ typedef enum {
   grn_wv_constant
 } grn_wv_mode;
 
-inline static int
+inline static double
 get_weight(grn_ctx *ctx, grn_hash *s, grn_id rid, int sid,
            grn_wv_mode wvm, grn_select_optarg *optarg)
 {
@@ -6045,7 +6045,7 @@ grn_ii_select_sequential_search_body(grn_ctx *ctx,
                                0);
         if (position != ONIG_MISMATCH) {
           grn_rset_posinfo info;
-          uint32_t score;
+          double score;
           info.rid = id;
           info.sid = i + 1;
           info.pos = 0;

  Modified: lib/output.c (+21 -4)
===================================================================
--- lib/output.c    2015-02-13 18:00:26 +0900 (6d0f26a)
+++ lib/output.c    2015-02-13 18:31:35 +0900 (20bd2a8)
@@ -519,10 +519,10 @@ grn_text_atoj(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type,
         buf.header.domain = DB_OBJ(a->obj)->range;
         break;
       case GRN_ACCESSOR_GET_SCORE :
-        grn_obj_get_value(ctx, a->obj, id, &buf);
         {
           grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
-          GRN_INT32_PUT(ctx, &buf, ri->score);
+          int32_t int32_score = ri->score;
+          GRN_INT32_PUT(ctx, &buf, int32_score);
         }
         buf.header.domain = GRN_DB_INT32;
         break;
@@ -1141,6 +1141,20 @@ count_used_n_codes(grn_ctx *ctx, grn_expr_code *start, grn_expr_code *target)
   return n_codes;
 }
 
+static grn_bool
+is_score_accessor(grn_ctx *ctx, grn_obj *obj)
+{
+  grn_accessor *a;
+
+  if (obj->header.type != GRN_ACCESSOR) {
+    return GRN_FALSE;
+  }
+
+  for (a = (grn_accessor *)obj; a->next; a = a->next) {
+  }
+  return a->action == GRN_ACCESSOR_GET_SCORE;
+}
+
 static inline void
 grn_output_table_column(grn_ctx *ctx, grn_obj *outbuf,
                         grn_content_type output_type,
@@ -1148,13 +1162,16 @@ grn_output_table_column(grn_ctx *ctx, grn_obj *outbuf,
 {
   grn_output_array_open(ctx, outbuf, output_type, "COLUMN", 2);
   if (column) {
-    grn_id range_id;
+    grn_id range_id = GRN_ID_NIL;
     GRN_BULK_REWIND(buf);
     grn_column_name_(ctx, column, buf);
     grn_output_obj(ctx, outbuf, output_type, buf, NULL);
     if (column->header.type == GRN_COLUMN_INDEX) {
       range_id = GRN_DB_UINT32;
-    } else {
+    } else if (is_score_accessor(ctx, column)) {
+      range_id = GRN_DB_INT32;
+    }
+    if (range_id == GRN_ID_NIL) {
       range_id = grn_obj_get_range(ctx, column);
     }
     if (range_id == GRN_ID_NIL) {

  Modified: plugins/suggest/suggest.c (+9 -9)
===================================================================
--- plugins/suggest/suggest.c    2015-02-13 18:00:26 +0900 (7c64539)
+++ plugins/suggest/suggest.c    2015-02-13 18:31:35 +0900 (863ffcf)
@@ -134,12 +134,12 @@ grn_parse_suggest_types(grn_obj *text)
   return types;
 }
 
-static int32_t
+static double
 cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id id,
                     grn_obj *res, int query_type, int frequency_threshold,
                     double conditional_probability_threshold)
 {
-  int32_t max_score = 0;
+  double max_score = 0.0;
   if (id) {
     grn_ii_cursor *c;
     grn_obj *co = grn_obj_column(ctx, items, CONST_STR_LEN("co"));
@@ -198,7 +198,7 @@ cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id i
             boost >= 0) {
           grn_rset_recinfo *ri;
           void *value;
-          int32_t score = pfreq;
+          double score = pfreq;
           int added;
           if (max_score < score + boost) { max_score = score + boost; }
           /* put any formula if desired */
@@ -279,7 +279,7 @@ complete_add_item(grn_ctx *ctx, grn_id id, grn_obj *res, int frequency_threshold
   grn_obj_get_value(ctx, items_freq, id, item_freq);
   grn_obj_get_value(ctx, items_boost, id, item_boost);
   if (GRN_INT32_VALUE(item_boost) >= 0) {
-    int32_t score;
+    double score;
     score = 1 +
             GRN_INT32_VALUE(item_freq) +
             GRN_INT32_VALUE(item_boost);
@@ -384,12 +384,12 @@ correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
   if ((res = grn_table_create(ctx, NULL, 0, NULL,
                               GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
     grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
-    int32_t max_score;
+    double max_score;
     max_score = cooccurrence_search(ctx, items, items_boost, tid, res, CORRECT,
                                     frequency_threshold,
                                     conditional_probability_threshold);
     GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SCORE,
-                  ":", "cooccur(%d)", max_score);
+                  ":", "cooccur(%f)", max_score);
     if (GRN_TEXT_LEN(query) &&
         ((similar_search_mode == GRN_SUGGEST_SEARCH_YES) ||
          (similar_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
@@ -423,7 +423,7 @@ correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
                   grn_obj_get_value(ctx, items_freq2, *rp, &item_freq2);
                   grn_obj_get_value(ctx, items_boost, *rp, &item_boost);
                   if (GRN_INT32_VALUE(&item_boost) >= 0) {
-                    int32_t score;
+                    double score;
                     grn_rset_recinfo *ri;
                     score = 1 +
                             (GRN_INT32_VALUE(&item_freq2) >> 4) +
@@ -471,13 +471,13 @@ correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
               if ((tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, 0))) {
                 grn_id id;
                 grn_obj score_value;
-                GRN_INT32_INIT(&score_value, 0);
+                GRN_FLOAT_INIT(&score_value, 0);
                 while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
                   GRN_RECORD_SET(ctx, var, id);
                   grn_expr_exec(ctx, expr, 0);
                   GRN_BULK_REWIND(&score_value);
                   grn_obj_get_value(ctx, score, id, &score_value);
-                  if (GRN_INT32_VALUE(&score_value) < frequency_threshold) {
+                  if (GRN_FLOAT_VALUE(&score_value) < frequency_threshold) {
                     grn_table_cursor_delete(ctx, tc);
                   }
                 }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index