null+****@clear*****
null+****@clear*****
2010年 8月 3日 (火) 15:42:07 JST
Kouhei Sutou 2010-08-03 06:42:07 +0000 (Tue, 03 Aug 2010)
New Revision: a440be4d479e5040562543a7836de595581f736a
Log:
work geo search with index again.
Modified files:
lib/db.c
Modified: lib/db.c (+207 -155)
===================================================================
--- lib/db.c 2010-07-28 08:24:12 +0000 (1953fea)
+++ lib/db.c 2010-08-03 06:42:07 +0000 (38ffd5a)
@@ -6509,105 +6509,142 @@ range_is_idp(grn_obj *obj)
#define GEO_GRS_C1 6335439
#define GEO_GRS_C2 6378137
#define GEO_GRS_C3 0.006694
-#define GEO_INT2RAD(x) ((M_PI / (GEO_RESOLUTION * 180)) * x)
+#define GEO_INT2RAD(x) ((M_PI / (GEO_RESOLUTION * 180)) * (x))
typedef struct {
grn_id id;
double d;
} geo_entry;
+static int
+compute_diff_bit(uint8_t *geo_key1, uint8_t *geo_key2)
+{
+ int i, diff_bit = 0;
+
+ for (i = 0; i < sizeof(grn_geo_point); i++) {
+ if (geo_key1[i] == geo_key2[i]) {
+ continue;
+ }
+
+ if ((geo_key1[i] & 0xc0) != (geo_key2[i] & 0xc0)) {
+ diff_bit = 0;
+ break;
+ } else if ((geo_key1[i] & 0x30) != (geo_key2[i] & 0x30)) {
+ diff_bit = 2;
+ break;
+ } else if ((geo_key1[i] & 0x0c) != (geo_key2[i] & 0x0c)) {
+ diff_bit = 4;
+ break;
+ } else if ((geo_key1[i] & 0x03) != (geo_key2[i] & 0x03)) {
+ diff_bit = 6;
+ break;
+ }
+ }
+
+ return i * 8 + diff_bit;
+}
+
static void
+compute_min_and_max(grn_geo_point *base_point, int diff_bit,
+ grn_geo_point *geo_min, grn_geo_point *geo_max)
+{
+ int diff_byte, diff_bit_mask;
+ uint8_t geo_key_base[sizeof(grn_geo_point)];
+ uint8_t geo_key_min[sizeof(grn_geo_point)];
+ uint8_t geo_key_max[sizeof(grn_geo_point)];
+
+ diff_byte = diff_bit / 8;
+ diff_bit_mask = 0xff >> (diff_bit % 8);
+ grn_gton(geo_key_base, base_point, sizeof(grn_geo_point));
+
+ memcpy(geo_key_min, geo_key_base, diff_byte + 1);
+ geo_key_min[diff_byte] &= ~diff_bit_mask;
+ memset(geo_key_min + diff_byte + 1, 0,
+ sizeof(grn_geo_point) - diff_byte - 1);
+
+ memcpy(geo_key_max, geo_key_base, diff_byte + 1);
+ geo_key_max[diff_byte] |= diff_bit_mask;
+ memset(geo_key_max + diff_byte + 1, 0xff,
+ sizeof(grn_geo_point) - diff_byte - 1);
+
+ grn_ntog((uint8_t *)geo_min, geo_key_min, sizeof(grn_geo_point));
+ grn_ntog((uint8_t *)geo_max, geo_key_max, sizeof(grn_geo_point));
+}
+
+static int
grn_table_sort_geo_detect_far_point(grn_ctx *ctx, grn_obj *table, grn_obj *index,
- grn_pat *pat,
+ grn_pat *pat, geo_entry *entries,
grn_pat_cursor *pc, int n, int accessorp,
grn_geo_point *base_point,
- int *lng_far, int *lat_far, double *d_far)
+ double *d_far,
+ grn_geo_point *geo_min, grn_geo_point *geo_max,
+ int *diff_bit)
{
- int i = 0;
+ int i = 0, diff_bit_prev, diff_bit_current;
grn_id tid;
+ geo_entry *ep, *p;
double lng1, lat1, lng2, lat2, x, y, d;
+ uint8_t geo_key_prev[sizeof(grn_geo_point)];
+ uint8_t geo_key_curr[sizeof(grn_geo_point)];
+ grn_geo_point point;
- *lng_far = base_point->longitude;
- *lat_far = base_point->latitude;
- lng1 = GEO_INT2RAD(*lng_far);
- lat1 = GEO_INT2RAD(*lat_far);
+ lng1 = GEO_INT2RAD(base_point->longitude);
+ lat1 = GEO_INT2RAD(base_point->latitude);
*d_far = 0.0;
- while (i < n && (tid = grn_pat_cursor_next(ctx, pc))) {
+ grn_gton(geo_key_curr, base_point, sizeof(grn_geo_point));
+ *diff_bit = sizeof(grn_geo_point) * 8;
+ diff_bit_current = sizeof(grn_geo_point) * 8;
+ memcpy(&point, base_point, sizeof(grn_geo_point));
+ ep = entries;
+ while ((tid = grn_pat_cursor_next(ctx, pc))) {
grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0);
if (ic) {
- grn_geo_point pos;
grn_ii_posting *posting;
- grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point));
- lng2 = GEO_INT2RAD(pos.longitude);
- lat2 = GEO_INT2RAD(pos.latitude);
+ grn_gton(geo_key_prev, &point, sizeof(grn_geo_point));
+ grn_pat_get_key(ctx, pat, tid, &point, sizeof(grn_geo_point));
+ grn_gton(geo_key_curr, &point, sizeof(grn_geo_point));
+ lng2 = GEO_INT2RAD(point.longitude);
+ lat2 = GEO_INT2RAD(point.latitude);
x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);
y = (lat2 - lat1);
d = ((x * x) + (y * y));
+
+ diff_bit_prev = diff_bit_current;
+ diff_bit_current = compute_diff_bit(geo_key_curr, geo_key_prev);
+ if (diff_bit_current < diff_bit_prev && *diff_bit > diff_bit_current) {
+ if (i == n) {
+ break;
+ }
+ *diff_bit = diff_bit_current;
+ }
+
if (d > *d_far) {
- *lng_far = pos.longitude;
- *lat_far = pos.latitude;
*d_far = d;
}
- while (i < n && (posting = grn_ii_cursor_next(ctx, ic))) {
+ while ((posting = grn_ii_cursor_next(ctx, ic))) {
grn_id rid = accessorp
? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id))
: posting->rid;
if (rid) {
- i++;
+ for (p = ep; entries < p && p[-1].d > d; p--) {
+ p->id = p[-1].id;
+ p->d = p[-1].d;
+ }
+ p->id = rid;
+ p->d = d;
+ if (i < n) {
+ ep++;
+ i++;
+ }
}
}
grn_ii_cursor_close(ctx, ic);
}
}
-}
-static void
-grn_table_sort_geo_detect_mesh(grn_ctx *ctx, grn_obj *table,
- grn_geo_point *base_point,
- int lng_far, int lat_far, double d_far,
- grn_geo_point *geo_min, grn_geo_point *geo_max,
- int *diff_byte, int *diff_bit)
-{
- int i;
- int diff_bit_mask = 0;
- uint8_t geo_key_base[sizeof(grn_geo_point)];
- uint8_t geo_key_far[sizeof(grn_geo_point)];
- uint8_t geo_key_min[sizeof(grn_geo_point)];
- uint8_t geo_key_max[sizeof(grn_geo_point)];
- grn_geo_point point;
+ compute_min_and_max(base_point, *diff_bit, geo_min, geo_max);
- grn_gton(geo_key_base, base_point, sizeof(grn_geo_point));
- point.latitude = lat_far;
- point.longitude = lng_far;
- grn_gton(geo_key_far, &point, sizeof(grn_geo_point));
- for (i = 0; i < sizeof(grn_geo_point); i++) {
- if (((geo_key_base[i] >> 6) & 3) != ((geo_key_far[i] >> 6) & 3)) {
- *diff_bit = 0;
- diff_bit_mask = 0xff >> 0;
- break;
- } else if (((geo_key_base[i] >> 4) & 3) != ((geo_key_far[i] >> 4) & 3)) {
- *diff_bit = 2;
- diff_bit_mask = 0xff >> 2;
- break;
- } else if (((geo_key_base[i] >> 2) & 3) != ((geo_key_far[i] >> 2) & 3)) {
- *diff_bit = 4;
- diff_bit_mask = 0xff >> 4;
- break;
- } else if (((geo_key_base[i] >> 0) & 3) != ((geo_key_far[i] >> 0) & 3)) {
- *diff_bit = 6;
- diff_bit_mask = 0xff >> 6;
- break;
- }
- }
- *diff_byte = i;
- memcpy(geo_key_min, geo_key_base, i + 1);
- geo_key_min[i] &= ~diff_bit_mask;
- memset(geo_key_min + i + 1, 0, sizeof(grn_geo_point) - i - 1);
- memcpy(geo_key_max, geo_key_base, i + 1);
- geo_key_max[i] |= diff_bit_mask;
- memset(geo_key_max + i + 1, 0xff, sizeof(grn_geo_point) - i - 1);
- grn_ntog((uint8_t *)geo_min, geo_key_min, sizeof(grn_geo_point));
- grn_ntog((uint8_t *)geo_max, geo_key_max, sizeof(grn_geo_point));
+ return i;
}
typedef struct
@@ -6616,66 +6653,76 @@ typedef struct
int key_size;
} mesh_entry;
-static void
+static int
grn_table_sort_geo_collect_points(grn_ctx *ctx, grn_obj *table, grn_obj *index,
grn_pat *pat,
+ geo_entry *entries, int n_entries,
int n, int accessorp,
grn_geo_point *base_point,
- int lng_far, int lat_far, double d_far,
- geo_entry **entries, int *n_entries)
+ double d_far,
+ grn_geo_point *geo_min, grn_geo_point *geo_max,
+ int diff_bit)
{
- int n_meshes, e;
- grn_geo_point geo_min, geo_max, geo_base;
- mesh_entry meshes[20];
- int diff_byte = 0, diff_bit = 0, key_size;
+ int n_meshes;
+ grn_geo_point geo_base;
+ mesh_entry meshes[19];
int lat_diff, lng_diff;
double lng1, lat1, lng2, lat2, x, y, d;
geo_entry *ep, *p;
- grn_table_sort_geo_detect_mesh(ctx, table, base_point, lng_far, lat_far, d_far,
- &geo_min, &geo_max, &diff_byte, &diff_bit);
- key_size = diff_byte * 8 + diff_bit;
- lat1 = base_point->latitude;
- lng1 = base_point->longitude;
- lat_diff = geo_max.latitude - geo_min.latitude;
- lng_diff = geo_max.longitude - geo_min.longitude;
- if (lat1 >= geo_min.latitude + lat_diff / 2) {
- geo_base.latitude = geo_max.latitude;
- if (lng1 >= geo_max.longitude + lng_diff / 2) {
- geo_base.longitude = geo_max.longitude;
+ lat1 = GEO_INT2RAD(base_point->latitude);
+ lng1 = GEO_INT2RAD(base_point->longitude);
+ lat_diff = geo_max->latitude - geo_min->latitude;
+ lng_diff = geo_max->longitude - geo_min->longitude;
+ if (base_point->latitude >= geo_min->latitude + lat_diff / 2) {
+ geo_base.latitude = geo_max->latitude;
+ if (base_point->longitude >= geo_min->longitude + lng_diff / 2) {
+ geo_base.longitude = geo_max->longitude;
} else {
- geo_base.longitude = geo_min.longitude;
+ geo_base.longitude = geo_min->longitude;
}
} else {
- geo_base.latitude = geo_min.latitude;
- if (lng1 >= geo_max.longitude + lng_diff / 2) {
- geo_base.longitude = geo_max.longitude;
+ geo_base.latitude = geo_min->latitude;
+ if (base_point->longitude >= geo_min->longitude + lng_diff / 2) {
+ geo_base.longitude = geo_max->longitude;
} else {
- geo_base.longitude = geo_min.longitude;
+ geo_base.longitude = geo_min->longitude;
}
}
n_meshes = 0;
#define add_mesh(lat_diff_,lng_diff_,key_size_)\
- meshes[n_meshes].key.latitude = geo_min.latitude + lat_diff_;\
- meshes[n_meshes].key.longitude = geo_min.longitude + lng_diff_;\
+ meshes[n_meshes].key.latitude = geo_base.latitude + (lat_diff_);\
+ meshes[n_meshes].key.longitude = geo_base.longitude + (lng_diff_);\
meshes[n_meshes].key_size = key_size_;\
n_meshes++;
- add_mesh(0, 0, key_size);
- add_mesh(0, -lng_diff, key_size);
- add_mesh(-lat_diff, -lng_diff, key_size);
- add_mesh(-lat_diff, 0, key_size);
+ if (geo_base.latitude != geo_min->latitude ||
+ geo_base.longitude != geo_min->longitude) {
+ add_mesh(1, 1, diff_bit);
+ }
+ if (geo_base.latitude != geo_min->latitude ||
+ geo_base.longitude != geo_max->longitude) {
+ add_mesh(1, -lng_diff + 1, diff_bit);
+ }
+ if (geo_base.latitude != geo_max->latitude ||
+ geo_base.longitude != geo_max->longitude) {
+ add_mesh(-lat_diff + 1, -lng_diff + 1, diff_bit);
+ }
+ if (geo_base.latitude != geo_max->latitude ||
+ geo_base.longitude != geo_min->longitude) {
+ add_mesh(-lat_diff + 1, 1, diff_bit);
+ }
#define add_sub_mesh(lat_diff_cmp,lng_diff_cmp,lat_diff_base,lng_diff_base)\
- lat2 = geo_base.latitude + lat_diff_cmp;\
- lng2 = geo_base.longitude + lng_diff_cmp;\
+ lat2 = GEO_INT2RAD(geo_base.latitude + lat_diff_cmp);\
+ lng2 = GEO_INT2RAD(geo_base.longitude + lng_diff_cmp);\
x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);\
y = (lat2 - lat1);\
d = ((x * x) + (y * y));\
- if (d > d_far) {\
- add_mesh(lat_diff_base, lng_diff_base, key_size + 2);\
+ if (d < d_far) {\
+ add_mesh(lat_diff_base, lng_diff_base, diff_bit + 2);\
}
add_sub_mesh(lat_diff + 1, -lng_diff / 2,
@@ -6717,53 +6764,51 @@ grn_table_sort_geo_collect_points(grn_ctx *ctx, grn_obj *table, grn_obj *index,
#undef add_sub_mesh
#undef add_mesh
- e = 0;
- if ((ep = *entries = GRN_MALLOC(sizeof(geo_entry) * (n + 1)))) {
+ ep = entries + n_entries;
+ while (n_meshes--) {
grn_id tid;
- while (n_meshes--) {
- grn_pat_cursor *pc = grn_pat_cursor_open(ctx, pat,
- &(meshes[n_meshes].key),
- meshes[n_meshes].key_size,
- NULL, 0,
- 0, -1,
- GRN_CURSOR_PREFIX|GRN_CURSOR_SIZE_BY_BIT);
- if (pc) {
- while ((tid = grn_pat_cursor_next(ctx, pc))) {
- grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0);
- if (ic) {
- grn_geo_point pos;
- grn_ii_posting *posting;
- grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point));
- lng2 = GEO_INT2RAD(pos.longitude);
- lat2 = GEO_INT2RAD(pos.latitude);
- x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);
- y = (lat2 - lat1);
- d = ((x * x) + (y * y));
- while ((posting = grn_ii_cursor_next(ctx, ic))) {
- grn_id rid = accessorp
- ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id))
- : posting->rid;
- if (rid) {
- for (p = ep; *entries < p && p[-1].d > d; p--) {
- p->id = p[-1].id;
- p->d = p[-1].d;
- }
- p->id = rid;
- p->d = d;
- if (e < n) {
- ep++;
- e++;
- }
+ grn_pat_cursor *pc = grn_pat_cursor_open(ctx, pat,
+ &(meshes[n_meshes].key),
+ meshes[n_meshes].key_size,
+ NULL, 0,
+ 0, -1,
+ GRN_CURSOR_PREFIX|GRN_CURSOR_SIZE_BY_BIT);
+ if (pc) {
+ while ((tid = grn_pat_cursor_next(ctx, pc))) {
+ grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0);
+ if (ic) {
+ grn_geo_point pos;
+ grn_ii_posting *posting;
+ grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point));
+ lng2 = GEO_INT2RAD(pos.longitude);
+ lat2 = GEO_INT2RAD(pos.latitude);
+ x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);
+ y = (lat2 - lat1);
+ d = ((x * x) + (y * y));
+ while ((posting = grn_ii_cursor_next(ctx, ic))) {
+ grn_id rid = accessorp
+ ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id))
+ : posting->rid;
+ if (rid) {
+ for (p = ep; entries < p && p[-1].d > d; p--) {
+ p->id = p[-1].id;
+ p->d = p[-1].d;
+ }
+ p->id = rid;
+ p->d = d;
+ if (n_entries < n) {
+ ep++;
+ n_entries++;
}
}
- grn_ii_cursor_close(ctx, ic);
}
+ grn_ii_cursor_close(ctx, ic);
}
- grn_pat_cursor_close(ctx, pc);
}
+ grn_pat_cursor_close(ctx, pc);
}
}
- *n_entries = e;
+ return n_entries;
}
static int
@@ -6797,32 +6842,39 @@ grn_table_sort_geo(grn_ctx *ctx, grn_obj *table, int offset, int limit,
grn_ii_cursor_close(ctx, ic);
}
}
+ grn_pat_cursor_close(ctx, pc);
} else {
- int lng_far, lat_far;
- double d_far;
- grn_geo_point *base_point;
- geo_entry *array = NULL, *ep;
- int n = 0;
- base_point = (grn_geo_point *)GRN_BULK_HEAD(arg);
- grn_table_sort_geo_detect_far_point(ctx, table, index, pat,
- pc, e, accessorp,
- base_point,
- &lat_far, &lng_far, &d_far);
- grn_table_sort_geo_collect_points(ctx, table, index, pat,
- e, accessorp,
- base_point,
- lat_far, lng_far, d_far,
- &array, &n);
- if (array) {
+ geo_entry *entries;
+
+ if ((entries = GRN_MALLOC(sizeof(geo_entry) * (e + 1)))) {
+ int n, diff_bit;
+ double d_far;
grn_id *v;
- for (i = 0, ep = array + offset; i < limit && ep < array + n; i++, ep++) {
+ grn_geo_point *base_point, geo_min, geo_max;
+ geo_entry *ep;
+
+ base_point = (grn_geo_point *)GRN_BULK_HEAD(arg);
+ n = grn_table_sort_geo_detect_far_point(ctx, table, index, pat,
+ entries, pc, e, accessorp,
+ base_point,
+ &d_far,
+ &geo_min, &geo_max,
+ &diff_bit);
+ grn_pat_cursor_close(ctx, pc);
+ if (diff_bit > 0) {
+ n += grn_table_sort_geo_collect_points(ctx, table, index, pat,
+ entries, n, e, accessorp,
+ base_point, d_far,
+ &geo_min, &geo_max,
+ diff_bit);
+ }
+ for (i = 0, ep = entries + offset; i < limit && ep < entries + n; i++, ep++) {
if (!grn_array_add(ctx, (grn_array *)result, (void **)&v)) { break; }
*v = ep->id;
}
- GRN_FREE(array);
+ GRN_FREE(entries);
}
}
- grn_pat_cursor_close(ctx, pc);
}
}
return i;