null+****@clear*****
null+****@clear*****
2011年 10月 15日 (土) 17:28:19 JST
Kouhei Sutou 2011-10-15 08:28:19 +0000 (Sat, 15 Oct 2011)
New Revision: e65e87f0039f6bb2da3d9b959423774d3f66b567
Log:
[benchmark][geo][select] add a benchmark.
Added files:
test/benchmark/bench-geo-select.c
test/benchmark/bench-geo-select.sh
test/benchmark/geo-select-generate-grn.rb
Modified files:
.gitignore
test/benchmark/Makefile.am
Modified: .gitignore (+4 -0)
===================================================================
--- .gitignore 2011-10-13 14:36:30 +0000 (cb0bcc1)
+++ .gitignore 2011-10-15 08:28:19 +0000 (d0f2ccc)
@@ -62,6 +62,10 @@ version.sh
/coverage
/coverage.info
/test/unit/lib/*-*.*.*/
+/test/benchmark/tmp/
+/test/benchmark/fixtures/geo-select/13_2010.CSV
+/test/benchmark/fixtures/geo-select/load.grn
+/test/benchmark/bench-geo-select
/packages/apt/debian/pool/*/*/*/*/*.diff.gz
/packages/apt/debian/pool/*/*/*/*/*.tar.gz
/packages/apt/debian/pool/*/*/*/*/*.build
Modified: test/benchmark/Makefile.am (+25 -6)
===================================================================
--- test/benchmark/Makefile.am 2011-10-13 14:36:30 +0000 (d84a7b7)
+++ test/benchmark/Makefile.am 2011-10-15 08:28:19 +0000 (8a429cc)
@@ -5,7 +5,8 @@ SUBDIRS = \
if WITH_BENCHMARK
noinst_PROGRAMS = \
bench-table-factory \
- bench-geo-distance
+ bench-geo-distance \
+ bench-geo-select
endif
INCLUDES = \
@@ -25,9 +26,27 @@ LIBS = \
bench_table_factory_SOURCES = bench-table-factory.c
bench_geo_distance_SOURCES = bench-geo-distance.c
+bench_geo_select_SOURCES = bench-geo-select.c
-benchmark:
- @for benchmark in $(noinst_PROGRAMS); do \
- echo $${benchmark}:; \
- ./$${benchmark}; \
- done
+# Each benchmark has its own run-* target so that it can be run on its
+# own, e.g. "make run-bench-geo-select".
+benchmarks = \
+	run-bench-table-factory \
+	run-bench-geo-distance \
+	run-bench-geo-select
+
+# None of these targets creates a file named after itself.
+.PHONY: benchmark $(benchmarks)
+
+run-bench-table-factory: bench-table-factory
+	@echo $@:
+	./bench-table-factory
+
+run-bench-geo-distance: bench-geo-distance
+	@echo $@:
+	./bench-geo-distance
+
+# bench-geo-select is started through bench-geo-select.sh, which receives
+# RUBY, GROONGA and srcdir through the environment.
+run-bench-geo-select: bench-geo-select
+	@echo $@:
+	env \
+	  RUBY="$(RUBY)" \
+	  GROONGA="$(GROONGA)" \
+	  srcdir="$(srcdir)" \
+	  $(srcdir)/bench-geo-select.sh
+
+# Runs every benchmark listed in $(benchmarks).
+benchmark: $(benchmarks)
Added: test/benchmark/bench-geo-select.c (+195 -0) 100644
===================================================================
--- /dev/null
+++ test/benchmark/bench-geo-select.c 2011-10-15 08:28:19 +0000 (78070ad)
@@ -0,0 +1,195 @@
+/* -*- c-basic-offset: 2; coding: utf-8 -*- */
+/*
+ Copyright (C) 2011 Kouhei Sutou <kou****@clear*****>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/*
+ groonga: 3ad91b868909444f66a36dbcbdbe2292ed14bd72
+ CFLAGS: -O0 -g
+ CPU: Intel(R) Core(TM) i5 CPU U 470 @ 1.33GHz stepping 05
+
+ % (cd test/benchmark; make --quiet run-bench-geo-select)
+ run-bench-geo-select:
+ (time)
+ select_in_rectangle: (1.84038)
+*/
+
+#include <string.h>
+
+#include <db.h>
+#include <groonga.h>
+
+#include "lib/benchmark.h"
+
+/* Calls grn_ctx_get() for `name`, passing strlen(name) as its length.
+   `name` is expanded twice, so it must be free of side effects. */
+#define GET(context, name) (grn_ctx_get(context, name, strlen(name)))
+
+/* State shared by main() and the setup/run/teardown benchmark callbacks. */
+typedef struct _BenchmarkData
+{
+ /* TRUE when the GROONGA_BENCH_REPORT_RESULT environment variable is set;
+    bench_teardown() then prints the size of the result table. */
+ gboolean report_result;
+
+ /* Allocated and initialized in main(). */
+ grn_ctx *context;
+ /* Opened by setup_database(), released by teardown_database(). */
+ grn_obj *database;
+ /* The "Addresses" object looked up by setup_database(). */
+ grn_obj *table;
+ /* The "Locations.address" object looked up by setup_database(). */
+ grn_obj *index_column;
+ /* Created by bench_setup(), released by bench_teardown(). */
+ grn_obj *result;
+
+ /* Corners of the searched rectangle; initialized in main() and filled in
+    by bench_setup(). */
+ grn_obj top_left_point;
+ grn_obj bottom_right_point;
+} BenchmarkData;
+
+/* Stores the point described by geo_point_text into geo_point by casting
+   a temporary text object that holds the same string. */
+static void
+set_geo_point(grn_ctx *context, grn_obj *geo_point, const gchar *geo_point_text)
+{
+  grn_obj source_text;
+
+  GRN_TEXT_INIT(&source_text, 0);
+  GRN_TEXT_PUTS(context, &source_text, geo_point_text);
+
+  /* The return value of the cast is not inspected. */
+  grn_obj_cast(context, &source_text, geo_point, GRN_FALSE);
+
+  grn_obj_unlink(context, &source_text);
+}
+
+/* Setup callback registered with the reporter: creates the result table
+   and sets both corners of the rectangle that is searched. */
+static void
+bench_setup(gpointer user_data)
+{
+  BenchmarkData *data = user_data;
+  grn_ctx *context = data->context;
+
+  data->result = grn_table_create(context, NULL, 0, NULL,
+                                  GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_WITH_SUBREC,
+                                  data->table, NULL);
+
+  /* Top left corner: Ikebukuro station. */
+  set_geo_point(context, &(data->top_left_point), "35.72890,139.71036");
+  /* Bottom right corner: Tokyo station. */
+  set_geo_point(context, &(data->bottom_right_point), "35.68136,139.76609");
+}
+
+/* Benchmark body: runs grn_geo_select_in_rectangle() on the index column
+   for the configured rectangle, adding to data->result with GRN_OP_OR. */
+static void
+bench_geo_select_in_rectangle(gpointer user_data)
+{
+  BenchmarkData *data = user_data;
+  grn_obj *top_left = &(data->top_left_point);
+  grn_obj *bottom_right = &(data->bottom_right_point);
+
+  grn_geo_select_in_rectangle(data->context, data->index_column,
+                              top_left, bottom_right,
+                              data->result, GRN_OP_OR);
+}
+
+/* Teardown callback registered with the reporter: optionally prints the
+   number of records in the result table, then releases the table. */
+static void
+bench_teardown(gpointer user_data)
+{
+  BenchmarkData *data = user_data;
+
+  if (data->report_result) {
+    /* grn_table_size() returns unsigned int, hence %u rather than %d. */
+    g_print("result: %u\n", grn_table_size(data->context, data->result));
+  }
+
+  grn_obj_unlink(data->context, data->result);
+}
+
+/* Returns the path of the "tmp" directory below the current working
+   directory.  The caller owns the returned string and releases it with
+   g_free(). */
+static gchar *
+get_tmp_dir(void)
+{
+  gchar *working_dir = g_get_current_dir();
+  gchar *path = g_build_filename(working_dir, "tmp", NULL);
+
+  g_free(working_dir);
+  return path;
+}
+
+/* Opens the database at <current directory>/tmp/geo-select/db and looks
+   up the "Addresses" and "Locations.address" objects used by the
+   benchmark.  The results are stored in `data`. */
+static void
+setup_database(BenchmarkData *data)
+{
+  gchar *tmp_dir;
+  gchar *database_path;
+
+  tmp_dir = get_tmp_dir();
+  database_path = g_build_filename(tmp_dir, "geo-select", "db", NULL);
+  data->database = grn_db_open(data->context, database_path);
+
+  data->table = GET(data->context, "Addresses");
+  data->index_column = GET(data->context, "Locations.address");
+
+  g_free(database_path);
+  /* get_tmp_dir() hands over ownership of its result; release it here so
+     that it is not leaked. */
+  g_free(tmp_dir);
+}
+
+/* Releases what setup_database() acquired: the index column and the table
+   first, the database itself last. */
+static void
+teardown_database(BenchmarkData *data)
+{
+ grn_obj_unlink(data->context, data->index_column);
+ grn_obj_unlink(data->context, data->table);
+ grn_obj_unlink(data->context, data->database);
+}
+
+/* Entry point: opens the benchmark database, registers the
+   "select_in_rectangle" benchmark, runs it and releases everything.
+
+   Environment variables read here:
+     GROONGA_BENCH_REPORT_RESULT  when set, the result size is printed
+                                  by bench_teardown()
+     GROONGA_BENCH_N              overrides the default n of 100 that is
+                                  passed to bench_reporter_register() */
+int
+main(int argc, gchar **argv)
+{
+ BenchmarkData data;
+ BenchReporter *reporter;
+ gint n = 100;
+
+ grn_init();
+ bench_init(&argc, &argv);
+
+ data.report_result = g_getenv("GROONGA_BENCH_REPORT_RESULT") != NULL;
+
+ data.context = g_new(grn_ctx, 1);
+ grn_ctx_init(data.context, 0);
+
+ setup_database(&data);
+ /* The corner points are only initialized here; bench_setup() assigns
+    their values. */
+ GRN_WGS84_GEO_POINT_INIT(&(data.top_left_point), 0);
+ GRN_WGS84_GEO_POINT_INIT(&(data.bottom_right_point), 0);
+
+ {
+ const gchar *groonga_bench_n;
+ groonga_bench_n = g_getenv("GROONGA_BENCH_N");
+ if (groonga_bench_n) {
+ /* NOTE(review): atoi() is declared in <stdlib.h>, which this file does
+    not include directly; presumably it arrives through another
+    header -- confirm.  The converted value is used without validation. */
+ n = atoi(groonga_bench_n);
+ }
+ }
+
+ reporter = bench_reporter_new();
+
+ /* REGISTER(label, type) registers bench_geo_select_<type> together with
+    the shared setup/teardown callbacks and &data as user data. */
+#define REGISTER(label, type) \
+ bench_reporter_register(reporter, label, n, \
+ bench_setup, \
+ bench_geo_select_ ## type, \
+ bench_teardown, \
+ &data)
+ REGISTER("select_in_rectangle", in_rectangle);
+#undef REGISTER
+
+ bench_reporter_run(reporter);
+ g_object_unref(reporter);
+
+ /* Release in reverse order of acquisition: points, database objects,
+    context, then the benchmark and groonga libraries. */
+ grn_obj_unlink(data.context, &(data.top_left_point));
+ grn_obj_unlink(data.context, &(data.bottom_right_point));
+ teardown_database(&data);
+
+ grn_ctx_fin(data.context);
+ g_free(data.context);
+
+ bench_quit();
+ grn_fin();
+
+ return 0;
+}
Added: test/benchmark/bench-geo-select.sh (+38 -0) 100755
===================================================================
--- /dev/null
+++ test/benchmark/bench-geo-select.sh 2011-10-15 08:28:19 +0000 (7e84c8f)
@@ -0,0 +1,38 @@
+#!/bin/sh
+#
+# Prepares the data needed by bench-geo-select and then runs it:
+#   1. extracts ${srcdir}/fixtures/geo-select/13_2010.CSV.xz and converts
+#      it from CP932 to UTF-8,
+#   2. generates a groonga load file from that CSV,
+#   3. creates the benchmark database from the load file,
+#   4. runs ./bench-geo-select (under gdb when GROONGA_BENCH_DEBUG=yes).
+# A preparation step is skipped when its output is non-empty and not older
+# than its input.
+#
+# Environment: RUBY, GROONGA and srcdir (passed by the run-bench-geo-select
+# make target), GROONGA_BENCH_DEBUG.
+
+base_dir="$(dirname "$0")"
+
+# Defaults for running the script by hand or with empty make variables.
+: "${srcdir:=${base_dir}}"
+: "${RUBY:=ruby}"
+: "${GROONGA:=groonga}"
+
+fixture_dir="${srcdir}/fixtures/geo-select"
+data_dir="${base_dir}/fixtures/geo-select"
+csv_xz="${fixture_dir}/13_2010.CSV.xz"
+csv="${data_dir}/13_2010.CSV"
+grn="${data_dir}/load.grn"
+
+geo_select_generate_grn_rb="${base_dir}/geo-select-generate-grn.rb"
+
+# bench-geo-select opens tmp/geo-select/db below its current working
+# directory, so the database is created there rather than below
+# ${base_dir}; the two differ when building outside of the source tree.
+db="./tmp/geo-select/db"
+db_dir="$(dirname "${db}")"
+
+bench_geo_select="./bench-geo-select"
+
+mkdir -p "${data_dir}" || exit 1
+if [ ! -s "${csv}" ] || [ "${csv}" -ot "${csv_xz}" ]; then
+  echo "extracting ${csv_xz}..."
+  # Remove a partial result on failure so that the next run retries.
+  xzcat "${csv_xz}" | iconv --from-code cp932 --to-code utf-8 > "${csv}" || {
+    rm -f "${csv}"
+    exit 1
+  }
+fi
+
+if [ ! -s "${grn}" ] || [ "${grn}" -ot "${csv}" ]; then
+  echo "generating test data..."
+  "${RUBY}" "${geo_select_generate_grn_rb}" "${csv}" "${grn}" || {
+    rm -f "${grn}"
+    exit 1
+  }
+fi
+
+if [ ! -s "${db}" ] || [ "${db}" -ot "${grn}" ]; then
+  echo "creating test database..."
+  rm -rf "${db_dir}"
+  mkdir -p "${db_dir}" || exit 1
+  "${GROONGA}" -n "${db}" < "${grn}" || {
+    rm -rf "${db_dir}"
+    exit 1
+  }
+fi
+
+if [ "${GROONGA_BENCH_DEBUG}" = "yes" ]; then
+  bench_geo_select="../../libtool --mode=execute gdb --args ${bench_geo_select}"
+fi
+# Deliberately unquoted: the debug variant is a whole command line that has
+# to be split into words.
+${bench_geo_select}
Added: test/benchmark/geo-select-generate-grn.rb (+51 -0) 100755
===================================================================
--- /dev/null
+++ test/benchmark/geo-select-generate-grn.rb 2011-10-15 08:28:19 +0000 (f8654b0)
@@ -0,0 +1,51 @@
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+#
+# Generates a groonga command file from an address CSV.
+#
+# The output defines the Addresses and Locations tables and loads one
+# Addresses record per CSV data row.  The first CSV row is taken as the
+# header row.  A record's key is the concatenation of its address columns
+# and its location is written as "LATITUDExLONGITUDE".
+
+if ARGV.size != 2
+  # Usage errors belong on stderr; the script exits with a failure status.
+  $stderr.puts "Usage: #{$0} SOURCE_CSV OUTPUT_GRN"
+  $stderr.puts " e.g.: #{$0} fixtures/geo-select/13_2010.CSV fixtures/geo-select/load.grn"
+  exit(false)
+end
+
+csv, grn = ARGV
+
+require "fileutils"
+require "csv"
+
+# Columns whose values are concatenated, in this order, to build the key.
+NAME_COLUMNS = ["都道府県名", "市区町村名", "大字・町丁目", "街区符号・地番"]
+
+# Escapes backslashes and double quotes so that +text+ can be embedded in
+# a double-quoted JSON string.
+def escape_json_string(text)
+  text.gsub(/["\\]/) { |character| "\\#{character}" }
+end
+
+FileUtils.mkdir_p(File.dirname(grn))
+File.open(grn, "w") do |output|
+  output.print(<<-EOH.strip)
+table_create Addresses TABLE_HASH_KEY ShortText
+column_create Addresses location COLUMN_SCALAR WGS84GeoPoint
+
+table_create Locations TABLE_PAT_KEY WGS84GeoPoint
+column_create Locations address COLUMN_INDEX Addresses location
+
+load --table Addresses
+[
+["_key", "location"]
+EOH
+
+  headers = nil
+  write_record = lambda do |row|
+    if headers.nil?
+      # The first row holds the column names.
+      headers = row
+    else
+      record = Hash[headers.zip(row)]
+      # Array#join renders nil (an empty CSV field) as "", so a missing
+      # address component does not raise.
+      name = record.values_at(*NAME_COLUMNS).join
+      location = "%sx%s" % [record["緯度"], record["経度"]]
+      output.print(",\n[\"#{escape_json_string(name)}\", \"#{location}\"]")
+    end
+  end
+
+  # The encoding option is written as trailing "key => value" arguments:
+  # that form is accepted as an options hash by old Rubies and as keyword
+  # arguments by Ruby 3, where a positional Hash would be taken for the
+  # file mode.
+  if defined?(Encoding)
+    CSV.foreach(csv, :encoding => "UTF-8", &write_record)
+  else
+    CSV.foreach(csv, &write_record)
+  end
+
+  output.print(<<-EOF)
+
+]
+EOF
+end