Kouhei Sutou
null+****@clear*****
Wed Jun 22 00:42:14 JST 2016
Kouhei Sutou 2016-06-22 00:42:14 +0900 (Wed, 22 Jun 2016) New Revision: 71c56dab010713c3f9990841ea6ee60969be21a1 https://github.com/groonga/groonga/commit/71c56dab010713c3f9990841ea6ee60969be21a1 Message: Add benchmark for NFKC implementation Added files: benchmark/bench-nfkc.c Modified files: .gitignore benchmark/Makefile.am Modified: .gitignore (+1 -0) =================================================================== --- .gitignore 2016-06-22 00:28:31 +0900 (7e1903d) +++ .gitignore 2016-06-22 00:42:14 +0900 (2134e98) @@ -106,6 +106,7 @@ CMakeFiles /benchmark/bench-range-select /benchmark/bench-result-set /benchmark/bench-between-sequential +/benchmark/bench-nfkc /packages/*/*.log /packages/apt/env.sh /packages/apt/debian/groonga-keyring.postrm Modified: benchmark/Makefile.am (+11 -2) =================================================================== --- benchmark/Makefile.am 2016-06-22 00:28:31 +0900 (d128fde) +++ benchmark/Makefile.am 2016-06-22 00:42:14 +0900 (bdc4698) @@ -13,7 +13,8 @@ noinst_PROGRAMS = \ bench-query-optimizer \ bench-range-select \ bench-result-set \ - bench-between-sequential + bench-between-sequential \ + bench-nfkc endif EXTRA_DIST = \ @@ -60,6 +61,9 @@ nodist_EXTRA_bench_result_set_SOURCES = $(NONEXISTENT_CXX_SOURCE) bench_between_sequential_SOURCES = bench-between-sequential.c nodist_EXTRA_bench_between_sequential_SOURCES = $(NONEXISTENT_CXX_SOURCE) +bench_nfkc_SOURCES = bench-nfkc.c +nodist_EXTRA_bench_nfkc_SOURCES = $(NONEXISTENT_CXX_SOURCE) + benchmarks = \ run-bench-table-factory \ run-bench-geo-distance \ @@ -68,7 +72,8 @@ benchmarks = \ run-bench-query-optimizer \ run-bench-range-select \ run-bench-result-set \ - run-bench-between-sequential + run-bench-between-sequential \ + run-bench-nfkc run-bench-table-factory: bench-table-factory @echo $@: @@ -132,4 +137,8 @@ run-bench-between-sequential: bench-between-sequential GRN_RUBY_SCRIPTS_DIR="$(top_srcdir)/lib/mrb/scripts" \ ./bench-between-sequential +run-bench-nfkc: bench-nfkc + @echo $@: + ./bench-nfkc + benchmark: $(benchmarks) Added: benchmark/bench-nfkc.c (+239 -0) 100644 =================================================================== --- /dev/null +++ benchmark/bench-nfkc.c 2016-06-22 00:42:14 +0900 (7c601fc) @@ -0,0 +1,239 @@ +/* -*- c-basic-offset: 2; coding: utf-8 -*- */ +/* + Copyright (C) 2015-2016 Kouhei Sutou <kou �� clear-code.com> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <stdlib.h> +#include <stdio.h> + +#include <glib.h> + +#include <groonga.h> + +#include "lib/benchmark.h" + +const char *grn_nfkc_map1(const unsigned char *str); +const char *grn_nfkc50_map1(const unsigned char *str); +const char *grn_nfkc_map2(const unsigned char *prefix, + const unsigned char *suffix); +const char *grn_nfkc50_map2(const unsigned char *prefix, + const unsigned char *suffix); + +#include "../lib/nfkc.c" +#include "../lib/nfkc50.c" + +#define MAX_UNICODE 0x110000 +#define BUFFER_SIZE 0x100 + +static int +ucs2utf8(unsigned int i, unsigned char *buf) +{ + unsigned char *p = buf; + if (i < 0x80) { + *p++ = i; + } else { + if (i < 0x800) { + *p++ = (i >> 6) | 0xc0; + } else { + if (i < 0x00010000) { + *p++ = (i >> 12) | 0xe0; + } else { + if (i < 0x00200000) { + *p++ = (i >> 18) | 0xf0; + } else { + if (i < 0x04000000) { + *p++ = (i >> 24) | 0xf8; + } else if (i < 0x80000000) { + *p++ = (i >> 30) | 0xfc; + *p++ = ((i >> 24) & 0x3f) | 0x80; + } + *p++ = ((i >> 18) & 0x3f) | 0x80; + } + *p++ = ((i >> 12) & 0x3f) | 0x80; + } + *p++ = ((i >> 6) & 0x3f) | 0x80; + } + *p++ = (0x3f & i) | 0x80; + } + *p = '\0'; + return (p - buf); +} + +static void +bench_map1_switch(gpointer user_data) +{ + uint64_t code_point; + char utf8[7]; + + for (code_point = 1; code_point < MAX_UNICODE; code_point++) { + ucs2utf8(code_point, (unsigned char *)utf8); + grn_nfkc_map1(utf8); + } +} + +static void +bench_map1_table(gpointer user_data) +{ + uint64_t code_point; + char utf8[7]; + + for (code_point = 1; code_point < MAX_UNICODE; code_point++) { + ucs2utf8(code_point, (unsigned char *)utf8); + grn_nfkc50_map1(utf8); + } +} + +static void +bench_map2_switch(gpointer user_data) +{ + uint64_t prefix_code_point; + uint64_t suffix_code_point = 0x11ba; + char prefix_utf8[7]; + char suffix_utf8[7]; + + ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8); + for (prefix_code_point = 1; + prefix_code_point < MAX_UNICODE; + prefix_code_point++) { + ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8); + grn_nfkc_map2(prefix_utf8, suffix_utf8); + } +} + +static void +bench_map2_table(gpointer user_data) +{ + uint64_t prefix_code_point; + uint64_t suffix_code_point = 0x11ba; + char prefix_utf8[7]; + char suffix_utf8[7]; + + ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8); + for (prefix_code_point = 1; + prefix_code_point < MAX_UNICODE; + prefix_code_point++) { + ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8); + grn_nfkc50_map2(prefix_utf8, suffix_utf8); + } +} + +static void +check_map1(gpointer user_data) +{ + uint64_t code_point; + char utf8[7]; + + for (code_point = 1; code_point < MAX_UNICODE; code_point++) { + const char *a; + const char *b; + + ucs2utf8(code_point, (unsigned char *)utf8); + a = grn_nfkc_map1(utf8); + b = grn_nfkc50_map1(utf8); + if (a == b) { + continue; + } + if (!a || !b) { + printf("%lx: %s: %s != %s\n", code_point, utf8, a, b); + continue; + } + if (strcmp(a, b) != 0) { + printf("%lx: %s: %s != %s\n", code_point, utf8, a, b); + } + } +} + +static void +check_map2(gpointer user_data) +{ + uint64_t prefix_code_point; + uint64_t suffix_code_point; + char prefix_utf8[7]; + char suffix_utf8[7]; + + for (prefix_code_point = 1; + prefix_code_point < MAX_UNICODE; + prefix_code_point++) { + ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8); + for (suffix_code_point = 1; + suffix_code_point < MAX_UNICODE; + suffix_code_point++) { + const char *a; + const char *b; + + ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8); + a = grn_nfkc_map2(prefix_utf8, suffix_utf8); + b = grn_nfkc50_map2(prefix_utf8, suffix_utf8); + if (a == b) { + continue; + } + if (!a || !b) { + printf("%lx-%lx: %s-%s: %s != %s\n", + prefix_code_point, suffix_code_point, + prefix_utf8, suffix_utf8, + a, b); + continue; + } + if (strcmp(a, b) != 0) { + printf("%lx-%lx: %s-%s: %s != %s\n", + prefix_code_point, suffix_code_point, + prefix_utf8, suffix_utf8, + a, b); + } + } + if ((prefix_code_point % 10000) == 0) { + printf("%" G_GUINT64_FORMAT "\n", prefix_code_point); + } + } +} + +int +main(int argc, gchar **argv) +{ + grn_rc rc; + BenchReporter *reporter; + gint n = 1; + + rc = grn_init(); + if (rc != GRN_SUCCESS) { + g_print("failed to initialize Groonga: <%d>: %s\n", + rc, grn_get_global_error_message()); + return EXIT_FAILURE; + } + bench_init(&argc, &argv); + + reporter = bench_reporter_new(); + +#define REGISTER(label, bench_function) \ + bench_reporter_register(reporter, label, n, \ + NULL, \ + bench_function, \ + NULL, \ + NULL) + REGISTER("map1 - switch", bench_map1_switch); + REGISTER("map1 - table", bench_map1_table); + REGISTER("map2 - switch", bench_map2_switch); + REGISTER("map2 - table", bench_map2_table); + + /* REGISTER("check - map1", check_map1); */ + /* REGISTER("check - map2", check_map2); */ +#undef REGISTER + + bench_reporter_run(reporter); + g_object_unref(reporter); + + return EXIT_SUCCESS; +} -------------- next part -------------- HTML����������������������������... Download