[Groonga-commit] groonga/groonga at 71c56da [master] Add benchmark for NFKC implementation

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jun 22 00:42:14 JST 2016


Kouhei Sutou	2016-06-22 00:42:14 +0900 (Wed, 22 Jun 2016)

  New Revision: 71c56dab010713c3f9990841ea6ee60969be21a1
  https://github.com/groonga/groonga/commit/71c56dab010713c3f9990841ea6ee60969be21a1

  Message:
    Add benchmark for NFKC implementation

  Added files:
    benchmark/bench-nfkc.c
  Modified files:
    .gitignore
    benchmark/Makefile.am

  Modified: .gitignore (+1 -0)
===================================================================
--- .gitignore    2016-06-22 00:28:31 +0900 (7e1903d)
+++ .gitignore    2016-06-22 00:42:14 +0900 (2134e98)
@@ -106,6 +106,7 @@ CMakeFiles
 /benchmark/bench-range-select
 /benchmark/bench-result-set
 /benchmark/bench-between-sequential
+/benchmark/bench-nfkc
 /packages/*/*.log
 /packages/apt/env.sh
 /packages/apt/debian/groonga-keyring.postrm

  Modified: benchmark/Makefile.am (+11 -2)
===================================================================
--- benchmark/Makefile.am    2016-06-22 00:28:31 +0900 (d128fde)
+++ benchmark/Makefile.am    2016-06-22 00:42:14 +0900 (bdc4698)
@@ -13,7 +13,8 @@ noinst_PROGRAMS =				\
 	bench-query-optimizer			\
 	bench-range-select			\
 	bench-result-set			\
-	bench-between-sequential
+	bench-between-sequential		\
+	bench-nfkc
 endif
 
 EXTRA_DIST =					\
@@ -60,6 +61,9 @@ nodist_EXTRA_bench_result_set_SOURCES = $(NONEXISTENT_CXX_SOURCE)
 bench_between_sequential_SOURCES = bench-between-sequential.c
 nodist_EXTRA_bench_between_sequential_SOURCES = $(NONEXISTENT_CXX_SOURCE)
 
+bench_nfkc_SOURCES = bench-nfkc.c
+nodist_EXTRA_bench_nfkc_SOURCES = $(NONEXISTENT_CXX_SOURCE)
+
 benchmarks =					\
 	run-bench-table-factory			\
 	run-bench-geo-distance			\
@@ -68,7 +72,8 @@ benchmarks =					\
 	run-bench-query-optimizer		\
 	run-bench-range-select			\
 	run-bench-result-set			\
-	run-bench-between-sequential
+	run-bench-between-sequential		\
+	run-bench-nfkc
 
 run-bench-table-factory: bench-table-factory
 	@echo $@:
@@ -132,4 +137,8 @@ run-bench-between-sequential: bench-between-sequential
 	  GRN_RUBY_SCRIPTS_DIR="$(top_srcdir)/lib/mrb/scripts"	\
 	  ./bench-between-sequential
 
+run-bench-nfkc: bench-nfkc
+	@echo $@:
+	./bench-nfkc
+
 benchmark: $(benchmarks)

  Added: benchmark/bench-nfkc.c (+239 -0) 100644
===================================================================
--- /dev/null
+++ benchmark/bench-nfkc.c    2016-06-22 00:42:14 +0900 (7c601fc)
@@ -0,0 +1,239 @@
+/* -*- c-basic-offset: 2; coding: utf-8 -*- */
+/*
+  Copyright (C) 2015-2016  Kouhei Sutou <kou ＠ clear-code.com>
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <glib.h>
+
+#include <groonga.h>
+
+#include "lib/benchmark.h"
+
+const char *grn_nfkc_map1(const unsigned char *str);
+const char *grn_nfkc50_map1(const unsigned char *str);
+const char *grn_nfkc_map2(const unsigned char *prefix,
+                          const unsigned char *suffix);
+const char *grn_nfkc50_map2(const unsigned char *prefix,
+                            const unsigned char *suffix);
+
+#include "../lib/nfkc.c"
+#include "../lib/nfkc50.c"
+
+#define MAX_UNICODE 0x110000
+#define BUFFER_SIZE 0x100
+
+static int
+ucs2utf8(unsigned int i, unsigned char *buf)
+{
+  unsigned char *p = buf;
+  if (i < 0x80) {
+    *p++ = i;
+  } else {
+    if (i < 0x800) {
+      *p++ = (i >> 6) | 0xc0;
+    } else {
+      if (i < 0x00010000) {
+        *p++ = (i >> 12) | 0xe0;
+      } else {
+        if (i < 0x00200000) {
+          *p++ = (i >> 18) | 0xf0;
+        } else {
+          if (i < 0x04000000) {
+            *p++ = (i >> 24) | 0xf8;
+          } else if (i < 0x80000000) {
+            *p++ = (i >> 30) | 0xfc;
+            *p++ = ((i >> 24) & 0x3f) | 0x80;
+          }
+          *p++ = ((i >> 18) & 0x3f) | 0x80;
+        }
+        *p++ = ((i >> 12) & 0x3f) | 0x80;
+      }
+      *p++ = ((i >> 6) & 0x3f) | 0x80;
+    }
+    *p++ = (0x3f & i) | 0x80;
+  }
+  *p = '\0';
+  return (p - buf);
+}
+
+static void
+bench_map1_switch(gpointer user_data)
+{
+  uint64_t code_point;
+  char utf8[7];
+
+  for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
+    ucs2utf8(code_point, (unsigned char *)utf8);
+    grn_nfkc_map1(utf8);
+  }
+}
+
+static void
+bench_map1_table(gpointer user_data)
+{
+  uint64_t code_point;
+  char utf8[7];
+
+  for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
+    ucs2utf8(code_point, (unsigned char *)utf8);
+    grn_nfkc50_map1(utf8);
+  }
+}
+
+static void
+bench_map2_switch(gpointer user_data)
+{
+  uint64_t prefix_code_point;
+  uint64_t suffix_code_point = 0x11ba;
+  char prefix_utf8[7];
+  char suffix_utf8[7];
+
+  ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
+  for (prefix_code_point = 1;
+       prefix_code_point < MAX_UNICODE;
+       prefix_code_point++) {
+    ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
+    grn_nfkc_map2(prefix_utf8, suffix_utf8);
+  }
+}
+
+static void
+bench_map2_table(gpointer user_data)
+{
+  uint64_t prefix_code_point;
+  uint64_t suffix_code_point = 0x11ba;
+  char prefix_utf8[7];
+  char suffix_utf8[7];
+
+  ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
+  for (prefix_code_point = 1;
+       prefix_code_point < MAX_UNICODE;
+       prefix_code_point++) {
+    ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
+    grn_nfkc50_map2(prefix_utf8, suffix_utf8);
+  }
+}
+
+static void
+check_map1(gpointer user_data)
+{
+  uint64_t code_point;
+  char utf8[7];
+
+  for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
+    const char *a;
+    const char *b;
+
+    ucs2utf8(code_point, (unsigned char *)utf8);
+    a = grn_nfkc_map1(utf8);
+    b = grn_nfkc50_map1(utf8);
+    if (a == b) {
+      continue;
+    }
+    if (!a || !b) {
+      printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
+      continue;
+    }
+    if (strcmp(a, b) != 0) {
+      printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
+    }
+  }
+}
+
+static void
+check_map2(gpointer user_data)
+{
+  uint64_t prefix_code_point;
+  uint64_t suffix_code_point;
+  char prefix_utf8[7];
+  char suffix_utf8[7];
+
+  for (prefix_code_point = 1;
+       prefix_code_point < MAX_UNICODE;
+       prefix_code_point++) {
+    ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
+    for (suffix_code_point = 1;
+         suffix_code_point < MAX_UNICODE;
+         suffix_code_point++) {
+      const char *a;
+      const char *b;
+
+      ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
+      a = grn_nfkc_map2(prefix_utf8, suffix_utf8);
+      b = grn_nfkc50_map2(prefix_utf8, suffix_utf8);
+      if (a == b) {
+        continue;
+      }
+      if (!a || !b) {
+        printf("%lx-%lx: %s-%s: %s != %s\n",
+               prefix_code_point, suffix_code_point,
+               prefix_utf8, suffix_utf8,
+               a, b);
+        continue;
+      }
+      if (strcmp(a, b) != 0) {
+        printf("%lx-%lx: %s-%s: %s != %s\n",
+               prefix_code_point, suffix_code_point,
+               prefix_utf8, suffix_utf8,
+               a, b);
+      }
+    }
+    if ((prefix_code_point % 10000) == 0) {
+      printf("%" G_GUINT64_FORMAT "\n", prefix_code_point);
+    }
+  }
+}
+
+int
+main(int argc, gchar **argv)
+{
+  grn_rc rc;
+  BenchReporter *reporter;
+  gint n = 1;
+
+  rc = grn_init();
+  if (rc != GRN_SUCCESS) {
+    g_print("failed to initialize Groonga: <%d>: %s\n",
+            rc, grn_get_global_error_message());
+    return EXIT_FAILURE;
+  }
+  bench_init(&argc, &argv);
+
+  reporter = bench_reporter_new();
+
+#define REGISTER(label, bench_function)                 \
+  bench_reporter_register(reporter, label, n,           \
+                          NULL,                         \
+                          bench_function,               \
+                          NULL,                         \
+                          NULL)
+  REGISTER("map1 - switch", bench_map1_switch);
+  REGISTER("map1 -  table", bench_map1_table);
+  REGISTER("map2 - switch", bench_map2_switch);
+  REGISTER("map2 -  table", bench_map2_table);
+
+  /* REGISTER("check - map1", check_map1); */
+  /* REGISTER("check - map2", check_map2); */
+#undef REGISTER
+
+  bench_reporter_run(reporter);
+  g_object_unref(reporter);
+
+  return EXIT_SUCCESS;
+}
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index