null+****@clear*****
null+****@clear*****
2010年 12月 16日 (木) 22:04:13 JST
Kouhei Sutou 2010-12-16 13:04:13 +0000 (Thu, 16 Dec 2010)
New Revision: 30c0c29a2b344b42e65462f07e665aa624e055d4
Log:
Implement create-dataset in C. #768
Added files:
src/suggest/groonga_suggest_create_dataset.c
Removed files:
src/suggest/create-dataset.sh
Modified files:
.gitignore
src/suggest/Makefile.am
Modified: .gitignore (+1 -0)
===================================================================
--- .gitignore 2010-12-16 13:02:50 +0000 (55db794)
+++ .gitignore 2010-12-16 13:04:13 +0000 (279debb)
@@ -81,3 +81,4 @@ version.sh
/misc/
/src/suggest/groonga-suggest-httpd
/src/suggest/groonga-suggest-learner
+/src/suggest/groonga-suggest-create-dataset
Modified: src/suggest/Makefile.am (+10 -2)
===================================================================
--- src/suggest/Makefile.am 2010-12-16 13:02:50 +0000 (6ba7af9)
+++ src/suggest/Makefile.am 2010-12-16 13:04:13 +0000 (7d6d8ae)
@@ -1,7 +1,9 @@
if ENABLE_SUGGEST_LEARNER
-bin_PROGRAMS = groonga-suggest-learner groonga-suggest-httpd
+bin_PROGRAMS = \
+ groonga-suggest-learner \
+ groonga-suggest-httpd \
+ groonga-suggest-create-dataset
noinst_LTLIBRARIES = libutil.la
-EXTRA_DIST = create-dataset.sh
endif
AM_CFLAGS = -fno-strict-aliasing $(COVERAGE_CFLAGS) $(GRN_CFLAGS)
@@ -39,4 +41,10 @@ groonga_suggest_httpd_LDADD = \
$(LIBZMQ_LIBS) \
$(MESSAGE_PACK_LIBS)
+groonga_suggest_create_dataset_SOURCES = groonga_suggest_create_dataset.c
+groonga_suggest_create_dataset_CFLAGS = \
+ $(AM_CFLAGS)
+groonga_suggest_create_dataset_LDADD = \
+ $(top_builddir)/lib/libgroonga.la
+
libutil_la_SOURCES = util.c
Deleted: src/suggest/create-dataset.sh (+0 -48) 100755
===================================================================
--- src/suggest/create-dataset.sh 2010-12-16 13:02:50 +0000 (0691a7c)
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/sh
-# Making table-set for groonga suggestion function.
-# (c) Brazil, Inc.
-
-if [ 2 != $# ]; then
- echo "usage: $0 dbpath dataset_name"
- exit 1
-fi
-
-DBPATH=$1
-DATASET=$2
-
-if [ ! -f ${DBPATH} ]; then
- echo "quit" | groonga -n ${DBPATH}
-fi
-
-groonga ${DBPATH} <<_EOT_
-register suggest/suggest
-table_create event_type TABLE_HASH_KEY ShortText
-table_create bigram TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram
-table_create kana TABLE_PAT_KEY|KEY_NORMALIZE ShortText
-
-table_create item_${DATASET} TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenDelimit
-column_create bigram item_${DATASET}_key COLUMN_INDEX|WITH_POSITION item_${DATASET} _key
-column_create item_${DATASET} kana COLUMN_VECTOR kana
-column_create kana item_${DATASET}_kana COLUMN_INDEX item_${DATASET} kana
-column_create item_${DATASET} freq COLUMN_SCALAR Int32
-column_create item_${DATASET} last COLUMN_SCALAR Time
-column_create item_${DATASET} boost COLUMN_SCALAR Int32
-column_create item_${DATASET} freq2 COLUMN_SCALAR Int32
-column_create item_${DATASET} buzz COLUMN_SCALAR Int32
-
-table_create pair_${DATASET} TABLE_HASH_KEY UInt64
-column_create pair_${DATASET} pre COLUMN_SCALAR item_${DATASET}
-column_create pair_${DATASET} post COLUMN_SCALAR item_${DATASET}
-column_create pair_${DATASET} freq0 COLUMN_SCALAR Int32
-column_create pair_${DATASET} freq1 COLUMN_SCALAR Int32
-column_create pair_${DATASET} freq2 COLUMN_SCALAR Int32
-column_create item_${DATASET} co COLUMN_INDEX pair_${DATASET} pre
-
-table_create sequence_${DATASET} TABLE_HASH_KEY ShortText
-table_create event_${DATASET} TABLE_NO_KEY
-column_create sequence_${DATASET} events COLUMN_VECTOR|RING_BUFFER event_${DATASET}
-column_create event_${DATASET} type COLUMN_SCALAR event_type
-column_create event_${DATASET} time COLUMN_SCALAR Time
-column_create event_${DATASET} item COLUMN_SCALAR item_${DATASET}
-column_create event_${DATASET} sequence COLUMN_SCALAR sequence_${DATASET}
-_EOT_
Added: src/suggest/groonga_suggest_create_dataset.c (+160 -0) 100644
===================================================================
--- /dev/null
+++ src/suggest/groonga_suggest_create_dataset.c 2010-12-16 13:04:13 +0000 (4051e7a)
@@ -0,0 +1,160 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2010- Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <groonga.h>
+
+/*
+ * Print the command-line synopsis and one example invocation to `output`.
+ *
+ * argv[0] is used as the program name in both lines; argc is accepted
+ * but not consulted.
+ */
+static void
+usage(FILE *output, int argc, char **argv)
+{
+  const char *program = argv[0];
+
+  fprintf(output, "Usage: %s DB_PATH DATASET_NAME\n", program);
+  fprintf(output, " e.g.: %s /tmp/db shops\n", program);
+}
+
+/*
+ * Receive the pending response(s) from `ctx` and print them to stdout.
+ *
+ * grn_ctx_recv() is called repeatedly for as long as it reports
+ * GRN_CTX_MORE in `flags`.  After each call a non-zero ctx->rc is
+ * printed as "ERROR (<rc>): <errbuf>", and a non-empty chunk is printed
+ * followed by a newline.
+ */
+static void
+output(grn_ctx *ctx)
+{
+  int flags;
+
+  do {
+    /* Reset the out parameters on every iteration: if grn_ctx_recv()
+       returns without filling them in, we must neither read
+       indeterminate values, re-print the previous chunk, nor keep
+       looping on a stale GRN_CTX_MORE flag. */
+    char *str = NULL;
+    unsigned int str_len = 0;
+
+    flags = 0;
+    grn_ctx_recv(ctx, &str, &str_len, &flags);
+    if (ctx->rc) {
+      printf("ERROR (%d): %s\n", ctx->rc, ctx->errbuf);
+    }
+    if (str_len > 0) {
+      /* The `*` precision argument of printf() must be an int. */
+      printf("%.*s\n", (int)str_len, str);
+    }
+  } while (flags & GRN_CTX_MORE);
+}
+
+/*
+ * Expand every "${DATASET}" placeholder in `command` to `dataset_name`,
+ * echo the expanded command to stdout prefixed with "> ", send it to
+ * `ctx`, and print the response through output().
+ *
+ * `buffer` is rewound and reused as scratch space for the expanded
+ * command line (a trailing "\n" is appended to it).  Nothing is done
+ * when ctx->rc already holds an error, so once ctx->rc is set to a
+ * failure code every later call is a no-op.
+ */
+static void
+send_command(grn_ctx *ctx, grn_obj *buffer, const char *command,
+             const char *dataset_name)
+{
+  const char *marker = "${DATASET}";
+  const size_t marker_length = strlen(marker);
+  const char *rest;
+  const char *found;
+
+  if (ctx->rc != GRN_SUCCESS) {
+    return;
+  }
+
+  GRN_BULK_REWIND(buffer);
+  /* Copy the text before each placeholder, then the dataset name, and
+     resume scanning right after the placeholder. */
+  for (rest = command;
+       (found = strstr(rest, marker));
+       rest = found + marker_length) {
+    GRN_TEXT_PUT(ctx, buffer, rest, found - rest);
+    GRN_TEXT_PUTS(ctx, buffer, dataset_name);
+  }
+  /* Whatever follows the last placeholder, plus the line terminator. */
+  GRN_TEXT_PUTS(ctx, buffer, rest);
+  GRN_TEXT_PUTS(ctx, buffer, "\n");
+
+  printf("> %.*s", (int)GRN_TEXT_LEN(buffer), GRN_TEXT_VALUE(buffer));
+  grn_ctx_send(ctx, GRN_TEXT_VALUE(buffer), GRN_TEXT_LEN(buffer), 0);
+  output(ctx);
+}
+
+
+/*
+ * Create the tables and columns of a suggest dataset in a database.
+ *
+ * Usage: PROGRAM DB_PATH DATASET_NAME
+ *
+ * The database at DB_PATH is opened when the path exists and created
+ * otherwise.  The commands in `commands` are then sent in order, with
+ * "${DATASET}" replaced by DATASET_NAME; send_command() skips the
+ * remaining commands once ctx->rc reports an error.
+ *
+ * Returns EXIT_SUCCESS when the library, context and database were set
+ * up and ctx->rc is GRN_SUCCESS after the last command, EXIT_FAILURE
+ * otherwise (including wrong argument count).
+ */
+int
+main(int argc, char **argv)
+{
+  /* Schema definition; "${DATASET}" is expanded by send_command(). */
+  static const char * const commands[] = {
+    "register suggest/suggest",
+    "table_create event_type TABLE_HASH_KEY ShortText",
+    "table_create bigram TABLE_PAT_KEY|KEY_NORMALIZE ShortText "
+    "--default_tokenizer TokenBigram",
+    "table_create kana TABLE_PAT_KEY|KEY_NORMALIZE ShortText",
+    "table_create item_${DATASET} TABLE_PAT_KEY|KEY_NORMALIZE "
+    "ShortText --default_tokenizer TokenDelimit",
+    "column_create bigram item_${DATASET}_key "
+    "COLUMN_INDEX|WITH_POSITION item_${DATASET} _key",
+    "column_create item_${DATASET} kana COLUMN_VECTOR kana",
+    "column_create kana item_${DATASET}_kana COLUMN_INDEX "
+    "item_${DATASET} kana",
+    "column_create item_${DATASET} freq COLUMN_SCALAR Int32",
+    "column_create item_${DATASET} last COLUMN_SCALAR Time",
+    "column_create item_${DATASET} boost COLUMN_SCALAR Int32",
+    "column_create item_${DATASET} freq2 COLUMN_SCALAR Int32",
+    "column_create item_${DATASET} buzz COLUMN_SCALAR Int32",
+
+    "table_create pair_${DATASET} TABLE_HASH_KEY UInt64",
+    "column_create pair_${DATASET} pre COLUMN_SCALAR item_${DATASET}",
+    "column_create pair_${DATASET} post COLUMN_SCALAR item_${DATASET}",
+    "column_create pair_${DATASET} freq0 COLUMN_SCALAR Int32",
+    "column_create pair_${DATASET} freq1 COLUMN_SCALAR Int32",
+    "column_create pair_${DATASET} freq2 COLUMN_SCALAR Int32",
+    "column_create item_${DATASET} co COLUMN_INDEX pair_${DATASET} pre",
+
+    "table_create sequence_${DATASET} TABLE_HASH_KEY ShortText",
+    "table_create event_${DATASET} TABLE_NO_KEY",
+    "column_create sequence_${DATASET} events "
+    "COLUMN_VECTOR|RING_BUFFER event_${DATASET}",
+    "column_create event_${DATASET} type COLUMN_SCALAR event_type",
+    "column_create event_${DATASET} time COLUMN_SCALAR Time",
+    "column_create event_${DATASET} item COLUMN_SCALAR item_${DATASET}",
+    "column_create event_${DATASET} sequence COLUMN_SCALAR "
+    "sequence_${DATASET}"
+  };
+  const size_t n_commands = sizeof(commands) / sizeof(commands[0]);
+  const char *db_path;
+  const char *dataset_name;
+  grn_ctx ctx_, *ctx;
+  grn_obj *db;
+  grn_bool success = GRN_FALSE;
+  size_t i;
+
+  if (argc != 3) {
+    usage(stderr, argc, argv);
+    return(EXIT_FAILURE);
+  }
+
+  db_path = argv[1];
+  dataset_name = argv[2];
+
+  /* Nothing below is meaningful unless the library and the context
+     were initialized, so bail out early instead of ignoring the
+     return codes. */
+  if (grn_init() != GRN_SUCCESS) {
+    fprintf(stderr, "groonga initialization failed\n");
+    return(EXIT_FAILURE);
+  }
+
+  ctx = &ctx_;
+  if (grn_ctx_init(ctx, 0) != GRN_SUCCESS) {
+    fprintf(stderr, "context initialization failed\n");
+    grn_fin();
+    return(EXIT_FAILURE);
+  }
+
+  /* Open the database when the path exists, create it otherwise. */
+  if (access(db_path, F_OK) == 0) {
+    db = grn_db_open(ctx, db_path);
+    if (!db) {
+      fprintf(stderr, "DB open failed (%s): %s\n", db_path, ctx->errbuf);
+    }
+  } else {
+    db = grn_db_create(ctx, db_path, NULL);
+    if (!db) {
+      fprintf(stderr, "DB create failed (%s): %s\n", db_path, ctx->errbuf);
+    }
+  }
+
+  if (db) {
+    grn_obj text;
+    GRN_TEXT_INIT(&text, 0);
+    for (i = 0; i < n_commands; i++) {
+      send_command(ctx, &text, commands[i], dataset_name);
+    }
+    /* Sample the result before the objects are finalized below. */
+    success = ctx->rc == GRN_SUCCESS;
+    GRN_OBJ_FIN(ctx, &text);
+    GRN_OBJ_FIN(ctx, db);
+  }
+  grn_ctx_fin(ctx);
+  grn_fin();
+
+  return success ? EXIT_SUCCESS : EXIT_FAILURE;
+}