[Groonga-commit] groonga/groonga at 6baff61 [master] Support token metadata

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Sep 10 12:09:48 JST 2018


Kouhei Sutou	2018-09-10 12:09:48 +0900 (Mon, 10 Sep 2018)

  Revision: 6baff61a3e593e7853b0b0bf682d399dc120cffe
  https://github.com/groonga/groonga/commit/6baff61a3e593e7853b0b0bf682d399dc120cffe

  Message:
    Support token metadata
    
    New API:
    
      * grn_token_get_metadata()
      * grn_token_metadata_get_size()
      * grn_token_metadata_at()
      * grn_token_metadata_get()
      * grn_token_metadata_add()

  Added files:
    include/groonga/token_metadata.h
    lib/token_metadata.c
  Copied files:
    lib/grn_token_metadata.h
      (from lib/grn_token.h)
  Modified files:
    include/groonga.h
    include/groonga/Makefile.am
    include/groonga/token.h
    lib/c_sources.am
    lib/grn_token.h
    lib/token.c

  Modified: include/groonga.h (+2 -0)
===================================================================
--- include/groonga.h    2018-09-04 17:45:12 +0900 (583104af9)
+++ include/groonga.h    2018-09-10 12:09:48 +0900 (b465d6910)
@@ -1,5 +1,6 @@
 /*
   Copyright(C) 2014-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -51,6 +52,7 @@
 #include "groonga/thread.h"
 #include "groonga/time.h"
 #include "groonga/token.h"
+#include "groonga/token_metadata.h"
 #include "groonga/type.h"
 #include "groonga/util.h"
 #include "groonga/window_function.h"

  Modified: include/groonga/Makefile.am (+2 -1)
===================================================================
--- include/groonga/Makefile.am    2018-09-04 17:45:12 +0900 (b9e1dadf3)
+++ include/groonga/Makefile.am    2018-09-10 12:09:48 +0900 (adbecf008)
@@ -37,8 +37,9 @@ groonga_include_HEADERS =			\
 	thread.h				\
 	time.h					\
 	token.h					\
-	tokenizer.h				\
 	token_filter.h				\
+	token_metadata.h			\
+	tokenizer.h				\
 	tokenizer_query_deprecated.h		\
 	type.h					\
 	nfkc.h					\

  Modified: include/groonga/token.h (+5 -0)
===================================================================
--- include/groonga/token.h    2018-09-04 17:45:12 +0900 (fe55458b0)
+++ include/groonga/token.h    2018-09-10 12:09:48 +0900 (25f7345d9)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2014-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -161,6 +162,10 @@ grn_token_set_overlap(grn_ctx *ctx,
                       grn_token *token,
                       grn_bool have_overlap);
 
+GRN_API grn_obj *
+grn_token_get_metadata(grn_ctx *ctx,
+                       grn_token *token);
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif  /* __cplusplus */

  Added: include/groonga/token_metadata.h (+49 -0) 100644
===================================================================
--- /dev/null
+++ include/groonga/token_metadata.h    2018-09-10 12:09:48 +0900 (75e0a9ad5)
@@ -0,0 +1,49 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif  /* __cplusplus */
+
+GRN_API size_t
+grn_token_metadata_get_size(grn_ctx *ctx,
+                            grn_obj *metadata);
+GRN_API grn_rc
+grn_token_metadata_at(grn_ctx *ctx,
+                      grn_obj *metadata,
+                      size_t i,
+                      grn_obj *name,
+                      grn_obj *value);
+GRN_API grn_rc
+grn_token_metadata_get(grn_ctx *ctx,
+                       grn_obj *metadata,
+                       const char *name,
+                       int name_length,
+                       grn_obj *value);
+GRN_API grn_rc
+grn_token_metadata_add(grn_ctx *ctx,
+                       grn_obj *metadata,
+                       const char *name,
+                       int name_length,
+                       grn_obj *value);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif  /* __cplusplus */

  Modified: lib/c_sources.am (+2 -0)
===================================================================
--- lib/c_sources.am    2018-09-04 17:45:12 +0900 (1895fb0a7)
+++ lib/c_sources.am    2018-09-10 12:09:48 +0900 (287f7f8e5)
@@ -107,6 +107,8 @@ libgroonga_c_sources =				\
 	grn_token.h				\
 	token_cursor.c				\
 	grn_token_cursor.h			\
+	token_metadata.c			\
+	grn_token_metadata.h			\
 	tokenizer.c				\
 	grn_tokenizer.h				\
 	tokenizers.c				\

  Modified: lib/grn_token.h (+2 -0)
===================================================================
--- lib/grn_token.h    2018-09-04 17:45:12 +0900 (6de9f4206)
+++ lib/grn_token.h    2018-09-10 12:09:48 +0900 (f93a3c936)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -31,6 +32,7 @@ struct _grn_token {
   uint32_t source_length;
   uint32_t source_first_character_length;
   grn_bool have_overlap;
+  grn_obj metadata;
 };
 
 grn_rc grn_token_init(grn_ctx *ctx, grn_token *token);

  Copied: lib/grn_token_metadata.h (+5 -14) 63%
===================================================================
--- lib/grn_token.h    2018-09-04 17:45:12 +0900 (6de9f4206)
+++ lib/grn_token_metadata.h    2018-09-10 12:09:48 +0900 (8baff5297)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -24,19 +24,10 @@
 extern "C" {
 #endif
 
-struct _grn_token {
-  grn_obj data;
-  grn_token_status status;
-  uint64_t source_offset;
-  uint32_t source_length;
-  uint32_t source_first_character_length;
-  grn_bool have_overlap;
-};
-
-grn_rc grn_token_init(grn_ctx *ctx, grn_token *token);
-grn_rc grn_token_fin(grn_ctx *ctx, grn_token *token);
-grn_rc grn_token_reset(grn_ctx *ctx, grn_token *token);
-grn_rc grn_token_copy(grn_ctx *ctx, grn_token *token, grn_token *source);
+void grn_token_metadata_init(grn_ctx *ctx, grn_obj *metadata);
+void grn_token_metadata_fin(grn_ctx *ctx, grn_obj *metadata);
+void grn_token_metadata_reset(grn_ctx *ctx, grn_obj *metadata);
+void grn_token_metadata_copy(grn_ctx *ctx, grn_obj *metadata, grn_obj *source);
 
 #ifdef __cplusplus
 }

  Modified: lib/token.c (+19 -0)
===================================================================
--- lib/token.c    2018-09-04 17:45:12 +0900 (ae111be3f)
+++ lib/token.c    2018-09-10 12:09:48 +0900 (f98e512c5)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2012-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -17,6 +18,7 @@
 */
 
 #include "grn_token.h"
+#include "grn_token_metadata.h"
 
 grn_rc
 grn_token_init(grn_ctx *ctx, grn_token *token)
@@ -28,6 +30,7 @@ grn_token_init(grn_ctx *ctx, grn_token *token)
   token->source_length = 0;
   token->source_first_character_length = 0;
   token->have_overlap = GRN_FALSE;
+  grn_token_metadata_init(ctx, &(token->metadata));
   GRN_API_RETURN(ctx->rc);
 }
 
@@ -35,6 +38,7 @@ grn_rc
 grn_token_fin(grn_ctx *ctx, grn_token *token)
 {
   GRN_API_ENTER;
+  grn_token_metadata_fin(ctx, &(token->metadata));
   GRN_OBJ_FIN(ctx, &(token->data));
   GRN_API_RETURN(ctx->rc);
 }
@@ -229,6 +233,18 @@ exit:
   GRN_API_RETURN(ctx->rc);
 }
 
+grn_obj *
+grn_token_get_metadata(grn_ctx *ctx, grn_token *token)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][data][get][metadata] token must not be NULL");
+    GRN_API_RETURN(NULL);
+  }
+  GRN_API_RETURN(&(token->metadata));
+}
+
 grn_rc
 grn_token_reset(grn_ctx *ctx, grn_token *token)
 {
@@ -243,6 +259,7 @@ grn_token_reset(grn_ctx *ctx, grn_token *token)
   token->source_length = 0;
   token->source_first_character_length = 0;
   token->have_overlap = GRN_FALSE;
+  grn_token_metadata_reset(ctx, &(token->metadata));
 exit:
   GRN_API_RETURN(ctx->rc);
 }
@@ -266,6 +283,8 @@ grn_token_copy(grn_ctx *ctx,
   token->source_length = source->source_length;
   token->source_first_character_length = source->source_first_character_length;
   token->have_overlap = source->have_overlap;
+  grn_token_metadata_reset(ctx, &(token->metadata));
+  grn_token_metadata_copy(ctx, &(token->metadata), &(source->metadata));
 exit:
   GRN_API_RETURN(ctx->rc);
 }

  Added: lib/token_metadata.c (+213 -0) 100644
===================================================================
--- /dev/null
+++ lib/token_metadata.c    2018-09-10 12:09:48 +0900 (7634535cf)
@@ -0,0 +1,213 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com>
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "grn_token_metadata.h"
+
+void
+grn_token_metadata_init(grn_ctx *ctx,
+                        grn_obj *metadata)
+{
+  GRN_TEXT_INIT(metadata, GRN_OBJ_VECTOR);
+}
+
+void
+grn_token_metadata_fin(grn_ctx *ctx,
+                       grn_obj *metadata)
+{
+  GRN_OBJ_FIN(ctx, metadata);
+}
+
+void
+grn_token_metadata_reset(grn_ctx *ctx,
+                         grn_obj *metadata)
+{
+  GRN_BULK_REWIND(metadata);
+}
+
+void
+grn_token_metadata_copy(grn_ctx *ctx,
+                        grn_obj *metadata,
+                        grn_obj *source)
+{
+  int i;
+  int n;
+
+  n = grn_vector_size(ctx, source);
+  for (i = 0; i < n; i++) {
+    const char *value;
+    unsigned int value_length;
+    int domain;
+    value_length = grn_vector_get_element(ctx, source, i, &value, NULL, &domain);
+    grn_vector_add_element(ctx,
+                           metadata,
+                           value,
+                           value_length,
+                           0,
+                           domain);
+  }
+}
+
+size_t
+grn_token_metadata_get_size(grn_ctx *ctx,
+                            grn_obj *metadata)
+{
+  size_t size;
+  GRN_API_ENTER;
+  if (!metadata) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][metadata][get][size] token metadata must not be NULL");
+    GRN_API_RETURN(0);
+  }
+  size = grn_vector_size(ctx, metadata) / 2;
+  GRN_API_RETURN(size);
+}
+
+grn_rc
+grn_token_metadata_at(grn_ctx *ctx,
+                      grn_obj *metadata,
+                      size_t i,
+                      grn_obj *name,
+                      grn_obj *value)
+{
+  size_t n;
+  const char *raw_name;
+  unsigned int raw_name_length;
+  grn_id name_domain;
+  const char *raw_value;
+  unsigned int raw_value_length;
+  grn_id value_domain;
+
+  GRN_API_ENTER;
+  if (!metadata) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][metadata][at] token metadata must not be NULL");
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  n = grn_vector_size(ctx, metadata) / 2;
+  if (i >= n) {
+    GRN_BULK_REWIND(name);
+    GRN_BULK_REWIND(value);
+    GRN_API_RETURN(GRN_SUCCESS);
+  }
+
+  raw_name_length = grn_vector_get_element(ctx,
+                                           metadata,
+                                           i * 2,
+                                           &raw_name,
+                                           NULL,
+                                           &name_domain);
+  grn_obj_reinit(ctx, name, name_domain, 0);
+  grn_bulk_write(ctx, name, raw_name, raw_name_length);
+
+  raw_value_length = grn_vector_get_element(ctx,
+                                            metadata,
+                                            i * 2 + 1,
+                                            &raw_value,
+                                            NULL,
+                                            &value_domain);
+  grn_obj_reinit(ctx, value, value_domain, 0);
+  grn_bulk_write(ctx, value, raw_value, raw_value_length);
+
+  GRN_API_RETURN(GRN_SUCCESS);
+}
+
+grn_rc
+grn_token_metadata_get(grn_ctx *ctx,
+                       grn_obj *metadata,
+                       const char *name,
+                       int name_length,
+                       grn_obj *value)
+{
+  size_t i;
+  size_t n;
+
+  GRN_API_ENTER;
+  if (!metadata) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][metadata][get] token metadata must not be NULL");
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  if (name_length < 0) {
+    name_length = strlen(name);
+  }
+
+  n = grn_vector_size(ctx, metadata) / 2;
+  for (i = 0; i < n; i++) {
+    const char *current_name;
+    unsigned int current_name_length;
+
+    current_name_length = grn_vector_get_element(ctx,
+                                                 metadata,
+                                                 i * 2,
+                                                 &current_name,
+                                                 NULL,
+                                                 NULL);
+    if (name_length == current_name_length &&
+        memcmp(name, current_name, name_length) == 0) {
+      const char *raw_value;
+      unsigned int raw_value_length;
+      grn_id domain;
+
+      raw_value_length = grn_vector_get_element(ctx,
+                                                metadata,
+                                                i * 2 + 1,
+                                                &raw_value,
+                                                NULL,
+                                                &domain);
+      grn_obj_reinit(ctx, value, domain, 0);
+      grn_bulk_write(ctx, value, raw_value, raw_value_length);
+      GRN_API_RETURN(GRN_SUCCESS);
+    }
+  }
+  GRN_BULK_REWIND(value);
+
+  GRN_API_RETURN(GRN_SUCCESS);
+}
+
+grn_rc
+grn_token_metadata_add(grn_ctx *ctx,
+                       grn_obj *metadata,
+                       const char *name,
+                       int name_length,
+                       grn_obj *value)
+{
+  GRN_API_ENTER;
+  if (!metadata) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][metadata][add] token metadata must not be NULL");
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  if (name_length < 0) {
+    name_length = strlen(name);
+  }
+
+  grn_vector_add_element(ctx, metadata, name, name_length, 0, GRN_DB_TEXT);
+  if (ctx->rc == GRN_SUCCESS) {
+    grn_vector_add_element(ctx,
+                           metadata,
+                           GRN_BULK_HEAD(value),
+                           GRN_BULK_VSIZE(value),
+                           0,
+                           value->header.domain);
+  }
+
+  GRN_API_RETURN(ctx->rc);
+}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180910/56aafa6c/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index