[Groonga-commit] groonga/groonga at a947f80 [master] mruby: implement scan_info_build() in mruby

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Aug 6 20:24:49 JST 2014


Kouhei Sutou	2014-08-06 20:24:49 +0900 (Wed, 06 Aug 2014)

  New Revision: a947f80ad22594f76401a9b95e9882d19dcfebe7
  https://github.com/groonga/groonga/commit/a947f80ad22594f76401a9b95e9882d19dcfebe7

  Message:
    mruby: implement scan_info_build() in mruby
    
    Yay!

  Added files:
    lib/mrb/scripts/scan_info_builder.rb
  Copied files:
    lib/mrb/scripts/scan_info_data.rb
      (from lib/mrb/scripts/scan_info.rb)
  Modified files:
    lib/mrb/mrb_bulk.c
    lib/mrb/mrb_expr.c
    lib/mrb/scripts/expression.rb
    lib/mrb/scripts/scan_info.rb
    lib/mrb/scripts/sources.am

  Modified: lib/mrb/mrb_bulk.c (+16 -0)
===================================================================
--- lib/mrb/mrb_bulk.c    2014-08-05 12:59:05 +0900 (aa680b0)
+++ lib/mrb/mrb_bulk.c    2014-08-06 20:24:49 +0900 (746ba78)
@@ -79,6 +79,20 @@ mrb_grn_bulk_get_value(mrb_state *mrb, mrb_value self)
   return mrb_value_;
 }
 
+static mrb_value
+mrb_grn_bulk_equal(mrb_state *mrb, mrb_value self)
+{
+  mrb_value mrb_other;
+
+  mrb_get_args(mrb, "o", &mrb_other);
+
+  if (!mrb_obj_is_kind_of(mrb, mrb_other, mrb_class(mrb, self))) {
+    return mrb_false_value();
+  }
+
+  return mrb_bool_value(DATA_PTR(self) == DATA_PTR(mrb_other));
+}
+
 void
 grn_mrb_bulk_init(grn_ctx *ctx)
 {
@@ -95,5 +109,7 @@ grn_mrb_bulk_init(grn_ctx *ctx)
                     mrb_grn_bulk_get_domain, MRB_ARGS_NONE());
   mrb_define_method(mrb, klass, "value",
                     mrb_grn_bulk_get_value, MRB_ARGS_NONE());
+  mrb_define_method(mrb, klass, "==",
+                    mrb_grn_bulk_equal, MRB_ARGS_REQ(1));
 }
 #endif

  Modified: lib/mrb/mrb_expr.c (+35 -261)
===================================================================
--- lib/mrb/mrb_expr.c    2014-08-05 12:59:05 +0900 (362e834)
+++ lib/mrb/mrb_expr.c    2014-08-06 20:24:49 +0900 (286047d)
@@ -71,257 +71,6 @@ mrb_grn_expr_code_new(mrb_state *mrb, grn_expr_code *code)
   return mrb_obj_new(mrb, klass, 1, &mrb_code);
 }
 
-static scan_info **
-scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
-                grn_operator op, uint32_t size)
-{
-  grn_obj *var;
-  scan_stat stat;
-  int i, m = 0, o = 0;
-  scan_info **sis, *si = NULL;
-  grn_expr_code *c, *ce;
-  grn_expr *e = (grn_expr *)expr;
-  mrb_state *mrb = ctx->impl->mrb.state;
-  mrb_value mrb_expr;
-  mrb_value mrb_var;
-  mrb_value mrb_si;
-
-  mrb_expr = grn_mrb_value_from_grn_obj(mrb, expr);
-  mrb_var = mrb_funcall(mrb, mrb_expr,
-                        "get_var_by_offset", 1, mrb_fixnum_value(0));
-  if (mrb_nil_p(mrb_var)) {
-    return NULL;
-  }
-
-  var = mrb_cptr(mrb_var);
-  for (stat = SCAN_START, c = e->codes, ce = &e->codes[e->codes_curr]; c < ce; c++) {
-    switch (c->op) {
-    case GRN_OP_MATCH :
-    case GRN_OP_NEAR :
-    case GRN_OP_NEAR2 :
-    case GRN_OP_SIMILAR :
-    case GRN_OP_PREFIX :
-    case GRN_OP_SUFFIX :
-    case GRN_OP_EQUAL :
-    case GRN_OP_NOT_EQUAL :
-    case GRN_OP_LESS :
-    case GRN_OP_GREATER :
-    case GRN_OP_LESS_EQUAL :
-    case GRN_OP_GREATER_EQUAL :
-    case GRN_OP_GEO_WITHINP5 :
-    case GRN_OP_GEO_WITHINP6 :
-    case GRN_OP_GEO_WITHINP8 :
-    case GRN_OP_TERM_EXTRACT :
-      if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; }
-      stat = SCAN_START;
-      m++;
-      break;
-    case GRN_OP_AND :
-    case GRN_OP_OR :
-    case GRN_OP_AND_NOT :
-    case GRN_OP_ADJUST :
-      if (stat != SCAN_START) { return NULL; }
-      o++;
-      if (o >= m) { return NULL; }
-      break;
-    case GRN_OP_PUSH :
-      stat = (c->value == var) ? SCAN_VAR : SCAN_CONST;
-      break;
-    case GRN_OP_GET_VALUE :
-      switch (stat) {
-      case SCAN_START :
-      case SCAN_CONST :
-      case SCAN_VAR :
-        stat = SCAN_COL1;
-        break;
-      case SCAN_COL1 :
-        stat = SCAN_COL2;
-        break;
-      case SCAN_COL2 :
-        break;
-      default :
-        return NULL;
-        break;
-      }
-      break;
-    case GRN_OP_CALL :
-      if ((c->flags & GRN_EXPR_CODE_RELATIONAL_EXPRESSION) || c + 1 == ce) {
-        stat = SCAN_START;
-        m++;
-      } else {
-        stat = SCAN_COL2;
-      }
-      break;
-    default :
-      return NULL;
-      break;
-    }
-  }
-  if (stat || m != o + 1) { return NULL; }
-  if (!(sis = GRN_MALLOCN(scan_info *, m + m + o))) { return NULL; }
-  for (i = 0, stat = SCAN_START, c = e->codes, ce = &e->codes[e->codes_curr]; c < ce; c++) {
-    switch (c->op) {
-    case GRN_OP_MATCH :
-    case GRN_OP_NEAR :
-    case GRN_OP_NEAR2 :
-    case GRN_OP_SIMILAR :
-    case GRN_OP_PREFIX :
-    case GRN_OP_SUFFIX :
-    case GRN_OP_EQUAL :
-    case GRN_OP_NOT_EQUAL :
-    case GRN_OP_LESS :
-    case GRN_OP_GREATER :
-    case GRN_OP_LESS_EQUAL :
-    case GRN_OP_GREATER_EQUAL :
-    case GRN_OP_GEO_WITHINP5 :
-    case GRN_OP_GEO_WITHINP6 :
-    case GRN_OP_GEO_WITHINP8 :
-    case GRN_OP_TERM_EXTRACT :
-      stat = SCAN_START;
-      mrb_si = mrb_grn_scan_info_new(mrb, si);
-      mrb_funcall(mrb, mrb_si, "op=", 1, mrb_fixnum_value(c->op));
-      mrb_funcall(mrb, mrb_si, "end=", 1, mrb_fixnum_value(c - e->codes));
-      sis[i++] = si;
-      mrb_funcall(mrb, mrb_si, "match_resolve_index", 0);
-      si = NULL;
-      break;
-    case GRN_OP_AND :
-    case GRN_OP_OR :
-    case GRN_OP_AND_NOT :
-    case GRN_OP_ADJUST :
-      if (!grn_scan_info_put_logical_op(ctx, sis, &i, c->op, c - e->codes)) { return NULL; }
-      stat = SCAN_START;
-      break;
-    case GRN_OP_PUSH :
-      if (!si) {
-        si = grn_scan_info_open(ctx, c - e->codes);
-        if (!si) {
-          int j;
-          for (j = 0; j < i; j++) { grn_scan_info_close(ctx, sis[j]); }
-          GRN_FREE(sis);
-          return NULL;
-        }
-      }
-      if (c->value == var) {
-        stat = SCAN_VAR;
-      } else {
-        mrb_si = mrb_grn_scan_info_new(mrb, si);
-        mrb_funcall(mrb, mrb_si, "push_arg",
-                    1, grn_mrb_value_from_grn_obj(mrb, c->value));
-        if (stat == SCAN_START) { grn_scan_info_set_flags(si, grn_scan_info_get_flags(si) | SCAN_PRE_CONST); }
-        stat = SCAN_CONST;
-      }
-      break;
-    case GRN_OP_GET_VALUE :
-      switch (stat) {
-      case SCAN_START :
-        if (!si) {
-          si = grn_scan_info_open(ctx, c - e->codes);
-          if (!si) {
-            int j;
-            for (j = 0; j < i; j++) { grn_scan_info_close(ctx, sis[j]); }
-            GRN_FREE(sis);
-            return NULL;
-          }
-        }
-        // fallthru
-      case SCAN_CONST :
-      case SCAN_VAR :
-        stat = SCAN_COL1;
-        mrb_si = mrb_grn_scan_info_new(mrb, si);
-        mrb_funcall(mrb, mrb_si, "push_arg",
-                    1, grn_mrb_value_from_grn_obj(mrb, c->value));
-        break;
-      case SCAN_COL1 :
-        {
-          int j;
-          grn_obj inspected;
-          GRN_TEXT_INIT(&inspected, 0);
-          GRN_TEXT_PUTS(ctx, &inspected, "<");
-          grn_inspect_name(ctx, &inspected, c->value);
-          GRN_TEXT_PUTS(ctx, &inspected, ">: <");
-          grn_inspect(ctx, &inspected, expr);
-          GRN_TEXT_PUTS(ctx, &inspected, ">");
-          ERR(GRN_INVALID_ARGUMENT,
-              "invalid expression: can't use column as a value: %.*s",
-              (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected));
-          GRN_OBJ_FIN(ctx, &inspected);
-          for (j = 0; j < i; j++) { grn_scan_info_close(ctx, sis[j]); }
-          GRN_FREE(sis);
-          return NULL;
-        }
-        stat = SCAN_COL2;
-        break;
-      case SCAN_COL2 :
-        break;
-      default :
-        break;
-      }
-      break;
-    case GRN_OP_CALL :
-      if (!si) {
-        si = grn_scan_info_open(ctx, c - e->codes);
-        if (!si) {
-          int j;
-          for (j = 0; j < i; j++) { grn_scan_info_close(ctx, sis[j]); }
-          GRN_FREE(sis);
-          return NULL;
-        }
-      }
-      if ((c->flags & GRN_EXPR_CODE_RELATIONAL_EXPRESSION) || c + 1 == ce) {
-        stat = SCAN_START;
-        mrb_si = mrb_grn_scan_info_new(mrb, si);
-        mrb_funcall(mrb, mrb_si, "op=", 1, mrb_fixnum_value(c->op));
-        mrb_funcall(mrb, mrb_si, "end=", 1, mrb_fixnum_value(c - e->codes));
-        sis[i++] = si;
-        mrb_funcall(mrb, mrb_si, "call_relational_resolve_indexes", 0);
-        si = NULL;
-      } else {
-        stat = SCAN_COL2;
-      }
-      break;
-    default :
-      break;
-    }
-  }
-  if (op == GRN_OP_OR && !size) {
-    // for debug
-    if (!(grn_scan_info_get_flags(sis[0]) & SCAN_PUSH) || (grn_scan_info_get_logical_op(sis[0]) != op)) {
-      int j;
-      ERR(GRN_INVALID_ARGUMENT, "invalid expr");
-      for (j = 0; j < i; j++) { grn_scan_info_close(ctx, sis[j]); }
-      GRN_FREE(sis);
-      return NULL;
-    } else {
-      grn_scan_info_set_flags(sis[0], grn_scan_info_get_flags(sis[0]) & ~SCAN_PUSH);
-      grn_scan_info_set_logical_op(sis[0], op);
-    }
-  } else {
-    if (!grn_scan_info_put_logical_op(ctx, sis, &i, op, c - e->codes)) { return NULL; }
-  }
-  *n = i;
-  return sis;
-}
-
-static mrb_value
-mrb_grn_expr_build(mrb_state *mrb, mrb_value self)
-{
-  int *n;
-  uint32_t size;
-  scan_info **sis;
-  grn_operator op;
-  grn_obj *expr;
-  grn_ctx *ctx = (grn_ctx *)mrb->ud;
-  mrb_value mrb_expr, mrb_n;
-
-  mrb_get_args(mrb, "ooii", &mrb_expr, &mrb_n, &op, &size);
-  expr = mrb_cptr(mrb_expr);
-  n = mrb_cptr(mrb_n);
-
-  sis = scan_info_build(ctx, expr, n, op, size);
-  return mrb_cptr_value(mrb, sis);
-}
-
 static mrb_value
 mrb_grn_scan_info_initialize(mrb_state *mrb, mrb_value self)
 {
@@ -404,7 +153,11 @@ mrb_grn_scan_info_set_query(mrb_state *mrb, mrb_value self)
 
   mrb_get_args(mrb, "o", &mrb_query);
   si = DATA_PTR(self);
-  grn_scan_info_set_query(si, DATA_PTR(mrb_query));
+  if (mrb_nil_p(mrb_query)) {
+    grn_scan_info_set_query(si, NULL);
+  } else {
+    grn_scan_info_set_query(si, DATA_PTR(mrb_query));
+  }
   return self;
 }
 
@@ -571,9 +324,6 @@ grn_mrb_expr_init(grn_ctx *ctx)
   struct RClass *object_class = ctx->impl->mrb.object_class;
   struct RClass *klass;
 
-  mrb_define_class_method(mrb, module,
-                          "build", mrb_grn_expr_build, MRB_ARGS_REQ(4));
-
   klass = mrb_define_class_under(mrb, module, "ScanInfo", mrb->object_class);
   MRB_SET_INSTANCE_TT(klass, MRB_TT_DATA);
   mrb_define_method(mrb, klass, "initialize",
@@ -626,22 +376,46 @@ grn_mrb_expr_init(grn_ctx *ctx)
 
   grn_mrb_load(ctx, "expression.rb");
   grn_mrb_load(ctx, "scan_info.rb");
+  grn_mrb_load(ctx, "scan_info_data.rb");
+  grn_mrb_load(ctx, "scan_info_builder.rb");
 }
 
 scan_info **
 grn_mrb_scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
                         grn_operator op, uint32_t size)
 {
-  scan_info **sis;
-  mrb_state *mrb = ctx->impl->mrb.state;
+  grn_mrb_data *data = &(ctx->impl->mrb);
+  mrb_state *mrb = data->state;
+  mrb_value mrb_expression;
   mrb_value mrb_sis;
+  scan_info **sis;
+  int i;
 
-  mrb_sis = mrb_funcall(mrb, mrb_obj_value(ctx->impl->mrb.module), "build", 4,
-                        mrb_cptr_value(mrb, expr),
-                        mrb_cptr_value(mrb, n),
+  mrb_expression = grn_mrb_value_from_grn_obj(mrb, expr);
+  mrb_sis = mrb_funcall(mrb, mrb_expression, "build_scan_info", 2,
                         mrb_fixnum_value(op),
                         mrb_fixnum_value(size));
-  sis = mrb_cptr(mrb_sis);
+
+  if (mrb_nil_p(mrb_sis)) {
+    return NULL;
+  }
+
+  *n = RARRAY_LEN(mrb_sis);
+  sis = GRN_MALLOCN(scan_info *, *n);
+  for (i = 0; i < *n; i++) {
+    mrb_value mrb_si;
+    mrb_value mrb_si_data;
+    scan_info *si;
+    int start;
+
+    mrb_si_data = RARRAY_PTR(mrb_sis)[i];
+    start = mrb_fixnum(mrb_funcall(mrb, mrb_si_data, "start", 0));
+    si = grn_scan_info_open(ctx, start);
+    mrb_si = mrb_grn_scan_info_new(mrb, si);
+    mrb_funcall(mrb, mrb_si, "apply", 1, mrb_si_data);
+    sis[i] = si;
+  }
+
   return sis;
 }
 #endif

  Modified: lib/mrb/scripts/expression.rb (+13 -0)
===================================================================
--- lib/mrb/scripts/expression.rb    2014-08-05 12:59:05 +0900 (1d5d57e)
+++ lib/mrb/scripts/expression.rb    2014-08-06 20:24:49 +0900 (dbeac49)
@@ -1,4 +1,17 @@
 module Groonga
   class Expression
+    def build_scan_info(op, size)
+      begin
+        builder = ScanInfoBuilder.new(self, op, size)
+        builder.build
+      rescue => error
+        backtrace = error.backtrace
+        puts "#{error.class}: #{error.message}"
+        backtrace.each do |entry|
+          puts entry
+        end
+        nil
+      end
+    end
   end
 end

  Modified: lib/mrb/scripts/scan_info.rb (+10 -119)
===================================================================
--- lib/mrb/scripts/scan_info.rb    2014-08-05 12:59:05 +0900 (a1c6a8c)
+++ lib/mrb/scripts/scan_info.rb    2014-08-06 20:24:49 +0900 (80c9267)
@@ -7,127 +7,18 @@ module Groonga
       PRE_CONST = 0x08
     end
 
-    def each_arg
-      i = 0
-      loop do
-        arg = get_arg(i)
-        break if arg.nil?
-        yield(arg)
-        i += 1
+    def apply(data)
+      self.op = data.op
+      self.logical_op = data.logical_op
+      self.end = data.end
+      self.query = data.query
+      self.flags = data.flags
+      data.args.each do |arg|
+        push_arg(arg)
       end
-      nil
-    end
-
-    def match_resolve_index
-      each_arg do |arg|
-        case arg
-        when Expression
-          match_resolve_index_expression(arg)
-        when Accessor
-          match_resolve_index_accessor(arg)
-        when Object
-          match_resolve_index_db_obj(arg)
-        else
-          self.query = arg
-        end
+      data.indexes.each do |index, section_id, weight|
+        put_index(index, section_id, weight)
       end
     end
-
-    def call_relational_resolve_indexes
-      # better index resolving framework for functions should be implemented
-      each_arg do |arg|
-        call_relational_resolve_index(arg)
-      end
-    end
-
-    private
-    def match_resolve_index_expression(expression)
-      codes = expression.codes
-      n_codes = codes.size
-      i = 0
-      while i < n_codes
-        code = codes[i]
-        value = code.value
-        case value
-        when Groonga::Accessor
-          match_resolve_index_expression_accessor(code)
-        when Groonga::FixedSizeColumn, Groonga::VariableSizeColumn
-          match_resolve_index_expression_data_column(code)
-        when Groonga::IndexColumn
-          section_id = 0
-          rest_n_codes = n_codes - i
-          if rest_n_codes >= 2 and
-              codes[i + 1].value.is_a?(Groonga::Bulk) and
-              codes[i + 1].value.domain == Groonga::ID::UINT32 and
-              codes[i + 2].op == Groonga::Operator::GET_MEMBER
-            section_id = codes[i + 1].value.value + 1
-            code = codes[i + 2]
-            i += 2
-          end
-          put_index(value, section_id, code.weight)
-        end
-        i += 1
-      end
-    end
-
-    def match_resolve_index_expression_accessor(expr_code)
-      accessor = expr_code.value
-      self.flags |= Flags::ACCESSOR
-      index_info = accessor.find_index(op)
-      return if index_info.nil?
-      if accessor.next
-        put_index(accessor, index_info.section_id, expr_code.weight)
-      else
-        put_index(index_info.index, index_info.section_id, expr_code.weight)
-      end
-    end
-
-    def match_resolve_index_expression_data_column(expr_code)
-      column = expr_code.value
-      index_info = column.find_index(op)
-      return if index_info.nil?
-      put_index(index_info.index, index_info.section_id, expr_code.weight)
-    end
-
-    def match_resolve_index_db_obj(db_obj)
-      index_info = db_obj.find_index(op)
-      return if index_info.nil?
-      put_index(index_info.index, index_info.section_id, 1)
-    end
-
-    def match_resolve_index_accessor(accessor)
-      self.flags |= Flags::ACCESSOR
-      index_info = accessor.find_index(op)
-      return if index_info.nil?
-      if accessor.next
-        put_index(accessor, index_info.section_id, 1)
-      else
-        put_index(index_info.index, index_info.section_id, 1)
-      end
-    end
-
-    def call_relational_resolve_index(object)
-      case object
-      when Accessor
-        call_relational_resolve_index_accessor(object)
-      when Bulk
-        self.query = object
-      else
-        call_relational_resolve_index_db_obj(object)
-      end
-    end
-
-    def call_relational_resolve_index_db_obj(db_obj)
-      index_info = db_obj.find_index(op)
-      return if index_info.nil?
-      put_index(index_info.index, index_info.section_id, 1)
-    end
-
-    def call_relational_resolve_index_accessor(accessor)
-      self.flags |= Flags::ACCESSOR
-      index_info = accessor.find_index(op)
-      return if index_info.nil?
-      put_index(index_info.index, index_info.section_id, 1)
-    end
   end
 end

  Added: lib/mrb/scripts/scan_info_builder.rb (+243 -0) 100644
===================================================================
--- /dev/null
+++ lib/mrb/scripts/scan_info_builder.rb    2014-08-06 20:24:49 +0900 (d110a7e)
@@ -0,0 +1,243 @@
+module Groonga
+  # TODO: Move me
+  class ExpressionCode
+    module Flags
+      RELATIONAL_EXPRESSION = 0x01
+    end
+  end
+
+  class ScanInfoBuilder
+    module Status
+      START = 0
+      VAR = 1
+      COL1 = 2
+      COL2 = 3
+      CONST = 4
+    end
+
+    def initialize(expression, operator, size)
+      @data_list = []
+      @expression = expression
+      @operator = operator
+      @size = size
+    end
+
+    RELATION_OPERATORS = [
+      Operator::MATCH,
+      Operator::NEAR,
+      Operator::NEAR2,
+      Operator::SIMILAR,
+      Operator::PREFIX,
+      Operator::SUFFIX,
+      Operator::EQUAL,
+      Operator::NOT_EQUAL,
+      Operator::LESS,
+      Operator::GREATER,
+      Operator::LESS_EQUAL,
+      Operator::GREATER_EQUAL,
+      Operator::GEO_WITHINP5,
+      Operator::GEO_WITHINP6,
+      Operator::GEO_WITHINP8,
+      Operator::TERM_EXTRACT,
+    ]
+
+    LOGICAL_OPERATORS = [
+      Operator::AND,
+      Operator::OR,
+      Operator::AND_NOT,
+      Operator::ADJUST,
+    ]
+    def build
+      return nil unless valid?
+
+      status = Status::START
+      variable = @expression.get_var_by_offset(0)
+      data = nil
+      codes = @expression.codes
+      n_codes = codes.size
+      codes.each_with_index do |code, i|
+        case code.op
+        when *RELATION_OPERATORS
+          status = Status::START
+          data.op = code.op
+          data.end = i
+          data.match_resolve_index
+          @data_list << data
+          data = nil
+        when *LOGICAL_OPERATORS
+          put_logical_op(code.op, i)
+          # TODO: rescue and return nil
+          status = Status::START
+        when Operator::PUSH
+          data ||= ScanInfoData.new(i)
+          if code.value == variable
+            status = Status::VAR
+          else
+            data.args << code.value
+            if status == Status::START
+              data.flags |= ScanInfo::Flags::PRE_CONST
+            end
+            status = Status::CONST
+          end
+        when Operator::GET_VALUE
+          case status
+          when Status::START
+            data ||= ScanInfoData.new(i)
+            status = Status::COL1
+            data.args << code.value
+          when Status::CONST, Status::VAR
+            status = Status::COL1
+            data.args << code.value
+          when Status::COL1
+            raise "invalid expression: can't use column as a value: #{code.value.inspect}"
+            status = Status::COL2
+          when Status::COL2
+            # Do nothing
+          end
+        when Operator::CALL
+          data ||= ScanInfoData.new(i)
+          if (code.flags & ExpressionCode::Flags::RELATIONAL_EXPRESSION) != 0 or
+              (i + 1) == n_codes
+            status = Status::START
+            data.op = code.op
+            data.end = i
+            data.call_relational_resolve_indexes
+            @data_list << data
+            data = nil
+          else
+            status = Status::COL2
+          end
+        end
+      end
+
+      if @operator == Operator::OR and @size == 0
+        first_data = @data_list.first
+        if (first_data.flags & ScanInfo::Flags::PUSH) == 0 or
+            first_data.logical_op != @operator
+          raise "invalid expr"
+        else
+          first_data.flags &= ~ScanInfo::Flags::PUSH
+          first_data.logical_op = @operator
+        end
+      else
+        put_logical_op(@operator, n_codes)
+      end
+
+      @data_list
+    end
+
+    private
+    def valid?
+      n_relation_expressions = 0
+      n_logical_expressions = 0
+      status = Status::START
+      variable = @expression.get_var_by_offset(0)
+      codes = @expression.codes
+      codes.each do |code|
+        case code.op
+        when *RELATION_OPERATORS
+          return false if status < Status::COL1
+          return false if status > Status::CONST
+          status = Status::START
+          n_relation_expressions += 1
+        when *LOGICAL_OPERATORS
+          return false if status != Status::START
+          n_logical_expressions += 1
+          return false if n_logical_expressions >= n_relation_expressions
+        when Operator::PUSH
+          if code.value == variable
+            status = Status::VAR
+          else
+            status = Status::CONST
+          end
+        when Operator::GET_VALUE
+          case status
+          when Status::START, Status::CONST, Status::VAR
+            status = Status::COL1
+          when Status::COL1
+            status = Status::COL2
+          when Status::COL2
+            # Do nothing
+          else
+            return false
+          end
+        when Operator::CALL
+          if (code.flags & ExpressionCode::Flags::RELATIONAL_EXPRESSION) != 0 or
+              code == codes.last
+            status = Status::START
+            n_relation_expressions += 1
+          else
+            status = Status::COL2
+          end
+        else
+          return false
+        end
+      end
+
+      return false if status != Status::START
+      return false if n_relation_expressions != (n_logical_expressions + 1)
+
+      true
+    end
+
+    def put_logical_op(operator, start)
+      n_parens = 1
+      n_dif_ops = 0
+      r = 0
+      j = @data_list.size
+      while j > 0
+        j -= 1
+        data = @data_list[j]
+        if (data.flags & ScanInfo::Flags::POP) != 0
+          n_dif_ops += 1
+          n_parens += 1
+        else
+          if (data.flags & ScanInfo::Flags::PUSH) != 0
+            n_parens -= 1
+            if n_parens == 0
+              if r == 0
+                if n_dif_ops > 0
+                  if j > 0 and operator != Operator::AND_NOT
+                    n_parens = 1
+                    n_dif_ops = 0
+                    r = j
+                  else
+                    new_data = ScanInfoData.new(start)
+                    new_data.flags = ScanInfo::Flags::POP
+                    new_data.logical_op = operator
+                    @data_list << new_data
+                  end
+                else
+                  data.flags &= ~ScanInfo::Flags::PUSH
+                  data.logical_op = operator
+                end
+              else
+                if n_dif_ops > 0
+                  new_data = ScanInfoData.new(start)
+                  new_data.flags = ScanInfo::Flags::POP
+                  new_data.logical_op = operator
+                  @data_list << new_data
+                else
+                  data.flags &= ~ScanInfo::Flags::PUSH
+                  data.logical_op = operator
+                  @data_list =
+                    @data_list[0...j] +
+                    @data_list[r..-1] +
+                    @data_list[j...r]
+                end
+              end
+            end
+          else
+            if operator == Operator::AND_NOT or operator != data.logical_op
+              n_dif_ops += 1
+            end
+          end
+        end
+
+        if j < 0
+          raise GRN_INVALID_ARGUMENT.new("unmatched nesting level")
+        end
+      end
+    end
+  end
+end

  Copied: lib/mrb/scripts/scan_info_data.rb (+33 -28) 75%
===================================================================
--- lib/mrb/scripts/scan_info.rb    2014-08-05 12:59:05 +0900 (a1c6a8c)
+++ lib/mrb/scripts/scan_info_data.rb    2014-08-06 20:24:49 +0900 (119824e)
@@ -1,25 +1,26 @@
 module Groonga
-  class ScanInfo
-    module Flags
-      ACCESSOR  = 0x01
-      PUSH      = 0x02
-      POP       = 0x04
-      PRE_CONST = 0x08
-    end
-
-    def each_arg
-      i = 0
-      loop do
-        arg = get_arg(i)
-        break if arg.nil?
-        yield(arg)
-        i += 1
-      end
-      nil
+  class ScanInfoData
+    attr_accessor :start
+    attr_accessor :end
+    attr_accessor :op
+    attr_accessor :logical_op
+    attr_accessor :query
+    attr_accessor :args
+    attr_accessor :indexes
+    attr_accessor :flags
+    def initialize(start)
+      @start = start
+      @end = 0
+      @op = 0
+      @logical_op = Operator::OR
+      @query = nil
+      @args = []
+      @indexes = []
+      @flags = ScanInfo::Flags::PUSH
     end
 
     def match_resolve_index
-      each_arg do |arg|
+      @args.each do |arg|
         case arg
         when Expression
           match_resolve_index_expression(arg)
@@ -35,7 +36,7 @@ module Groonga
 
     def call_relational_resolve_indexes
       # better index resolving framework for functions should be implemented
-      each_arg do |arg|
+      @args.each do |arg|
         call_relational_resolve_index(arg)
       end
     end
@@ -49,17 +50,17 @@ module Groonga
         code = codes[i]
         value = code.value
         case value
-        when Groonga::Accessor
+        when Accessor
           match_resolve_index_expression_accessor(code)
-        when Groonga::FixedSizeColumn, Groonga::VariableSizeColumn
+        when FixedSizeColumn, VariableSizeColumn
           match_resolve_index_expression_data_column(code)
-        when Groonga::IndexColumn
+        when IndexColumn
           section_id = 0
           rest_n_codes = n_codes - i
           if rest_n_codes >= 2 and
-              codes[i + 1].value.is_a?(Groonga::Bulk) and
-              codes[i + 1].value.domain == Groonga::ID::UINT32 and
-              codes[i + 2].op == Groonga::Operator::GET_MEMBER
+              codes[i + 1].value.is_a?(Bulk) and
+              codes[i + 1].value.domain == ID::UINT32 and
+              codes[i + 2].op == Operator::GET_MEMBER
             section_id = codes[i + 1].value.value + 1
             code = codes[i + 2]
             i += 2
@@ -72,7 +73,7 @@ module Groonga
 
     def match_resolve_index_expression_accessor(expr_code)
       accessor = expr_code.value
-      self.flags |= Flags::ACCESSOR
+      self.flags |= ScanInfo::Flags::ACCESSOR
       index_info = accessor.find_index(op)
       return if index_info.nil?
       if accessor.next
@@ -96,7 +97,7 @@ module Groonga
     end
 
     def match_resolve_index_accessor(accessor)
-      self.flags |= Flags::ACCESSOR
+      self.flags |= ScanInfo::Flags::ACCESSOR
       index_info = accessor.find_index(op)
       return if index_info.nil?
       if accessor.next
@@ -124,10 +125,14 @@ module Groonga
     end
 
     def call_relational_resolve_index_accessor(accessor)
-      self.flags |= Flags::ACCESSOR
+      self.flags |= ScanInfo::Flags::ACCESSOR
       index_info = accessor.find_index(op)
       return if index_info.nil?
       put_index(index_info.index, index_info.section_id, 1)
     end
+
+    def put_index(index, section_id, weight)
+      @indexes << [index, section_id, weight]
+    end
   end
 end

  Modified: lib/mrb/scripts/sources.am (+2 -1)
===================================================================
--- lib/mrb/scripts/sources.am    2014-08-05 12:59:05 +0900 (b4f7717)
+++ lib/mrb/scripts/sources.am    2014-08-06 20:24:49 +0900 (eda7f4a)
@@ -2,4 +2,5 @@ RUBY_SCRIPT_FILES =				\
 	eval_context.rb				\
 	expression.rb				\
 	index_info.rb				\
-	scan_info.rb
+	scan_info.rb				\
+	scan_info_builder.rb
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index