[Groonga-commit] groonga/grnxx at e0f8cdb [master] Implement Sorter, but not tested yet.

Back to archive index

susumu.yata null+****@clear*****
Wed Jul 30 11:40:41 JST 2014


susumu.yata	2014-07-30 11:40:41 +0900 (Wed, 30 Jul 2014)

  New Revision: e0f8cdbcb1518bb1e60d2088fbe8b021cd3750ee
  https://github.com/groonga/grnxx/commit/e0f8cdbcb1518bb1e60d2088fbe8b021cd3750ee

  Message:
    Implement Sorter, but not tested yet.

  Modified files:
    include/grnxx/sorter.hpp
    lib/grnxx/sorter.cpp

  Modified: include/grnxx/sorter.hpp (+3 -1)
===================================================================
--- include/grnxx/sorter.hpp    2014-07-30 11:34:30 +0900 (3e4b886)
+++ include/grnxx/sorter.hpp    2014-07-30 11:40:41 +0900 (b27bd68)
@@ -55,8 +55,10 @@ class Sorter {
   bool sort(Error *error, RecordSet *record_set);
 
  private:
-  RecordSet *record_set_;
   unique_ptr<SorterNode> head_;
+  RecordSet *record_set_;
+  Int offset_;
+  Int limit_;
 
   Sorter();
 };

  Modified: lib/grnxx/sorter.cpp (+279 -36)
===================================================================
--- lib/grnxx/sorter.cpp    2014-07-30 11:34:30 +0900 (8e3f48f)
+++ lib/grnxx/sorter.cpp    2014-07-30 11:40:41 +0900 (e82ec48)
@@ -11,17 +11,17 @@ class SorterNode {
  public:
   static unique_ptr<SorterNode> create(Error *error, Order &&order);
 
-  SorterNode() : next_(nullptr) {}
+  SorterNode() : next_(), error_(nullptr) {}
   virtual ~SorterNode() {}
 
-  // Return the next node.
-  SorterNode *next() const {
-    return next_.get();
-  }
   // Set the next node.
   void set_next(unique_ptr<SorterNode> &&next) {
     next_ = std::move(next);
   }
+  // Set a reference to an error object.
+  void set_error(Error *error) {
+    error_ = error;
+  }
 
   // Sort records in [begin, end).
   //
@@ -29,12 +29,13 @@ class SorterNode {
   // On failure, returns false and stores error information into "*error" if
   // "error" != nullptr.
   virtual bool sort(Error *error,
-                    RecordSet *record_set,
+                    RecordSubset subset,
                     Int begin,
                     Int end) = 0;
 
- private:
+ protected:
   unique_ptr<SorterNode> next_;
+  Error *error_;
 };
 
 namespace {
@@ -58,27 +59,241 @@ struct ReverseComparer {
 template <typename T>
 class Node : public SorterNode {
  public:
-  using Value = typename T::Value;
+  using PriorTo = T;
+  using Value   = typename PriorTo::Value;
 
-  Node(Order &&order) : SorterNode(), order_(std::move(order)) {}
+  explicit Node(Order &&order)
+      : SorterNode(),
+        order_(std::move(order)),
+        values_(),
+        prior_to_() {}
   ~Node() {}
 
-  bool sort(Error *error, RecordSet *record_set, Int begin, Int end);
+  bool sort(Error *error, RecordSubset subset, Int begin, Int end);
 
  private:
   Order order_;
-  std::vector<Value> values_;
+  Array<Value> values_;
+  PriorTo prior_to_;
+
+  // Sort records with ternary quick sort.
+  //
+  // Switches to insertion sort when the sorting range becomes small enough.
+  bool quick_sort(RecordSubset records, Value *values,
+                  Int begin, Int end);
+
+  // Sort records with insertion sort.
+  //
+  // Insertion sort should be used when there few records.
+  bool insertion_sort(RecordSubset records, Value *values);
 
-  // TODO: A pointer to records is not available.
-//  void quick_sort(Record *records, Value *values, Int size,
-//                  Int begin, Int end);
-//  void insertion_sort(Record *records, Value *values, Int size);
-//  void move_pivot_first(Record *records, Value *values, Int size);
+  // Choose the pivot and move it to the front.
+  void move_pivot_first(RecordSubset records, Value *values);
 };
 
 template <typename T>
-bool Node<T>::sort(Error *error, RecordSet *record_set, Int begin, Int end) {
-  return false;
+bool Node<T>::sort(Error *error, RecordSubset subset, Int begin, Int end) {
+  if (!order_.expression->evaluate(error, subset, &values_)) {
+    return false;
+  }
+  set_error(error);
+  return quick_sort(subset, values_.data(), begin, end);
+}
+
+template <typename T>
+bool Node<T>::quick_sort(RecordSubset records, Value *values,
+                         Int begin, Int end) {
+  // Use ternary quick sort if there are enough records.
+  //
+  // TODO: Currently, the threshold is 16.
+  //       This value should be optimized and replaced with a named constant.
+  while (records.size() >= 16) {
+    move_pivot_first(records, values);
+    const Value pivot = values[0];
+    Int left = 1;
+    Int right = records.size();
+    Int pivot_left = 1;
+    Int pivot_right = records.size();
+    for ( ; ; ) {
+      // Move entries based on comparison against the pivot.
+      // Prior entries are moved to left.
+      // Less prior entries are moved to right.
+      // Entries which equal to the pivot are moved to the edges.
+      while (left < right) {
+        if (prior_to_(pivot, values[left])) {
+          break;
+        } else if (pivot == values[left]) {
+          std::swap(values[left], values[pivot_left]);
+          records.swap(left, pivot_left);
+          ++pivot_left;
+        }
+        ++left;
+      }
+      while (left < right) {
+        --right;
+        if (prior_to_(values[right], pivot)) {
+          break;
+        } else if (values[right] == pivot) {
+          --pivot_right;
+          std::swap(values[right], values[pivot_right]);
+          records.swap(right, pivot_right);
+        }
+      }
+      if (left >= right) {
+        break;
+      }
+      std::swap(values[left], values[right]);
+      records.swap(left, right);
+      ++left;
+    }
+
+    // 左端の値を境界の左に移動する.
+    while (pivot_left > 0) {
+      --pivot_left;
+      --left;
+      std::swap(values[pivot_left], values[left]);
+      records.swap(pivot_left, left);
+    }
+    // 右端の値を境界の右に移動する.
+    while (pivot_right < records.size()) {
+      std::swap(values[pivot_right], values[right]);
+      records.swap(pivot_right, right);
+      ++pivot_right;
+      ++right;
+    }
+
+    // 枢軸と同値の範囲 [left, right) には次の検索条件を適用する.
+    if (this->next_) {
+      if (((right - left) >= 2) && (begin < right) && (end > left)) {
+        Int next_begin = (begin < left) ? 0 : (begin - left);
+        Int next_end = ((end > right) ? right : end) - left;
+        if (!this->next_->sort(this->error_,
+                               records.subset(left, right - left),
+                               next_begin, next_end)) {
+          return false;
+        }
+      }
+    }
+
+    // 再帰呼び出しの深さを抑えるため,左側 [0, left) と右側 [right, records.size()) の
+    // 小さい方を再帰呼び出しで処理し,大きい方を次のループで処理する.
+    if (left < (records.size() - right)) {
+      if ((begin < left) && (left >= 2)) {
+        Int next_end = (end < left) ? end : left;
+        if (!quick_sort(records.subset(0, left), values, begin, next_end)) {
+          return false;
+        }
+      }
+      if (end <= right) {
+        return true;
+      }
+      records = records.subset(right);
+      values += right;
+      begin -= right;
+      if (begin < 0) {
+        begin = 0;
+      }
+      end -= right;
+    } else {
+      if ((end > right) && ((records.size() - right) >= 2)) {
+        Int next_begin = (begin < right) ? 0 : (begin - right);
+        Int next_end = end - right;
+        if (!quick_sort(records.subset(right, records.size() - right),
+                        values + right, next_begin, next_end)) {
+          return false;
+        }
+      }
+      if (begin >= left) {
+        return true;
+      }
+      records = records.subset(0, left);
+      if (end > left) {
+        end = left;
+      }
+    }
+  }
+
+  if (records.size() >= 2) {
+    return insertion_sort(records, values);
+  }
+  return true;
+}
+
+template <typename T>
+bool Node<T>::insertion_sort(RecordSubset records, Value *values) {
+  for (Int i = 1; i < records.size(); ++i) {
+    for (Int j = i; j > 0; --j) {
+      if (prior_to_(values[j], values[j - 1])) {
+        std::swap(values[j], values[j - 1]);
+        records.swap(j, j - 1);
+      } else {
+        break;
+      }
+    }
+  }
+
+  // Apply the next sorting if there are records having the same value.
+  if (this->next_) {
+    Int begin = 0;
+    for (Int i = 1; i < records.size(); ++i) {
+      if (values[i] != values[begin]) {
+        if ((i - begin) >= 2) {
+          if (!this->next_->sort(this->error_,
+                                 records.subset(begin, i - begin),
+                                 0, i - begin)) {
+            return false;
+          }
+        }
+        begin = i;
+      }
+    }
+    if ((records.size() - begin) >= 2) {
+      if (!this->next_->sort(this->error_,
+                             records.subset(begin, records.size() - begin),
+                             0, records.size() - begin)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+template <typename T>
+void Node<T>::move_pivot_first(RecordSubset records, Value *values) {
+  // Choose the median from values[1], values[1 / size], and values[size - 2].
+  // The reason why not using values[0] and values[size - 1] is to avoid the
+  // worst case which occurs when the records are sorted in reverse order.
+  Int first = 1;
+  Int middle = records.size() / 2;
+  Int last = records.size() - 2;
+  if (prior_to_(values[first], values[middle])) {
+    // first < middle.
+    if (prior_to_(values[middle], values[last])) {
+      // first < middle < last.
+      std::swap(values[0], values[middle]);
+      records.swap(0, middle);
+    } else if (prior_to_(values[first], values[last])) {
+      // first < last < middle.
+      std::swap(values[0], values[last]);
+      records.swap(0, last);
+    } else {
+      // last < first < middle.
+      std::swap(values[0], values[first]);
+      records.swap(0, first);
+    }
+  } else if (prior_to_(values[last], values[middle])) {
+    // last < middle < first.
+    std::swap(values[0], values[middle]);
+    records.swap(0, middle);
+  } else if (prior_to_(values[last], values[first])) {
+    // middle < last < first.
+    std::swap(values[0], values[last]);
+    records.swap(0, last);
+  } else {
+    // middle < first < last.
+    std::swap(values[0], values[first]);
+    records.swap(0, first);
+  }
 }
 
 }  // namespace
@@ -86,46 +301,55 @@ bool Node<T>::sort(Error *error, RecordSet *record_set, Int begin, Int end) {
 unique_ptr<SorterNode> SorterNode::create(Error *error, Order &&order) {
   unique_ptr<SorterNode> node;
   switch (order.expression->data_type()) {
-    // TODO: Bool type does not support pointer.
+    // TODO: Bool does not support comparison operators <, >.
+    //       Also, Bool does not support pointer.
 //    case BOOL_DATA: {
-//      node.reset(new (nothrow) Node<Bool>(std::move(order)));
+//      if (order.type == REGULAR_ORDER) {
+//        node.reset(new (nothrow) Node<RegularComparer<Bool>>(
+//            std::move(order)));
+//      } else {
+//        node.reset(new (nothrow) Node<ReverseComparer<Bool>>(
+//            std::move(order)));
+//      }
 //      break;
 //    }
     case INT_DATA: {
       if (order.type == REGULAR_ORDER) {
-        node.reset(new (nothrow) Node<RegularComparer<Int>>(std::move(order)));
+        node.reset(new (nothrow) Node<RegularComparer<Int>>(
+            std::move(order)));
       } else {
-        node.reset(new (nothrow) Node<ReverseComparer<Int>>(std::move(order)));
+        node.reset(new (nothrow) Node<ReverseComparer<Int>>(
+            std::move(order)));
       }
       break;
     }
     case FLOAT_DATA: {
       if (order.type == REGULAR_ORDER) {
-        node.reset(
-            new (nothrow) Node<RegularComparer<Float>>(std::move(order)));
+        node.reset(new (nothrow) Node<RegularComparer<Float>>(
+            std::move(order)));
       } else {
-        node.reset(
-            new (nothrow) Node<ReverseComparer<Float>>(std::move(order)));
+        node.reset(new (nothrow) Node<ReverseComparer<Float>>(
+            std::move(order)));
       }
       break;
     }
     case TIME_DATA: {
       if (order.type == REGULAR_ORDER) {
-        node.reset(
-            new (nothrow) Node<RegularComparer<Time>>(std::move(order)));
+        node.reset(new (nothrow) Node<RegularComparer<Time>>(
+            std::move(order)));
       } else {
-        node.reset(
-            new (nothrow) Node<ReverseComparer<Time>>(std::move(order)));
+        node.reset(new (nothrow) Node<ReverseComparer<Time>>(
+            std::move(order)));
       }
       break;
     }
     case TEXT_DATA: {
       if (order.type == REGULAR_ORDER) {
-        node.reset(
-            new (nothrow) Node<RegularComparer<Text>>(std::move(order)));
+        node.reset(new (nothrow) Node<RegularComparer<Text>>(
+            std::move(order)));
       } else {
-        node.reset(
-            new (nothrow) Node<ReverseComparer<Text>>(std::move(order)));
+        node.reset(new (nothrow) Node<ReverseComparer<Text>>(
+            std::move(order)));
       }
       break;
     }
@@ -146,6 +370,10 @@ unique_ptr<Sorter> Sorter::create(
     Error *error,
     unique_ptr<OrderSet> &&order_set,
     const SorterOptions &options) {
+  if ((options.offset < 0) || (options.limit < 0)) {
+    GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Invalid argument");
+    return nullptr;
+  }
   unique_ptr<Sorter> sorter(new (nothrow) Sorter);
   if (!sorter) {
     GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed");
@@ -179,16 +407,31 @@ bool Sorter::finish(Error *error) {
     // Nothing to do.
     return true;
   }
+  if ((offset_ >= record_set_->size()) || (limit_ <= 0)) {
+    record_set_->clear();
+    return true;
+  }
+  Int begin = offset_;
+  Int end;
+  if (limit_ <= (record_set_->size() - offset_)) {
+    end = offset_ + limit_;
+  } else {
+    end = record_set_->size() - offset_;
+  }
   if (record_set_->size() <= 1) {
     return true;
   }
-  return head_->sort(error, record_set_, 0, record_set_->size());
+  return head_->sort(error, record_set_->subset(), begin, end);
 }
 
 bool Sorter::sort(Error *error, RecordSet *record_set) {
   return reset(error, record_set) && finish(error);
 }
 
-Sorter::Sorter() : record_set_(nullptr), head_() {}
+Sorter::Sorter()
+    : head_(),
+      record_set_(nullptr),
+      offset_(0),
+      limit_(0) {}
 
 }  // namespace grnxx
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index