[Groonga-commit] groonga/grnxx at 326a386 [master] Specialize Sorter for Score. (#119)

Back to archive index

susumu.yata null+****@clear*****
Tue Dec 16 10:46:40 JST 2014


susumu.yata	2014-11-26 18:39:40 +0900 (Wed, 26 Nov 2014)

  New Revision: 326a38611f5226b0ab32c9bd64de9ee2bcb23e8b
  https://github.com/groonga/grnxx/commit/326a38611f5226b0ab32c9bd64de9ee2bcb23e8b

  Message:
    Specialize Sorter for Score. (#119)

  Modified files:
    lib/grnxx/impl/sorter.cpp

  Modified: lib/grnxx/impl/sorter.cpp (+235 -2)
===================================================================
--- lib/grnxx/impl/sorter.cpp    2014-11-26 17:38:58 +0900 (e4aa32d)
+++ lib/grnxx/impl/sorter.cpp    2014-11-26 18:39:40 +0900 (21ab0fe)
@@ -29,6 +29,234 @@ class Node {
   Node *next_;
 };
 
+// --- ScoreNode ---
+
+struct RegularScoreComparer {
+  bool operator()(const Record &lhs, const Record &rhs) const {
+    if (lhs.score.is_na()) {
+      return false;
+    } else if (rhs.score.is_na()) {
+      return true;
+    }
+    return lhs.score.raw() < rhs.score.raw();
+  }
+};
+
+struct ReverseScoreComparer {
+  bool operator()(const Record &lhs, const Record &rhs) const {
+    if (lhs.score.is_na()) {
+      return false;
+    } else if (rhs.score.is_na()) {
+      return true;
+    }
+    return lhs.score.raw() > rhs.score.raw();
+  }
+};
+
+template <typename T>
+class ScoreNode : public Node {
+ public:
+  using Comparer = T;
+
+  explicit ScoreNode(SorterOrder &&order)
+      : Node(std::move(order)),
+        comparer_() {}
+  ~ScoreNode() = default;
+
+  void sort(ArrayRef<Record> records, size_t begin, size_t end) {
+    return quick_sort(records, begin, end);
+  }
+
+ private:
+  Comparer comparer_;
+
+  // Sort records with ternary quick sort.
+  //
+  // Switches to insertion sort when the sorting range becomes small enough.
+  //
+  // On success, returns true.
+  // On failure, throws an exception.
+  void quick_sort(ArrayRef<Record> records, size_t begin, size_t end);
+
+  // Sort records with insertion sort.
+  //
+  // Insertion sort should be used when there few records.
+  //
+  // On success, returns true.
+  // On failure, throws an exception.
+  void insertion_sort(ArrayRef<Record> records);
+
+  // Choose the pivot and move it to the front.
+  void move_pivot_first(ArrayRef<Record> records);
+};
+
+template <typename T>
+void ScoreNode<T>::quick_sort(ArrayRef<Record> records,
+                              size_t begin,
+                              size_t end) {
+  // Use ternary quick sort if there are enough records.
+  //
+  // TODO: Currently, the threshold is 16.
+  //       This value should be optimized and replaced with a named constant.
+  while (records.size() >= 16) {
+    move_pivot_first(records);
+    const Record pivot = records[0];
+    size_t left = 1;
+    size_t right = records.size();
+    size_t pivot_left = 1;
+    size_t pivot_right = records.size();
+    for ( ; ; ) {
+      // Move entries based on comparison against the pivot.
+      // Prior entries are moved to left.
+      // Less prior entries are moved to right.
+      // Entries which equal to the pivot are moved to the edges.
+      while (left < right) {
+        if (comparer_(pivot, records[left])) {
+          break;
+        } else if (pivot.score.match(records[left].score)) {
+          std::swap(records[left], records[pivot_left]);
+          ++pivot_left;
+        }
+        ++left;
+      }
+      while (left < right) {
+        --right;
+        if (comparer_(records[right], pivot)) {
+          break;
+        } else if (records[right].score.match(pivot.score)) {
+          --pivot_right;
+          std::swap(records[right], records[pivot_right]);
+        }
+      }
+      if (left >= right) {
+        break;
+      }
+      std::swap(records[left], records[right]);
+      ++left;
+    }
+
+    // Move left pivot-equivalent entries to the left side of the boundary.
+    while (pivot_left > 0) {
+      --pivot_left;
+      --left;
+      std::swap(records[pivot_left], records[left]);
+    }
+    // Move right pivot-equivalent entries to the right side of the boundary.
+    while (pivot_right < records.size()) {
+      std::swap(records[pivot_right], records[right]);
+      ++pivot_right;
+      ++right;
+    }
+
+    // Apply the next sort condition to the pivot-equivalent records.
+    if (this->next_) {
+      if (((right - left) >= 2) && (begin < right) && (end > left)) {
+        size_t next_begin = (begin < left) ? 0 : (begin - left);
+        size_t next_end = ((end > right) ? right : end) - left;
+        this->next_->sort(records.ref(left, right - left),
+                          next_begin, next_end);
+      }
+    }
+
+    // There are the left group and the right group.
+    // A recursive call is used for sorting the smaller group.
+    // The recursion depth is less than log_2(n) where n is the number of
+    // records.
+    // The next loop of the current call is used for sorting the larger group.
+    if (left < (records.size() - right)) {
+      if ((begin < left) && (left >= 2)) {
+        size_t next_end = (end < left) ? end : left;
+        quick_sort(records.ref(0, left), begin, next_end);
+      }
+      if (end <= right) {
+        return;
+      }
+      records = records.ref(right);
+      begin = (begin < right) ? 0 : (begin - right);
+      end -= right;
+    } else {
+      if ((end > right) && ((records.size() - right) >= 2)) {
+        size_t next_begin = (begin < right) ? 0 : (begin - right);
+        size_t next_end = end - right;
+        quick_sort(records.ref(right), next_begin, next_end);
+      }
+      if (begin >= left) {
+        return;
+      }
+      records = records.ref(0, left);
+      if (end > left) {
+        end = left;
+      }
+    }
+  }
+
+  if (records.size() >= 2) {
+    insertion_sort(records);
+  }
+}
+
+template <typename T>
+void ScoreNode<T>::insertion_sort(ArrayRef<Record> records) {
+  for (size_t i = 1; i < records.size(); ++i) {
+    for (size_t j = i; j > 0; --j) {
+      if (comparer_(records[j], records[j - 1])) {
+        std::swap(records[j], records[j - 1]);
+      } else {
+        break;
+      }
+    }
+  }
+
+  // Apply the next sorting if there are records having the same value.
+  if (this->next_) {
+    size_t begin = 0;
+    for (size_t i = 1; i < records.size(); ++i) {
+      if (records[i].score.unmatch(records[begin].score)) {
+        if ((i - begin) >= 2) {
+          this->next_->sort(records.ref(begin, i - begin), 0, i - begin);
+        }
+        begin = i;
+      }
+    }
+    if ((records.size() - begin) >= 2) {
+      this->next_->sort(records.ref(begin), 0, records.size() - begin);
+    }
+  }
+}
+
+template <typename T>
+void ScoreNode<T>::move_pivot_first(ArrayRef<Record> records) {
+  // Choose the median from records[1], records[1 / size], and
+  // records[size - 2].
+  // The reason why not using records[0] and records[size - 1] is to avoid the
+  // worst case which occurs when the records are sorted in reverse order.
+  size_t first = 1;
+  size_t middle = records.size() / 2;
+  size_t last = records.size() - 2;
+  if (comparer_(records[first], records[middle])) {
+    // first < middle.
+    if (comparer_(records[middle], records[last])) {
+      // first < middle < last.
+      std::swap(records[0], records[middle]);
+    } else if (comparer_(records[first], records[last])) {
+      // first < last < middle.
+      std::swap(records[0], records[last]);
+    } else {
+      // last < first < middle.
+      std::swap(records[0], records[first]);
+    }
+  } else if (comparer_(records[last], records[middle])) {
+    // last < middle < first.
+    std::swap(records[0], records[middle]);
+  } else if (comparer_(records[last], records[first])) {
+    // middle < last < first.
+    std::swap(records[0], records[last]);
+  } else {
+    // middle < first < last.
+    std::swap(records[0], records[first]);
+  }
+}
+
 // --- BoolNode ---
 
 class BoolNode : public Node {
@@ -366,8 +594,6 @@ using IntNode = ConvertNode<Int, T>;
 
 // --- FloatNode ---
 
-// TODO: Sorter for Score should be specialized.
-
 // NOTE: This implementation assumes IEEE754.
 struct RegularFloatConverter {
   uint64_t operator()(Float value) const {
@@ -733,6 +959,13 @@ void Sorter::sort(Array<Record> *records) {
 }
 
 Node *Sorter::create_node(SorterOrder &&order) try {
+  if (order.expression->is_score()) {
+    if (order.type == SORTER_REGULAR_ORDER) {
+      return new ScoreNode<RegularScoreComparer>(std::move(order));
+    } else {
+      return new ScoreNode<ReverseScoreComparer>(std::move(order));
+    }
+  }
   switch (order.expression->data_type()) {
     case BOOL_DATA: {
       return new BoolNode(std::move(order));
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index