susumu.yata
null+****@clear*****
Wed Jul 30 11:40:41 JST 2014
susumu.yata 2014-07-30 11:40:41 +0900 (Wed, 30 Jul 2014) New Revision: e0f8cdbcb1518bb1e60d2088fbe8b021cd3750ee https://github.com/groonga/grnxx/commit/e0f8cdbcb1518bb1e60d2088fbe8b021cd3750ee Message: Implement Sorter, but not tested yet. Modified files: include/grnxx/sorter.hpp lib/grnxx/sorter.cpp Modified: include/grnxx/sorter.hpp (+3 -1) =================================================================== --- include/grnxx/sorter.hpp 2014-07-30 11:34:30 +0900 (3e4b886) +++ include/grnxx/sorter.hpp 2014-07-30 11:40:41 +0900 (b27bd68) @@ -55,8 +55,10 @@ class Sorter { bool sort(Error *error, RecordSet *record_set); private: - RecordSet *record_set_; unique_ptr<SorterNode> head_; + RecordSet *record_set_; + Int offset_; + Int limit_; Sorter(); }; Modified: lib/grnxx/sorter.cpp (+279 -36) =================================================================== --- lib/grnxx/sorter.cpp 2014-07-30 11:34:30 +0900 (8e3f48f) +++ lib/grnxx/sorter.cpp 2014-07-30 11:40:41 +0900 (e82ec48) @@ -11,17 +11,17 @@ class SorterNode { public: static unique_ptr<SorterNode> create(Error *error, Order &&order); - SorterNode() : next_(nullptr) {} + SorterNode() : next_(), error_(nullptr) {} virtual ~SorterNode() {} - // Return the next node. - SorterNode *next() const { - return next_.get(); - } // Set the next node. void set_next(unique_ptr<SorterNode> &&next) { next_ = std::move(next); } + // Set a reference to an error object. + void set_error(Error *error) { + error_ = error; + } // Sort records in [begin, end). // @@ -29,12 +29,13 @@ class SorterNode { // On failure, returns false and stores error information into "*error" if // "error" != nullptr. virtual bool sort(Error *error, - RecordSet *record_set, + RecordSubset subset, Int begin, Int end) = 0; - private: + protected: unique_ptr<SorterNode> next_; + Error *error_; }; namespace { @@ -58,27 +59,241 @@ struct ReverseComparer { template <typename T> class Node : public SorterNode { public: - using Value = typename T::Value; + using PriorTo = T; + using Value = typename PriorTo::Value; - Node(Order &&order) : SorterNode(), order_(std::move(order)) {} + explicit Node(Order &&order) + : SorterNode(), + order_(std::move(order)), + values_(), + prior_to_() {} ~Node() {} - bool sort(Error *error, RecordSet *record_set, Int begin, Int end); + bool sort(Error *error, RecordSubset subset, Int begin, Int end); private: Order order_; - std::vector<Value> values_; + Array<Value> values_; + PriorTo prior_to_; + + // Sort records with ternary quick sort. + // + // Switches to insertion sort when the sorting range becomes small enough. + bool quick_sort(RecordSubset records, Value *values, + Int begin, Int end); + + // Sort records with insertion sort. + // + // Insertion sort should be used when there few records. + bool insertion_sort(RecordSubset records, Value *values); - // TODO: A pointer to records is not available. -// void quick_sort(Record *records, Value *values, Int size, -// Int begin, Int end); -// void insertion_sort(Record *records, Value *values, Int size); -// void move_pivot_first(Record *records, Value *values, Int size); + // Choose the pivot and move it to the front. + void move_pivot_first(RecordSubset records, Value *values); }; template <typename T> -bool Node<T>::sort(Error *error, RecordSet *record_set, Int begin, Int end) { - return false; +bool Node<T>::sort(Error *error, RecordSubset subset, Int begin, Int end) { + if (!order_.expression->evaluate(error, subset, &values_)) { + return false; + } + set_error(error); + return quick_sort(subset, values_.data(), begin, end); +} + +template <typename T> +bool Node<T>::quick_sort(RecordSubset records, Value *values, + Int begin, Int end) { + // Use ternary quick sort if there are enough records. + // + // TODO: Currently, the threshold is 16. + // This value should be optimized and replaced with a named constant. + while (records.size() >= 16) { + move_pivot_first(records, values); + const Value pivot = values[0]; + Int left = 1; + Int right = records.size(); + Int pivot_left = 1; + Int pivot_right = records.size(); + for ( ; ; ) { + // Move entries based on comparison against the pivot. + // Prior entries are moved to left. + // Less prior entries are moved to right. + // Entries which equal to the pivot are moved to the edges. + while (left < right) { + if (prior_to_(pivot, values[left])) { + break; + } else if (pivot == values[left]) { + std::swap(values[left], values[pivot_left]); + records.swap(left, pivot_left); + ++pivot_left; + } + ++left; + } + while (left < right) { + --right; + if (prior_to_(values[right], pivot)) { + break; + } else if (values[right] == pivot) { + --pivot_right; + std::swap(values[right], values[pivot_right]); + records.swap(right, pivot_right); + } + } + if (left >= right) { + break; + } + std::swap(values[left], values[right]); + records.swap(left, right); + ++left; + } + + // 左端の値を境界の左に移動する. + while (pivot_left > 0) { + --pivot_left; + --left; + std::swap(values[pivot_left], values[left]); + records.swap(pivot_left, left); + } + // 右端の値を境界の右に移動する. + while (pivot_right < records.size()) { + std::swap(values[pivot_right], values[right]); + records.swap(pivot_right, right); + ++pivot_right; + ++right; + } + + // 枢軸と同値の範囲 [left, right) には次の検索条件を適用する. + if (this->next_) { + if (((right - left) >= 2) && (begin < right) && (end > left)) { + Int next_begin = (begin < left) ? 0 : (begin - left); + Int next_end = ((end > right) ? right : end) - left; + if (!this->next_->sort(this->error_, + records.subset(left, right - left), + next_begin, next_end)) { + return false; + } + } + } + + // 再帰呼び出しの深さを抑えるため,左側 [0, left) と右側 [right, records.size()) の + // 小さい方を再帰呼び出しで処理し,大きい方を次のループで処理する. + if (left < (records.size() - right)) { + if ((begin < left) && (left >= 2)) { + Int next_end = (end < left) ? end : left; + if (!quick_sort(records.subset(0, left), values, begin, next_end)) { + return false; + } + } + if (end <= right) { + return true; + } + records = records.subset(right); + values += right; + begin -= right; + if (begin < 0) { + begin = 0; + } + end -= right; + } else { + if ((end > right) && ((records.size() - right) >= 2)) { + Int next_begin = (begin < right) ? 0 : (begin - right); + Int next_end = end - right; + if (!quick_sort(records.subset(right, records.size() - right), + values + right, next_begin, next_end)) { + return false; + } + } + if (begin >= left) { + return true; + } + records = records.subset(0, left); + if (end > left) { + end = left; + } + } + } + + if (records.size() >= 2) { + return insertion_sort(records, values); + } + return true; +} + +template <typename T> +bool Node<T>::insertion_sort(RecordSubset records, Value *values) { + for (Int i = 1; i < records.size(); ++i) { + for (Int j = i; j > 0; --j) { + if (prior_to_(values[j], values[j - 1])) { + std::swap(values[j], values[j - 1]); + records.swap(j, j - 1); + } else { + break; + } + } + } + + // Apply the next sorting if there are records having the same value. + if (this->next_) { + Int begin = 0; + for (Int i = 1; i < records.size(); ++i) { + if (values[i] != values[begin]) { + if ((i - begin) >= 2) { + if (!this->next_->sort(this->error_, + records.subset(begin, i - begin), + 0, i - begin)) { + return false; + } + } + begin = i; + } + } + if ((records.size() - begin) >= 2) { + if (!this->next_->sort(this->error_, + records.subset(begin, records.size() - begin), + 0, records.size() - begin)) { + return false; + } + } + } + return true; +} + +template <typename T> +void Node<T>::move_pivot_first(RecordSubset records, Value *values) { + // Choose the median from values[1], values[1 / size], and values[size - 2]. + // The reason why not using values[0] and values[size - 1] is to avoid the + // worst case which occurs when the records are sorted in reverse order. + Int first = 1; + Int middle = records.size() / 2; + Int last = records.size() - 2; + if (prior_to_(values[first], values[middle])) { + // first < middle. + if (prior_to_(values[middle], values[last])) { + // first < middle < last. + std::swap(values[0], values[middle]); + records.swap(0, middle); + } else if (prior_to_(values[first], values[last])) { + // first < last < middle. + std::swap(values[0], values[last]); + records.swap(0, last); + } else { + // last < first < middle. + std::swap(values[0], values[first]); + records.swap(0, first); + } + } else if (prior_to_(values[last], values[middle])) { + // last < middle < first. + std::swap(values[0], values[middle]); + records.swap(0, middle); + } else if (prior_to_(values[last], values[first])) { + // middle < last < first. + std::swap(values[0], values[last]); + records.swap(0, last); + } else { + // middle < first < last. + std::swap(values[0], values[first]); + records.swap(0, first); + } } } // namespace @@ -86,46 +301,55 @@ bool Node<T>::sort(Error *error, RecordSet *record_set, Int begin, Int end) { unique_ptr<SorterNode> SorterNode::create(Error *error, Order &&order) { unique_ptr<SorterNode> node; switch (order.expression->data_type()) { - // TODO: Bool type does not support pointer. + // TODO: Bool does not support comparison operators <, >. + // Also, Bool does not support pointer. // case BOOL_DATA: { -// node.reset(new (nothrow) Node<Bool>(std::move(order))); +// if (order.type == REGULAR_ORDER) { +// node.reset(new (nothrow) Node<RegularComparer<Bool>>( +// std::move(order))); +// } else { +// node.reset(new (nothrow) Node<ReverseComparer<Bool>>( +// std::move(order))); +// } // break; // } case INT_DATA: { if (order.type == REGULAR_ORDER) { - node.reset(new (nothrow) Node<RegularComparer<Int>>(std::move(order))); + node.reset(new (nothrow) Node<RegularComparer<Int>>( + std::move(order))); } else { - node.reset(new (nothrow) Node<ReverseComparer<Int>>(std::move(order))); + node.reset(new (nothrow) Node<ReverseComparer<Int>>( + std::move(order))); } break; } case FLOAT_DATA: { if (order.type == REGULAR_ORDER) { - node.reset( - new (nothrow) Node<RegularComparer<Float>>(std::move(order))); + node.reset(new (nothrow) Node<RegularComparer<Float>>( + std::move(order))); } else { - node.reset( - new (nothrow) Node<ReverseComparer<Float>>(std::move(order))); + node.reset(new (nothrow) Node<ReverseComparer<Float>>( + std::move(order))); } break; } case TIME_DATA: { if (order.type == REGULAR_ORDER) { - node.reset( - new (nothrow) Node<RegularComparer<Time>>(std::move(order))); + node.reset(new (nothrow) Node<RegularComparer<Time>>( + std::move(order))); } else { - node.reset( - new (nothrow) Node<ReverseComparer<Time>>(std::move(order))); + node.reset(new (nothrow) Node<ReverseComparer<Time>>( + std::move(order))); } break; } case TEXT_DATA: { if (order.type == REGULAR_ORDER) { - node.reset( - new (nothrow) Node<RegularComparer<Text>>(std::move(order))); + node.reset(new (nothrow) Node<RegularComparer<Text>>( + std::move(order))); } else { - node.reset( - new (nothrow) Node<ReverseComparer<Text>>(std::move(order))); + node.reset(new (nothrow) Node<ReverseComparer<Text>>( + std::move(order))); } break; } @@ -146,6 +370,10 @@ unique_ptr<Sorter> Sorter::create( Error *error, unique_ptr<OrderSet> &&order_set, const SorterOptions &options) { + if ((options.offset < 0) || (options.limit < 0)) { + GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Invalid argument"); + return nullptr; + } unique_ptr<Sorter> sorter(new (nothrow) Sorter); if (!sorter) { GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed"); @@ -179,16 +407,31 @@ bool Sorter::finish(Error *error) { // Nothing to do. return true; } + if ((offset_ >= record_set_->size()) || (limit_ <= 0)) { + record_set_->clear(); + return true; + } + Int begin = offset_; + Int end; + if (limit_ <= (record_set_->size() - offset_)) { + end = offset_ + limit_; + } else { + end = record_set_->size() - offset_; + } if (record_set_->size() <= 1) { return true; } - return head_->sort(error, record_set_, 0, record_set_->size()); + return head_->sort(error, record_set_->subset(), begin, end); } bool Sorter::sort(Error *error, RecordSet *record_set) { return reset(error, record_set) && finish(error); } -Sorter::Sorter() : record_set_(nullptr), head_() {} +Sorter::Sorter() + : head_(), + record_set_(nullptr), + offset_(0), + limit_(0) {} } // namespace grnxx -------------- next part -------------- HTML����������������������������...Download