susumu.yata
null+****@clear*****
Tue Dec 16 10:43:55 JST 2014
susumu.yata 2014-11-20 18:53:18 +0900 (Thu, 20 Nov 2014) New Revision: 3f39e49e48d4895f353406c1770dd4ad636c6bcd https://github.com/groonga/grnxx/commit/3f39e49e48d4895f353406c1770dd4ad636c6bcd Message: Enable Sorter for Int, Float, and Text. (#112) Modified files: lib/grnxx/impl/sorter.cpp Modified: lib/grnxx/impl/sorter.cpp (+300 -374) =================================================================== --- lib/grnxx/impl/sorter.cpp 2014-11-20 17:47:31 +0900 (962e26a) +++ lib/grnxx/impl/sorter.cpp 2014-11-20 18:53:18 +0900 (f44d97f) @@ -59,7 +59,7 @@ class SeparatorNode : public TypedNode<Bool> { : TypedNode<Bool>(std::move(order)), prior_value_((order.type == SORTER_REGULAR_ORDER) ? Bool::false_value() : Bool::true_value()) {} - ~SeparatorNode() {} + ~SeparatorNode() = default; void sort(ArrayRef<Record> records, size_t begin, size_t end); @@ -113,384 +113,307 @@ void SeparatorNode::sort(ArrayRef<Record> records, size_t begin, size_t end) { } } -//// ----- RegularIsPrior ----- - -//struct RegularIsPrior { -// bool operator()(Bool arg) const { -// return !arg; -// } -//}; - -//// ----- ReverseIsPrior ----- - -//struct ReverseIsPrior { -// bool operator()(Bool arg) const { -// return arg; -// } -//}; - -//// ---- QuickSortNode ---- - -//template <typename T> -//struct Equal { -// bool operator()(T lhs, T rhs) const { -// return lhs == rhs; -// } -//}; - -//template <> -//struct Equal<Float> { -// bool operator()(Float lhs, Float rhs) const { -// return (lhs == rhs) || (std::isnan(lhs) && std::isnan(rhs)); -// } -//}; - -//template <typename T> -//class QuickSortNode : public TypedNode<typename T::Value> { -// public: -// using Comparer = T; -// using Value = typename Comparer::Value; - -// explicit QuickSortNode(SorterOrder &&order) -// : TypedNode<Value>(std::move(order)), -// comparer_() {} -// ~QuickSortNode() {} - -// bool sort(Error *error, ArrayRef<Record> records, Int begin, Int end); - -// private: -// Comparer comparer_; - -// // Sort records with ternary quick sort. -// // -// // Switches to insertion sort when the sorting range becomes small enough. -// // -// // On success, returns true. -// // On failure, returns false and stores error information into "*error" if -// // "error" != nullptr. -// bool quick_sort(Error *error, -// ArrayRef<Record> records, -// Value *values, -// Int begin, -// Int end); - -// // Sort records with insertion sort. -// // -// // Insertion sort should be used when there few records. -// // -// // On success, returns true. -// // On failure, returns false and stores error information into "*error" if -// // "error" != nullptr. -// bool insertion_sort(Error *error, ArrayRef<Record> records, Value *values); - -// // Choose the pivot and move it to the front. -// void move_pivot_first(ArrayRef<Record> records, Value *values); -//}; - -//template <typename T> -//bool QuickSortNode<T>::sort(Error *error, -// ArrayRef<Record> records, -// Int begin, -// Int end) { -// if (!this->order_.expression->evaluate(error, records, &this->values_)) { -// return false; -// } -// return quick_sort(error, records, this->values_.data(), begin, end); -//} - -//template <typename T> -//bool QuickSortNode<T>::quick_sort(Error *error, -// ArrayRef<Record> records, -// Value *values, -// Int begin, -// Int end) { -// // Use ternary quick sort if there are enough records. -// // -// // TODO: Currently, the threshold is 16. -// // This value should be optimized and replaced with a named constant. -// while (records.size() >= 16) { -// move_pivot_first(records, values); -// const Value pivot = values[0]; -// Int left = 1; -// Int right = records.size(); -// Int pivot_left = 1; -// Int pivot_right = records.size(); -// for ( ; ; ) { -// // Move entries based on comparison against the pivot. -// // Prior entries are moved to left. -// // Less prior entries are moved to right. -// // Entries which equal to the pivot are moved to the edges. -// while (left < right) { -// if (comparer_(pivot, values[left])) { -// break; -// } else if (Equal<Value>()(pivot, values[left])) { -// std::swap(values[left], values[pivot_left]); -// records.swap(left, pivot_left); -// ++pivot_left; -// } -// ++left; -// } -// while (left < right) { -// --right; -// if (comparer_(values[right], pivot)) { -// break; -// } else if (Equal<Value>()(values[right], pivot)) { -// --pivot_right; -// std::swap(values[right], values[pivot_right]); -// records.swap(right, pivot_right); -// } -// } -// if (left >= right) { -// break; -// } -// std::swap(values[left], values[right]); -// records.swap(left, right); -// ++left; -// } - -// // Move left pivot-equivalent entries to the left side of the boundary. -// while (pivot_left > 0) { -// --pivot_left; -// --left; -// std::swap(values[pivot_left], values[left]); -// records.swap(pivot_left, left); -// } -// // Move right pivot-equivalent entries to the right side of the boundary. -// while (pivot_right < records.size()) { -// std::swap(values[pivot_right], values[right]); -// records.swap(pivot_right, right); -// ++pivot_right; -// ++right; -// } - -// // Apply the next sort condition to the pivot-equivalent records. -// if (this->next_) { -// if (((right - left) >= 2) && (begin < right) && (end > left)) { -// Int next_begin = (begin < left) ? 0 : (begin - left); -// Int next_end = ((end > right) ? right : end) - left; -// if (!this->next_->sort(error, records.ref(left, right - left), -// next_begin, next_end)) { -// return false; -// } -// } -// } - -// // There are the left group and the right group. -// // A recursive call is used for sorting the smaller group. -// // The recursion depth is less than log_2(n) where n is the number of -// // records. -// // The next loop of the current call is used for sorting the larger group. -// if (left < (records.size() - right)) { -// if ((begin < left) && (left >= 2)) { -// Int next_end = (end < left) ? end : left; -// if (!quick_sort(error, records.ref(0, left), values, -// begin, next_end)) { -// return false; -// } -// } -// if (end <= right) { -// return true; -// } -// records = records.ref(right); -// values += right; -// begin -= right; -// if (begin < 0) { -// begin = 0; -// } -// end -= right; -// } else { -// if ((end > right) && ((records.size() - right) >= 2)) { -// Int next_begin = (begin < right) ? 0 : (begin - right); -// Int next_end = end - right; -// if (!quick_sort(error, records.ref(right), -// values + right, next_begin, next_end)) { -// return false; -// } -// } -// if (begin >= left) { -// return true; -// } -// records = records.ref(0, left); -// if (end > left) { -// end = left; -// } -// } -// } - -// if (records.size() >= 2) { -// return insertion_sort(error, records, values); -// } -// return true; -//} - -//template <typename T> -//bool QuickSortNode<T>::insertion_sort(Error *error, -// ArrayRef<Record> records, -// Value *values) { -// for (Int i = 1; i < records.size(); ++i) { -// for (Int j = i; j > 0; --j) { -// if (comparer_(values[j], values[j - 1])) { -// std::swap(values[j], values[j - 1]); -// records.swap(j, j - 1); -// } else { -// break; -// } -// } -// } - -// // Apply the next sorting if there are records having the same value. -// if (this->next_) { -// Int begin = 0; -// for (Int i = 1; i < records.size(); ++i) { -// if (!Equal<Value>()(values[i], values[begin])) { -// if ((i - begin) >= 2) { -// if (!this->next_->sort(error, records.ref(begin, i - begin), -// 0, i - begin)) { -// return false; -// } -// } -// begin = i; -// } -// } -// if ((records.size() - begin) >= 2) { -// if (!this->next_->sort(error, records.ref(begin), -// 0, records.size() - begin)) { -// return false; -// } -// } -// } -// return true; -//} - -//template <typename T> -//void QuickSortNode<T>::move_pivot_first(ArrayRef<Record> records, -// Value *values) { -// // Choose the median from values[1], values[1 / size], and values[size - 2]. -// // The reason why not using values[0] and values[size - 1] is to avoid the -// // worst case which occurs when the records are sorted in reverse order. -// Int first = 1; -// Int middle = records.size() / 2; -// Int last = records.size() - 2; -// if (comparer_(values[first], values[middle])) { -// // first < middle. -// if (comparer_(values[middle], values[last])) { -// // first < middle < last. -// std::swap(values[0], values[middle]); -// records.swap(0, middle); -// } else if (comparer_(values[first], values[last])) { -// // first < last < middle. -// std::swap(values[0], values[last]); -// records.swap(0, last); -// } else { -// // last < first < middle. -// std::swap(values[0], values[first]); -// records.swap(0, first); -// } -// } else if (comparer_(values[last], values[middle])) { -// // last < middle < first. -// std::swap(values[0], values[middle]); -// records.swap(0, middle); -// } else if (comparer_(values[last], values[first])) { -// // middle < last < first. -// std::swap(values[0], values[last]); -// records.swap(0, last); -// } else { -// // middle < first < last. -// std::swap(values[0], values[first]); -// records.swap(0, first); -// } -//} - -//// ----- RegularComparer ----- - -//template <typename T> -//struct RegularComparer { -// using Value = T; -// bool operator()(Value lhs, Value rhs) const { -// return lhs < rhs; -// } -//}; - -//template <> -//struct RegularComparer<Float> { -// using Value = Float; -// bool operator()(Value lhs, Value rhs) const { -// // Numbers are prior to NaN. -// if (std::isnan(lhs)) { -// return false; -// } else if (std::isnan(rhs)) { -// return true; -// } -// return lhs < rhs; -// } -//}; - -//template <> -//struct RegularComparer<GeoPoint> { -// using Value = GeoPoint; -// bool operator()(Value lhs, Value rhs) const { -// if (lhs.latitude() != rhs.latitude()) { -// return lhs.latitude() < rhs.latitude(); -// } -// return lhs.longitude() < rhs.longitude(); -// } -//}; - -//// ----- ReverseComparer ----- - -//template <typename T> -//struct ReverseComparer { -// using Value = T; -// bool operator()(Value lhs, Value rhs) const { -// return RegularComparer<T>()(rhs, lhs); -// } -//}; +// ---- QuickSortNode ---- + +template <typename T> +struct Equal { + bool operator()(const T &lhs, const T &rhs) const { + return lhs.match(rhs); + } +}; + +template <typename T> +class QuickSortNode : public TypedNode<typename T::Value> { + public: + using Comparer = T; + using Value = typename Comparer::Value; + + explicit QuickSortNode(SorterOrder &&order) + : TypedNode<Value>(std::move(order)), + comparer_() {} + ~QuickSortNode() = default; + + void sort(ArrayRef<Record> records, size_t begin, size_t end); + + private: + Comparer comparer_; + + // Sort records with ternary quick sort. + // + // Switches to insertion sort when the sorting range becomes small enough. + // + // On success, returns true. + // On failure, throws an exception. + void quick_sort(ArrayRef<Record> records, + Value *values, + size_t begin, + size_t end); + + // Sort records with insertion sort. + // + // Insertion sort should be used when there few records. + // + // On success, returns true. + // On failure, throws an exception. + void insertion_sort(ArrayRef<Record> records, Value *values); + + // Choose the pivot and move it to the front. + void move_pivot_first(ArrayRef<Record> records, Value *values); +}; + +template <typename T> +void QuickSortNode<T>::sort(ArrayRef<Record> records, + size_t begin, + size_t end) { + this->order_.expression->evaluate(records, &this->values_); + quick_sort(records, this->values_.buffer(), begin, end); +} + +template <typename T> +void QuickSortNode<T>::quick_sort(ArrayRef<Record> records, + Value *values, + size_t begin, + size_t end) { + // Use ternary quick sort if there are enough records. + // + // TODO: Currently, the threshold is 16. + // This value should be optimized and replaced with a named constant. + while (records.size() >= 16) { + move_pivot_first(records, values); + const Value pivot = values[0]; + size_t left = 1; + size_t right = records.size(); + size_t pivot_left = 1; + size_t pivot_right = records.size(); + for ( ; ; ) { + // Move entries based on comparison against the pivot. + // Prior entries are moved to left. + // Less prior entries are moved to right. + // Entries which equal to the pivot are moved to the edges. + while (left < right) { + if (comparer_(pivot, values[left])) { + break; + } else if (Equal<Value>()(pivot, values[left])) { + std::swap(values[left], values[pivot_left]); + std::swap(records[left], records[pivot_left]); + ++pivot_left; + } + ++left; + } + while (left < right) { + --right; + if (comparer_(values[right], pivot)) { + break; + } else if (Equal<Value>()(values[right], pivot)) { + --pivot_right; + std::swap(values[right], values[pivot_right]); + std::swap(records[right], records[pivot_right]); + } + } + if (left >= right) { + break; + } + std::swap(values[left], values[right]); + std::swap(records[left], records[right]); + ++left; + } + + // Move left pivot-equivalent entries to the left side of the boundary. + while (pivot_left > 0) { + --pivot_left; + --left; + std::swap(values[pivot_left], values[left]); + std::swap(records[pivot_left], records[left]); + } + // Move right pivot-equivalent entries to the right side of the boundary. + while (pivot_right < records.size()) { + std::swap(values[pivot_right], values[right]); + std::swap(records[pivot_right], records[right]); + ++pivot_right; + ++right; + } + + // Apply the next sort condition to the pivot-equivalent records. + if (this->next_) { + if (((right - left) >= 2) && (begin < right) && (end > left)) { + size_t next_begin = (begin < left) ? 0 : (begin - left); + size_t next_end = ((end > right) ? right : end) - left; + this->next_->sort(records.ref(left, right - left), + next_begin, next_end); + } + } + + // There are the left group and the right group. + // A recursive call is used for sorting the smaller group. + // The recursion depth is less than log_2(n) where n is the number of + // records. + // The next loop of the current call is used for sorting the larger group. + if (left < (records.size() - right)) { + if ((begin < left) && (left >= 2)) { + size_t next_end = (end < left) ? end : left; + quick_sort(records.ref(0, left), values, begin, next_end); + } + if (end <= right) { + return; + } + records = records.ref(right); + values += right; + begin = (begin < right) ? 0 : (begin - right); + end -= right; + } else { + if ((end > right) && ((records.size() - right) >= 2)) { + size_t next_begin = (begin < right) ? 0 : (begin - right); + size_t next_end = end - right; + quick_sort(records.ref(right), + values + right, next_begin, next_end); + } + if (begin >= left) { + return; + } + records = records.ref(0, left); + if (end > left) { + end = left; + } + } + } + + if (records.size() >= 2) { + insertion_sort(records, values); + } +} + +template <typename T> +void QuickSortNode<T>::insertion_sort(ArrayRef<Record> records, + Value *values) { + for (size_t i = 1; i < records.size(); ++i) { + for (size_t j = i; j > 0; --j) { + if (comparer_(values[j], values[j - 1])) { + std::swap(values[j], values[j - 1]); + std::swap(records[j], records[j - 1]); + } else { + break; + } + } + } + + // Apply the next sorting if there are records having the same value. + if (this->next_) { + size_t begin = 0; + for (size_t i = 1; i < records.size(); ++i) { + if (!Equal<Value>()(values[i], values[begin])) { + if ((i - begin) >= 2) { + this->next_->sort(records.ref(begin, i - begin), 0, i - begin); + } + begin = i; + } + } + if ((records.size() - begin) >= 2) { + this->next_->sort(records.ref(begin), 0, records.size() - begin); + } + } +} + +template <typename T> +void QuickSortNode<T>::move_pivot_first(ArrayRef<Record> records, + Value *values) { + // Choose the median from values[1], values[1 / size], and values[size - 2]. + // The reason why not using values[0] and values[size - 1] is to avoid the + // worst case which occurs when the records are sorted in reverse order. + size_t first = 1; + size_t middle = records.size() / 2; + size_t last = records.size() - 2; + if (comparer_(values[first], values[middle])) { + // first < middle. + if (comparer_(values[middle], values[last])) { + // first < middle < last. + std::swap(values[0], values[middle]); + std::swap(records[0], records[middle]); + } else if (comparer_(values[first], values[last])) { + // first < last < middle. + std::swap(values[0], values[last]); + std::swap(records[0], records[last]); + } else { + // last < first < middle. + std::swap(values[0], values[first]); + std::swap(records[0], records[first]); + } + } else if (comparer_(values[last], values[middle])) { + // last < middle < first. + std::swap(values[0], values[middle]); + std::swap(records[0], records[middle]); + } else if (comparer_(values[last], values[first])) { + // middle < last < first. + std::swap(values[0], values[last]); + std::swap(records[0], records[last]); + } else { + // middle < first < last. + std::swap(values[0], values[first]); + std::swap(records[0], records[first]); + } +} + +// ----- RegularComparer ----- + +template <typename T> +struct RegularComparer { + using Value = T; + + // Return whether "lhs" is prior to "rhs" or not. + bool operator()(const Value &lhs, const Value &rhs) const { + if (lhs.is_na()) { + return false; + } else if (rhs.is_na()) { + return true; + } + return (lhs < rhs).is_true(); + } +}; + +// ----- ReverseComparer ----- + +template <typename T> +struct ReverseComparer { + using Value = T; + + // Return whether "lhs" is prior to "rhs" or not. + bool operator()(const Value &lhs, const Value &rhs) const { + if (lhs.is_na()) { + return false; + } else if (rhs.is_na()) { + return true; + } + return (lhs > rhs).is_true(); + } +}; + +// -- Node -- Node *Node::create(SorterOrder &&order) try { switch (order.expression->data_type()) { case BOOL_DATA: { return new SeparatorNode(std::move(order)); } -// case INT_DATA: { -// if (order.type == REGULAR_ORDER) { -// node.reset(new (nothrow) QuickSortNode<RegularComparer<Int>>( -// std::move(order))); -// } else { -// node.reset(new (nothrow) QuickSortNode<ReverseComparer<Int>>( -// std::move(order))); -// } -// break; -// } -// case FLOAT_DATA: { -// if (order.type == REGULAR_ORDER) { -// node.reset(new (nothrow) QuickSortNode<RegularComparer<Float>>( -// std::move(order))); -// } else { -// node.reset(new (nothrow) QuickSortNode<ReverseComparer<Float>>( -// std::move(order))); -// } -// break; -// } -// case GEO_POINT_DATA: { -// if (order.type == REGULAR_ORDER) { -// node.reset(new (nothrow) QuickSortNode<RegularComparer<GeoPoint>>( -// std::move(order))); -// } else { -// node.reset(new (nothrow) QuickSortNode<ReverseComparer<GeoPoint>>( -// std::move(order))); -// } -// break; -// } -// case TEXT_DATA: { -// if (order.type == REGULAR_ORDER) { -// node.reset(new (nothrow) QuickSortNode<RegularComparer<Text>>( -// std::move(order))); -// } else { -// node.reset(new (nothrow) QuickSortNode<ReverseComparer<Text>>( -// std::move(order))); -// } -// break; -// } + case INT_DATA: { + if (order.type == SORTER_REGULAR_ORDER) { + return new QuickSortNode<RegularComparer<Int>>(std::move(order)); + } else { + return new QuickSortNode<ReverseComparer<Int>>(std::move(order)); + } + } + case FLOAT_DATA: { + if (order.type == SORTER_REGULAR_ORDER) { + return new QuickSortNode<RegularComparer<Float>>(std::move(order)); + } else { + return new QuickSortNode<ReverseComparer<Float>>(std::move(order)); + } + } + case TEXT_DATA: { + if (order.type == SORTER_REGULAR_ORDER) { + return new QuickSortNode<RegularComparer<Text>>(std::move(order)); + } else { + return new QuickSortNode<ReverseComparer<Text>>(std::move(order)); + } + } default: { throw "Invalid data type"; // TODO } @@ -499,6 +422,10 @@ Node *Node::create(SorterOrder &&order) try { throw "Memory allocation failed"; // TODO } +// TODO: Sorter for RowID and Score should be specialized. + +// TODO: Sorter for Text should be specialized. + } // namespace sorter using namespace sorter; @@ -543,7 +470,6 @@ void Sorter::sort(Array<Record> *records) { finish(); } - Sorter::Sorter(Array<SorterOrder> &&orders, const SorterOptions &options) : table_(nullptr), nodes_(), -------------- next part -------------- HTML����������������������������...Download