Susumu Yata
null+****@clear*****
Fri Dec 11 19:04:13 JST 2015
Susumu Yata 2015-12-11 19:04:13 +0900 (Fri, 11 Dec 2015) New Revision: 0e3469f62d4ae4c9ed683c7cfde42945c7c28056 https://github.com/groonga/groonga/commit/0e3469f62d4ae4c9ed683c7cfde42945c7c28056 Message: grn_ts: use multi-key quicksort for sorting by Text values GitHub: #435 Modified files: lib/ts/ts_sorter.c Modified: lib/ts/ts_sorter.c (+239 -2) =================================================================== --- lib/ts/ts_sorter.c 2015-12-10 17:21:45 +0900 (3963693) +++ lib/ts/ts_sorter.c 2015-12-11 19:04:13 +0900 (fc15e08) @@ -1453,6 +1453,243 @@ grn_ts_qsort_by_text_desc(grn_ctx *ctx, grn_ts_sorter_node *node, return GRN_SUCCESS; } +/* grn_ts_text_get_label() returns a label. */ +inline static int +grn_ts_text_get_label(grn_ts_text val, size_t depth) +{ + return (depth < val.size) ? (uint8_t)val.ptr[depth] : -1; +} + +/* grn_ts_text_cmp2() compares Text values. */ +inline static int +grn_ts_text_cmp2(grn_ts_text lhs, grn_ts_text rhs, size_t depth) +{ + size_t min_size = (lhs.size < rhs.size) ? lhs.size : rhs.size; + int result = memcmp(lhs.ptr + depth, rhs.ptr + depth, min_size - depth); + if (result != 0) { + return result; + } + if (lhs.size == rhs.size) { + return 0; + } + return (lhs.size < rhs.size) ? -1 : 1; +} + +/* grn_ts_move_pivot_by_text_asc2() moves the pivot to the front. */ +static void +grn_ts_move_pivot_by_text_asc2(grn_ts_sorter_node *node, grn_ts_text *vals, + grn_ts_record *recs, size_t n_recs, size_t depth) +{ + /* Choose the median from recs[1], recs[n_recs / 2], and recs[n_recs - 2]. */ + size_t first = 1; + size_t middle = n_recs / 2; + size_t last = n_recs - 2; + int first_label = grn_ts_text_get_label(vals[first], depth); + int middle_label = grn_ts_text_get_label(vals[middle], depth); + int last_label = grn_ts_text_get_label(vals[last], depth); + if (first_label < middle_label) { + /* first < middle. */ + if (middle_label < last_label) { + /* first < middle < last */ + grn_ts_rec_swap(&recs[0], &recs[middle]); + grn_ts_text_swap(&vals[0], &vals[middle]); + } else if (first_label < last_label) { + /* first < last < middle. */ + grn_ts_rec_swap(&recs[0], &recs[last]); + grn_ts_text_swap(&vals[0], &vals[last]); + } else { /* last < first < middle. */ + grn_ts_rec_swap(&recs[0], &recs[first]); + grn_ts_text_swap(&vals[0], &vals[first]); + } + } else if (last_label < middle_label) { + /* last < middle < first. */ + grn_ts_rec_swap(&recs[0], &recs[middle]); + grn_ts_text_swap(&vals[0], &vals[middle]); + } else if (last_label < first_label) { + /* middle < last < first. */ + grn_ts_rec_swap(&recs[0], &recs[last]); + grn_ts_text_swap(&vals[0], &vals[last]); + } else { /* middle < first < last. */ + grn_ts_rec_swap(&recs[0], &recs[first]); + grn_ts_text_swap(&vals[0], &vals[first]); + } +} + +/* grn_ts_isort_by_text_asc2() sorts records. */ +static grn_rc +grn_ts_isort_by_text_asc2(grn_ctx *ctx, grn_ts_sorter_node *node, + size_t offset, size_t limit, grn_ts_text *vals, + grn_ts_record *recs, size_t n_recs, size_t depth) +{ + for (size_t i = 1; i < n_recs; ++i) { + for (size_t j = i; j > 0; --j) { + if (grn_ts_text_cmp2(vals[j], vals[j - 1], depth) < 0) { + grn_ts_rec_swap(&recs[j], &recs[j - 1]); + grn_ts_text_swap(&vals[j], &vals[j - 1]); + } else { + break; + } + } + } + /* Apply the next sorting if there are score duplicates. */ + if (node->next) { + grn_rc rc; + size_t begin = 0; + for (size_t i = 1; i < n_recs; ++i) { + if (grn_ts_text_cmp2(vals[i], vals[begin], depth) != 0) { + if ((i - begin) >= 2) { + rc = grn_ts_sorter_node_sort(ctx, node->next, 0, i - begin, + recs + begin, i - begin); + if (rc != GRN_SUCCESS) { + return rc; + } + } + begin = i; + } + } + if ((n_recs - begin) >= 2) { + rc = grn_ts_sorter_node_sort(ctx, node->next, 0, n_recs - begin, + recs + begin, n_recs - begin); + if (rc != GRN_SUCCESS) { + return rc; + } + } + } + return GRN_SUCCESS; +} + +/* grn_ts_qsort_by_text_asc() sorts records. */ +static grn_rc +grn_ts_qsort_by_text_asc2(grn_ctx *ctx, grn_ts_sorter_node *node, + size_t offset, size_t limit, grn_ts_text *vals, + grn_ts_record *recs, size_t n_recs, size_t depth) +{ + grn_rc rc; + /* + * FIXME: Currently, the threshold is 16. + * This value should be optimized and replaced with a named constant. + */ + while (n_recs >= 16) { + grn_ts_move_pivot_by_text_asc(node, vals, recs, n_recs); + int pivot = grn_ts_text_get_label(vals[0], depth); + size_t left = 1, right = n_recs; + size_t pivot_left = 1, pivot_right = n_recs; + for ( ; ; ) { + /* + * Prior entries are moved to left. Less prior entries are moved to + * right. Entries which equal to the pivot are moved to the edges. + */ + while (left < right) { + int label = grn_ts_text_get_label(vals[left], depth); + if (label > pivot) { + break; + } else if (label == pivot) { + grn_ts_rec_swap(&recs[left], &recs[pivot_left]); + grn_ts_text_swap(&vals[left], &vals[pivot_left]); + ++pivot_left; + } + ++left; + } + while (left < right) { + int label; + --right; + label = grn_ts_text_get_label(vals[right], depth); + if (label < pivot) { + break; + } else if (label == pivot) { + --pivot_right; + grn_ts_rec_swap(&recs[right], &recs[pivot_right]); + grn_ts_text_swap(&vals[right], &vals[pivot_right]); + } + } + if (left >= right) { + break; + } + grn_ts_rec_swap(&recs[left], &recs[right]); + grn_ts_text_swap(&vals[left], &vals[right]); + ++left; + } + /* Move left pivot-equivalent entries to the left of the boundary. */ + while (pivot_left > 0) { + --pivot_left; + --left; + grn_ts_rec_swap(&recs[pivot_left], &recs[left]); + grn_ts_text_swap(&vals[pivot_left], &vals[left]); + } + /* Move right pivot-equivalent entries to the right of the boundary. */ + while (pivot_right < n_recs) { + grn_ts_rec_swap(&recs[pivot_right], &recs[right]); + grn_ts_text_swap(&vals[pivot_right], &vals[right]); + ++pivot_right; + ++right; + } + /* Apply the next sort condition to the pivot-equivalent recs. */ + if (((right - left) >= 2) && (offset < right) && (limit > left)) { + size_t next_offset = (offset < left) ? 0 : (offset - left); + size_t next_limit = ((limit > right) ? right : limit) - left; + if (pivot != -1) { + rc = grn_ts_qsort_by_text_asc2(ctx, node, next_offset, next_limit, + vals, recs + left, right - left, + depth + 1); + } else if (node->next) { + rc = grn_ts_sorter_node_sort(ctx, node->next, next_offset, next_limit, + recs + left, right - left); + if (rc != GRN_SUCCESS) { + return rc; + } + } + } + /* + * Use a recursive call to sort the smaller group so that the recursion + * depth is less than log_2(n_recs). + */ + if (left < (n_recs - right)) { + if ((offset < left) && (left >= 2)) { + size_t next_limit = (limit < left) ? limit : left; + rc = grn_ts_qsort_by_text_asc2(ctx, node, offset, next_limit, + vals, recs, left, depth); + if (rc != GRN_SUCCESS) { + return rc; + } + } + if (limit <= right) { + return GRN_SUCCESS; + } + vals += right; + recs += right; + n_recs -= right; + offset = (offset < right) ? 0 : (offset - right); + limit -= right; + } else { + if ((limit > right) && ((n_recs - right) >= 2)) { + size_t next_offset = (offset < right) ? 0 : (offset - right); + size_t next_limit = limit - right; + rc = grn_ts_qsort_by_text_asc2(ctx, node, next_offset, next_limit, + vals + right, recs + right, + n_recs - right, depth); + if (rc != GRN_SUCCESS) { + return rc; + } + } + if (offset >= left) { + return GRN_SUCCESS; + } + n_recs = left; + if (limit > left) { + limit = left; + } + } + } + if (n_recs >= 2) { + rc = grn_ts_isort_by_text_asc2(ctx, node, offset, limit, + vals, recs, n_recs, depth); + if (rc != GRN_SUCCESS) { + return rc; + } + } + return GRN_SUCCESS; +} + /* grn_ts_sorter_node_sort_by_var() sorts records. */ static grn_rc grn_ts_sorter_node_sort_by_var(grn_ctx *ctx, grn_ts_sorter_node *node, @@ -1528,8 +1765,8 @@ grn_ts_sorter_node_sort_by_var(grn_ctx *ctx, grn_ts_sorter_node *node, return grn_ts_qsort_by_text_desc(ctx, node, offset, limit, vals, recs, n_recs); } else { - return grn_ts_qsort_by_text_asc(ctx, node, offset, limit, - vals, recs, n_recs); + return grn_ts_qsort_by_text_asc2(ctx, node, offset, limit, + vals, recs, n_recs, 0); } } case GRN_TS_INT_VECTOR: -------------- next part -------------- HTML����������������������������...Download