A simple parallel sort as a demonstration of how easy (and fast) this is in C++
Revision | e36b2c2fcb589cbaaeee6febc7da353123e41753 (tree) |
---|---|
Time | 2018-12-13 03:20:17 |
Author | Eric Hopper <hopper@omni...> |
Commiter | Eric Hopper |
Initial versions of a C sort vs. a C++ sort.
@@ -0,0 +1,36 @@ | ||
1 | +#include <stdlib.h> | |
2 | +#include <stdint.h> | |
3 | +#include <stdio.h> | |
4 | + | |
5 | +const uint32_t total_size = 1ULL << 31; | |
6 | + | |
7 | +int compare_ints(void const *a, void const *b) | |
8 | +{ | |
9 | + const uint32_t ta = *((uint32_t *)a); | |
10 | + const uint32_t tb = *((uint32_t *)b); | |
11 | + return (ta > tb) ? 1 : ((ta == tb) ? 0 : -1); | |
12 | +} | |
13 | + | |
14 | +uint32_t get_urandom() | |
15 | +{ | |
16 | + FILE *urandom = fopen("/dev/urandom", "rb"); | |
17 | + char buf[4]; | |
18 | + fread(buf, 1U, 4, urandom); | |
19 | + fclose(urandom); | |
20 | + return ((uint32_t)(buf[0]) << 24) | | |
21 | + ((uint32_t)(buf[1]) << 16) | | |
22 | + ((uint32_t)(buf[2]) << 8) | | |
23 | + (uint32_t)(buf[3]); | |
24 | +} | |
25 | + | |
26 | +int main() | |
27 | +{ | |
28 | + uint32_t * const huge_array = calloc(total_size, sizeof(uint32_t)); | |
29 | + uint32_t seed = get_urandom(); | |
30 | + seed = (seed * 48271) % 2147483647ULL; | |
31 | + for (uint32_t i = 0; i < total_size; ++i) { | |
32 | + huge_array[i] = seed; | |
33 | + seed = (seed * 48271) % 2147483647ULL; | |
34 | + } | |
35 | + qsort(huge_array, total_size, sizeof(huge_array[0]), compare_ints); | |
36 | +} |
@@ -0,0 +1,133 @@ | ||
1 | +#include <thread> | |
2 | +#include <algorithm> | |
3 | +#include <vector> | |
4 | +#include <deque> | |
5 | +#include <random> | |
6 | +#include <utility> | |
7 | +#include <cstdint> | |
8 | +//#include <fstream> | |
9 | + | |
10 | +using vecsize_t = ::std::vector<::std::uint32_t>::size_type; | |
11 | +using veciter_t = ::std::vector<::std::uint32_t>::iterator; | |
12 | + | |
13 | +void gen_and_sort_section(const veciter_t &begin, const veciter_t &end, int seed) | |
14 | +{ | |
15 | + ::std::minstd_rand randgen(seed); | |
16 | + ::std::generate(begin, end, randgen); | |
17 | + ::std::sort(begin, end); | |
18 | +} | |
19 | + | |
20 | +void merge_sections(const veciter_t &begin, | |
21 | + const veciter_t &middle, | |
22 | + const veciter_t &end, | |
23 | + ::std::thread thr1, | |
24 | + ::std::thread thr2) | |
25 | +{ | |
26 | + thr1.join(); | |
27 | + thr2.join(); | |
28 | + ::std::inplace_merge(begin, middle, end); | |
29 | +} | |
30 | + | |
31 | +struct sort_thread { | |
32 | + ::std::thread thr; | |
33 | + const int part_begin; | |
34 | + const int part_end; | |
35 | + | |
36 | + sort_thread(decltype(gen_and_sort_section) &func, | |
37 | + const veciter_t &begin, | |
38 | + const veciter_t &end, | |
39 | + int seed, | |
40 | + int part_begin, | |
41 | + int part_end) | |
42 | + : thr(func, begin, end, seed), part_begin(part_begin), part_end(part_end) | |
43 | + { | |
44 | + } | |
45 | + sort_thread(decltype(merge_sections) &func, | |
46 | + const veciter_t &begin, | |
47 | + const veciter_t &middle, | |
48 | + const veciter_t &end, | |
49 | + ::std::thread thr1, | |
50 | + ::std::thread thr2, | |
51 | + int part_begin, | |
52 | + int part_end) | |
53 | + : thr(func, begin, middle, end, ::std::move(thr1), ::std::move(thr2)), | |
54 | + part_begin(part_begin), part_end(part_end) | |
55 | + { | |
56 | + } | |
57 | +}; | |
58 | + | |
59 | +int main() | |
60 | +{ | |
61 | + using ::std::vector; | |
62 | + using ::std::thread; | |
63 | + | |
64 | + constexpr auto size = 1ULL << 31; | |
65 | + ::std::vector<::std::uint32_t> huge_array(size); | |
66 | + const int cpucount = ::std::max(thread::hardware_concurrency() / 2, 1U); | |
67 | + const int sections = cpucount; | |
68 | + vector<decltype(huge_array.begin())> partitions(sections + 1); | |
69 | + | |
70 | + partitions[0] = huge_array.begin(); | |
71 | + for (unsigned int i = 1; i < partitions.size(); ++i) { | |
72 | + auto const prev_part = partitions[i - 1]; | |
73 | + auto const remaining = partitions.size() - i; | |
74 | + partitions[i] = prev_part + (huge_array.end() - prev_part) / remaining; | |
75 | + } | |
76 | + if (partitions[partitions.size() - 1] != huge_array.end()) { | |
77 | + return 1; | |
78 | + } | |
79 | + | |
80 | + ::std::random_device seedmaker; | |
81 | + ::std::deque<sort_thread> rthreads; | |
82 | + for (int i = 0; i < sections; ++i) | |
83 | + { | |
84 | + rthreads.emplace_back( | |
85 | + gen_and_sort_section, partitions[i], partitions[i + 1], seedmaker(), | |
86 | + i, i + 1 | |
87 | + ); | |
88 | + } | |
89 | + while (rthreads.size() >= 2) { | |
90 | + if (rthreads.front().part_end >= sections) { | |
91 | + rthreads.emplace_back(::std::move(rthreads.front())); | |
92 | + rthreads.pop_front(); | |
93 | + } else { | |
94 | + auto thr1{::std::move(rthreads.front())}; | |
95 | + rthreads.pop_front(); | |
96 | + auto thr2{::std::move(rthreads.front())}; | |
97 | + rthreads.pop_front(); | |
98 | + if (thr1.part_end != thr2.part_begin) { | |
99 | + return 2; | |
100 | + } else { | |
101 | + rthreads.emplace_back(merge_sections, | |
102 | + partitions[thr1.part_begin], | |
103 | + partitions[thr1.part_end], | |
104 | + partitions[thr2.part_end], | |
105 | + ::std::move(thr1.thr), | |
106 | + ::std::move(thr2.thr), | |
107 | + thr1.part_begin, | |
108 | + thr2.part_end); | |
109 | + } | |
110 | + } | |
111 | + } | |
112 | + if (rthreads.front().part_begin != 0) { | |
113 | + return 3; | |
114 | + } else if (rthreads.front().part_end != sections) { | |
115 | + return 4; | |
116 | + } else { | |
117 | + rthreads.front().thr.join(); | |
118 | + } | |
119 | + /* | |
120 | + { | |
121 | + ::std::ofstream of("/proc/self/fd/1"); | |
122 | + | |
123 | + of << huge_array[0] << '\n'; | |
124 | + for (vecsize_t i = 1; i < huge_array.size(); ++i) { | |
125 | + if (huge_array[i] < huge_array[i - 1]) { | |
126 | + return 5; | |
127 | + } | |
128 | + of << huge_array[i] << '\n'; | |
129 | + } | |
130 | + } | |
131 | + */ | |
132 | + return 0; | |
133 | +} |