psychlops cpp
Revision | 00d1c2a000216184893b59166c09927d2fd4deba (tree) |
---|---|
Time | 2015-04-04 09:31:44 |
Author | HOSOKAWA Kenchi <hskwk@inte...> |
Committer | HOSOKAWA Kenchi |
VC12
@@ -0,0 +1,115 @@ | ||
1 | +#pragma once | |
2 | +/** | |
3 | + * @file dSFMT-common.h | |
4 | + * | |
5 | + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom | |
6 | + * number generator with jump function. This file includes common functions | |
7 | + * used in random number generation and jump. | |
8 | + * | |
9 | + * @author Mutsuo Saito (Hiroshima University) | |
10 | + * @author Makoto Matsumoto (The University of Tokyo) | |
11 | + * | |
12 | + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima | |
13 | + * University. | |
14 | + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima | |
15 | + * University and The University of Tokyo. | |
16 | + * All rights reserved. | |
17 | + * | |
18 | + * The 3-clause BSD License is applied to this software, see | |
19 | + * LICENSE.txt | |
20 | + */ | |
21 | +#ifndef DSFMT_COMMON_H | |
22 | +#define DSFMT_COMMON_H | |
23 | + | |
24 | +#include "dSFMT.h" | |
25 | + | |
26 | +#if defined(HAVE_SSE2) | |
27 | +# include <emmintrin.h> | |
28 | +union X128I_T { | |
29 | + uint64_t u[2]; | |
30 | + __m128i i128; | |
31 | +}; | |
32 | +union X128D_T { | |
33 | + double d[2]; | |
34 | + __m128d d128; | |
35 | +}; | |
36 | +/** mask data for sse2 */ | |
37 | +static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}}; | |
38 | +#endif | |
39 | + | |
40 | +#if defined(HAVE_ALTIVEC) | |
41 | +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | |
42 | + w128_t *lung) { | |
43 | + const vector unsigned char sl1 = ALTI_SL1; | |
44 | + const vector unsigned char sl1_perm = ALTI_SL1_PERM; | |
45 | + const vector unsigned int sl1_msk = ALTI_SL1_MSK; | |
46 | + const vector unsigned char sr1 = ALTI_SR; | |
47 | + const vector unsigned char sr1_perm = ALTI_SR_PERM; | |
48 | + const vector unsigned int sr1_msk = ALTI_SR_MSK; | |
49 | + const vector unsigned char perm = ALTI_PERM; | |
50 | + const vector unsigned int msk1 = ALTI_MSK; | |
51 | + vector unsigned int w, x, y, z; | |
52 | + | |
53 | + z = a->s; | |
54 | + w = lung->s; | |
55 | + x = vec_perm(w, (vector unsigned int)perm, perm); | |
56 | + y = vec_perm(z, (vector unsigned int)sl1_perm, sl1_perm); | |
57 | + y = vec_sll(y, sl1); | |
58 | + y = vec_and(y, sl1_msk); | |
59 | + w = vec_xor(x, b->s); | |
60 | + w = vec_xor(w, y); | |
61 | + x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); | |
62 | + x = vec_srl(x, sr1); | |
63 | + x = vec_and(x, sr1_msk); | |
64 | + y = vec_and(w, msk1); | |
65 | + z = vec_xor(z, y); | |
66 | + r->s = vec_xor(z, x); | |
67 | + lung->s = w; | |
68 | +} | |
69 | +#elif defined(HAVE_SSE2) | |
70 | +/** | |
71 | + * This function represents the recursion formula. | |
72 | + * @param r output 128-bit | |
73 | + * @param a a 128-bit part of the internal state array | |
74 | + * @param b a 128-bit part of the internal state array | |
75 | + * @param u a 128-bit part of the internal state array (I/O) | |
76 | + */ | |
77 | +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { | |
78 | + __m128i v, w, x, y, z; | |
79 | + | |
80 | + x = a->si; | |
81 | + z = _mm_slli_epi64(x, DSFMT_SL1); | |
82 | + y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); | |
83 | + z = _mm_xor_si128(z, b->si); | |
84 | + y = _mm_xor_si128(y, z); | |
85 | + | |
86 | + v = _mm_srli_epi64(y, DSFMT_SR); | |
87 | + w = _mm_and_si128(y, sse2_param_mask.i128); | |
88 | + v = _mm_xor_si128(v, x); | |
89 | + v = _mm_xor_si128(v, w); | |
90 | + r->si = v; | |
91 | + u->si = y; | |
92 | +} | |
93 | +#else | |
94 | +/** | |
95 | + * This function represents the recursion formula. | |
96 | + * @param r output 128-bit | |
97 | + * @param a a 128-bit part of the internal state array | |
98 | + * @param b a 128-bit part of the internal state array | |
99 | + * @param lung a 128-bit part of the internal state array (I/O) | |
100 | + */ | |
101 | +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | |
102 | + w128_t *lung) { | |
103 | + uint64_t t0, t1, L0, L1; | |
104 | + | |
105 | + t0 = a->u[0]; | |
106 | + t1 = a->u[1]; | |
107 | + L0 = lung->u[0]; | |
108 | + L1 = lung->u[1]; | |
109 | + lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; | |
110 | + lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; | |
111 | + r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; | |
112 | + r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; | |
113 | +} | |
114 | +#endif | |
115 | +#endif |
@@ -39,7 +39,7 @@ | ||
39 | 39 | #define SSE2_SHUFF 0x1b |
40 | 40 | #elif defined(HAVE_ALTIVEC) |
41 | 41 | #if defined(__APPLE__) /* For OSX */ |
42 | - #define ALTI_SR (vector unsigned char)(4) | |
42 | + #define ALTI_SR (vector unsigned char)(4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4) | |
43 | 43 | #define ALTI_SR_PERM \ |
44 | 44 | (vector unsigned char)(15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14) |
45 | 45 | #define ALTI_SR_MSK \ |
@@ -47,7 +47,7 @@ | ||
47 | 47 | #define ALTI_PERM \ |
48 | 48 | (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3) |
49 | 49 | #else |
50 | - #define ALTI_SR {4} | |
50 | + #define ALTI_SR {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4} | |
51 | 51 | #define ALTI_SR_PERM {15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14} |
52 | 52 | #define ALTI_SR_MSK {0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU} |
53 | 53 | #define ALTI_PERM {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3} |
@@ -66,6 +66,14 @@ | ||
66 | 66 | #include "dSFMT-params11213.h" |
67 | 67 | #elif DSFMT_MEXP == 19937 |
68 | 68 | #include "dSFMT-params19937.h" |
69 | +#elif DSFMT_MEXP == 44497 | |
70 | + #include "dSFMT-params44497.h" | |
71 | +#elif DSFMT_MEXP == 86243 | |
72 | + #include "dSFMT-params86243.h" | |
73 | +#elif DSFMT_MEXP == 132049 | |
74 | + #include "dSFMT-params132049.h" | |
75 | +#elif DSFMT_MEXP == 216091 | |
76 | + #include "dSFMT-params216091.h" | |
69 | 77 | #else |
70 | 78 | #ifdef __GNUC__ |
71 | 79 | #error "DSFMT_MEXP is not valid." |
@@ -20,7 +20,7 @@ | ||
20 | 20 | |
21 | 21 | /* PARAMETERS FOR ALTIVEC */ |
22 | 22 | #if defined(__APPLE__) /* For OSX */ |
23 | - #define ALTI_SL1 (vector unsigned int)(3, 3, 3, 3) | |
23 | + #define ALTI_SL1 (vector unsigned char)(3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3) | |
24 | 24 | #define ALTI_SL1_PERM \ |
25 | 25 | (vector unsigned char)(2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1) |
26 | 26 | #define ALTI_SL1_MSK \ |
@@ -28,7 +28,7 @@ | ||
28 | 28 | #define ALTI_MSK (vector unsigned int)(DSFMT_MSK32_1, \ |
29 | 29 | DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4) |
30 | 30 | #else /* For OTHER OSs(Linux?) */ |
31 | - #define ALTI_SL1 {3, 3, 3, 3} | |
31 | + #define ALTI_SL1 {3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3} | |
32 | 32 | #define ALTI_SL1_PERM \ |
33 | 33 | {2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1} |
34 | 34 | #define ALTI_SL1_MSK \ |
@@ -15,6 +15,11 @@ | ||
15 | 15 | #include <string.h> |
16 | 16 | #include <stdlib.h> |
17 | 17 | #include "dSFMT-params.h" |
18 | +#include "dSFMT-common.h" | |
19 | + | |
20 | +#if defined(__cplusplus) | |
21 | +extern "C" { | |
22 | +#endif | |
18 | 23 | |
19 | 24 | /** dsfmt internal state vector */ |
20 | 25 | dsfmt_t dsfmt_global_data; |
@@ -26,30 +31,25 @@ static const int dsfmt_mexp = DSFMT_MEXP; | ||
26 | 31 | ----------------*/ |
27 | 32 | inline static uint32_t ini_func1(uint32_t x); |
28 | 33 | inline static uint32_t ini_func2(uint32_t x); |
29 | -inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t array[], | |
34 | +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, | |
30 | 35 | int size); |
31 | -inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t array[], | |
36 | +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, | |
32 | 37 | int size); |
33 | -inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t array[], | |
38 | +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, | |
34 | 39 | int size); |
35 | -inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t array[], | |
40 | +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, | |
36 | 41 | int size); |
37 | 42 | inline static int idxof(int i); |
38 | 43 | static void initial_mask(dsfmt_t *dsfmt); |
39 | 44 | static void period_certification(dsfmt_t *dsfmt); |
40 | 45 | |
41 | 46 | #if defined(HAVE_SSE2) |
42 | -# include <emmintrin.h> | |
43 | -/** mask data for sse2 */ | |
44 | -static __m128i sse2_param_mask; | |
45 | 47 | /** 1 in 64bit for sse2 */ |
46 | -static __m128i sse2_int_one; | |
48 | +static const union X128I_T sse2_int_one = {{1, 1}}; | |
47 | 49 | /** 2.0 double for sse2 */ |
48 | -static __m128d sse2_double_two; | |
50 | +static const union X128D_T sse2_double_two = {{2.0, 2.0}}; | |
49 | 51 | /** -1.0 double for sse2 */ |
50 | -static __m128d sse2_double_m_one; | |
51 | - | |
52 | -static void setup_const(void); | |
52 | +static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}}; | |
53 | 53 | #endif |
54 | 54 | |
55 | 55 | /** |
@@ -66,105 +66,6 @@ inline static int idxof(int i) { | ||
66 | 66 | } |
67 | 67 | #endif |
68 | 68 | |
69 | -/** | |
70 | - * This function represents the recursion formula. | |
71 | - * @param r output | |
72 | - * @param a a 128-bit part of the internal state array | |
73 | - * @param b a 128-bit part of the internal state array | |
74 | - * @param lung a 128-bit part of the internal state array | |
75 | - */ | |
76 | -#if defined(HAVE_ALTIVEC) | |
77 | -inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | |
78 | - w128_t *lung) { | |
79 | - const vector unsigned char sl1 = ALTI_SL1; | |
80 | - const vector unsigned char sl1_perm = ALTI_SL1_PERM; | |
81 | - const vector unsigned int sl1_msk = ALTI_SL1_MSK; | |
82 | - const vector unsigned char sr1 = ALTI_SR; | |
83 | - const vector unsigned char sr1_perm = ALTI_SR_PERM; | |
84 | - const vector unsigned int sr1_msk = ALTI_SR_MSK; | |
85 | - const vector unsigned char perm = ALTI_PERM; | |
86 | - const vector unsigned int msk1 = ALTI_MSK; | |
87 | - vector unsigned int w, x, y, z; | |
88 | - | |
89 | - z = a->s; | |
90 | - w = lung->s; | |
91 | - x = vec_perm(w, (vector unsigned int)perm, perm); | |
92 | - y = vec_perm(z, sl1_perm, sl1_perm); | |
93 | - y = vec_sll(y, sl1); | |
94 | - y = vec_and(y, sl1_msk); | |
95 | - w = vec_xor(x, b->s); | |
96 | - w = vec_xor(w, y); | |
97 | - x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); | |
98 | - x = vec_srl(x, sr1); | |
99 | - x = vec_and(x, sr1_msk); | |
100 | - y = vec_and(w, msk1); | |
101 | - z = vec_xor(z, y); | |
102 | - r->s = vec_xor(z, x); | |
103 | - lung->s = w; | |
104 | -} | |
105 | -#elif defined(HAVE_SSE2) | |
106 | -/** | |
107 | - * This function setup some constant variables for SSE2. | |
108 | - */ | |
109 | -static void setup_const(void) { | |
110 | - static int first = 1; | |
111 | - if (!first) { | |
112 | - return; | |
113 | - } | |
114 | - sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4, | |
115 | - DSFMT_MSK32_1, DSFMT_MSK32_2); | |
116 | - sse2_int_one = _mm_set_epi32(0, 1, 0, 1); | |
117 | - sse2_double_two = _mm_set_pd(2.0, 2.0); | |
118 | - sse2_double_m_one = _mm_set_pd(-1.0, -1.0); | |
119 | - first = 0; | |
120 | -} | |
121 | - | |
122 | -/** | |
123 | - * This function represents the recursion formula. | |
124 | - * @param r output 128-bit | |
125 | - * @param a a 128-bit part of the internal state array | |
126 | - * @param b a 128-bit part of the internal state array | |
127 | - * @param d a 128-bit part of the internal state array (I/O) | |
128 | - */ | |
129 | -inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { | |
130 | - __m128i v, w, x, y, z; | |
131 | - | |
132 | - x = a->si; | |
133 | - z = _mm_slli_epi64(x, DSFMT_SL1); | |
134 | - y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); | |
135 | - z = _mm_xor_si128(z, b->si); | |
136 | - y = _mm_xor_si128(y, z); | |
137 | - | |
138 | - v = _mm_srli_epi64(y, DSFMT_SR); | |
139 | - w = _mm_and_si128(y, sse2_param_mask); | |
140 | - v = _mm_xor_si128(v, x); | |
141 | - v = _mm_xor_si128(v, w); | |
142 | - r->si = v; | |
143 | - u->si = y; | |
144 | -} | |
145 | -#else /* standard C */ | |
146 | -/** | |
147 | - * This function represents the recursion formula. | |
148 | - * @param r output 128-bit | |
149 | - * @param a a 128-bit part of the internal state array | |
150 | - * @param b a 128-bit part of the internal state array | |
151 | - * @param lung a 128-bit part of the internal state array (I/O) | |
152 | - */ | |
153 | -inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | |
154 | - w128_t *lung) { | |
155 | - uint64_t t0, t1, L0, L1; | |
156 | - | |
157 | - t0 = a->u[0]; | |
158 | - t1 = a->u[1]; | |
159 | - L0 = lung->u[0]; | |
160 | - L1 = lung->u[1]; | |
161 | - lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; | |
162 | - lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; | |
163 | - r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; | |
164 | - r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; | |
165 | -} | |
166 | -#endif | |
167 | - | |
168 | 69 | #if defined(HAVE_SSE2) |
169 | 70 | /** |
170 | 71 | * This function converts the double precision floating point numbers which |
@@ -173,7 +74,7 @@ inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | ||
173 | 74 | * @param w 128bit structure of double precision floating point numbers (I/O) |
174 | 75 | */ |
175 | 76 | inline static void convert_c0o1(w128_t *w) { |
176 | - w->sd = _mm_add_pd(w->sd, sse2_double_m_one); | |
77 | + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); | |
177 | 78 | } |
178 | 79 | |
179 | 80 | /** |
@@ -183,7 +84,7 @@ inline static void convert_c0o1(w128_t *w) { | ||
183 | 84 | * @param w 128bit structure of double precision floating point numbers (I/O) |
184 | 85 | */ |
185 | 86 | inline static void convert_o0c1(w128_t *w) { |
186 | - w->sd = _mm_sub_pd(sse2_double_two, w->sd); | |
87 | + w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd); | |
187 | 88 | } |
188 | 89 | |
189 | 90 | /** |
@@ -193,8 +94,8 @@ inline static void convert_o0c1(w128_t *w) { | ||
193 | 94 | * @param w 128bit structure of double precision floating point numbers (I/O) |
194 | 95 | */ |
195 | 96 | inline static void convert_o0o1(w128_t *w) { |
196 | - w->si = _mm_or_si128(w->si, sse2_int_one); | |
197 | - w->sd = _mm_add_pd(w->sd, sse2_double_m_one); | |
97 | + w->si = _mm_or_si128(w->si, sse2_int_one.i128); | |
98 | + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); | |
198 | 99 | } |
199 | 100 | #else /* standard C and altivec */ |
200 | 101 | /** |
@@ -240,7 +141,7 @@ inline static void convert_o0o1(w128_t *w) { | ||
240 | 141 | * @param array an 128-bit array to be filled by pseudorandom numbers. |
241 | 142 | * @param size number of 128-bit pseudorandom numbers to be generated. |
242 | 143 | */ |
243 | -inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t array[], | |
144 | +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, | |
244 | 145 | int size) { |
245 | 146 | int i, j; |
246 | 147 | w128_t lung; |
@@ -278,7 +179,7 @@ inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t array[], | ||
278 | 179 | * @param array an 128-bit array to be filled by pseudorandom numbers. |
279 | 180 | * @param size number of 128-bit pseudorandom numbers to be generated. |
280 | 181 | */ |
281 | -inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t array[], | |
182 | +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, | |
282 | 183 | int size) { |
283 | 184 | int i, j; |
284 | 185 | w128_t lung; |
@@ -321,7 +222,7 @@ inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t array[], | ||
321 | 222 | * @param array an 128-bit array to be filled by pseudorandom numbers. |
322 | 223 | * @param size number of 128-bit pseudorandom numbers to be generated. |
323 | 224 | */ |
324 | -inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t array[], | |
225 | +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, | |
325 | 226 | int size) { |
326 | 227 | int i, j; |
327 | 228 | w128_t lung; |
@@ -364,7 +265,7 @@ inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t array[], | ||
364 | 265 | * @param array an 128-bit array to be filled by pseudorandom numbers. |
365 | 266 | * @param size number of 128-bit pseudorandom numbers to be generated. |
366 | 267 | */ |
367 | -inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t array[], | |
268 | +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, | |
368 | 269 | int size) { |
369 | 270 | int i, j; |
370 | 271 | w128_t lung; |
@@ -440,10 +341,14 @@ static void initial_mask(dsfmt_t *dsfmt) { | ||
440 | 341 | * @param dsfmt dsfmt state vector. |
441 | 342 | */ |
442 | 343 | static void period_certification(dsfmt_t *dsfmt) { |
443 | - int i, j; | |
444 | 344 | uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2}; |
445 | 345 | uint64_t tmp[2]; |
446 | 346 | uint64_t inner; |
347 | + int i; | |
348 | +#if (DSFMT_PCV2 & 1) != 1 | |
349 | + int j; | |
350 | + uint64_t work; | |
351 | +#endif | |
447 | 352 | |
448 | 353 | tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); |
449 | 354 | tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2); |
@@ -462,7 +367,6 @@ static void period_certification(dsfmt_t *dsfmt) { | ||
462 | 367 | #if (DSFMT_PCV2 & 1) == 1 |
463 | 368 | dsfmt->status[DSFMT_N].u[1] ^= 1; |
464 | 369 | #else |
465 | - uint64_t work; | |
466 | 370 | for (i = 1; i >= 0; i--) { |
467 | 371 | work = 1; |
468 | 372 | for (j = 0; j < 64; j++) { |
@@ -637,9 +541,6 @@ void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) { | ||
637 | 541 | initial_mask(dsfmt); |
638 | 542 | period_certification(dsfmt); |
639 | 543 | dsfmt->idx = DSFMT_N64; |
640 | -#if defined(HAVE_SSE2) | |
641 | - setup_const(); | |
642 | -#endif | |
643 | 544 | } |
644 | 545 | |
645 | 546 | /** |
@@ -688,7 +589,6 @@ void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], | ||
688 | 589 | r += key_length; |
689 | 590 | psfmt32[idxof((mid + lag) % size)] += r; |
690 | 591 | psfmt32[idxof(0)] = r; |
691 | - i = 1; | |
692 | 592 | count--; |
693 | 593 | for (i = 1, j = 0; (j < count) && (j < key_length); j++) { |
694 | 594 | r = ini_func1(psfmt32[idxof(i)] |
@@ -723,10 +623,11 @@ void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], | ||
723 | 623 | initial_mask(dsfmt); |
724 | 624 | period_certification(dsfmt); |
725 | 625 | dsfmt->idx = DSFMT_N64; |
726 | -#if defined(HAVE_SSE2) | |
727 | - setup_const(); | |
728 | -#endif | |
729 | 626 | } |
730 | 627 | #if defined(__INTEL_COMPILER) |
731 | 628 | # pragma warning(default:981) |
732 | 629 | #endif |
630 | + | |
631 | +#if defined(__cplusplus) | |
632 | +} | |
633 | +#endif |
@@ -1,5 +1,6 @@ | ||
1 | +#pragma once | |
1 | 2 | /** |
2 | - * @file dSFMT_bcc.h | |
3 | + * @file dSFMT.h | |
3 | 4 | * |
4 | 5 | * @brief double precision SIMD oriented Fast Mersenne Twister(dSFMT) |
5 | 6 | * pseudorandom number generator based on IEEE 754 format. |
@@ -9,6 +10,9 @@ | ||
9 | 10 | * |
10 | 11 | * Copyright (C) 2007, 2008 Mutsuo Saito, Makoto Matsumoto and |
11 | 12 | * Hiroshima University. All rights reserved. |
13 | + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, | |
14 | + * Hiroshima University and The University of Tokyo. | |
15 | + * All rights reserved. | |
12 | 16 | * |
13 | 17 | * The new BSD License is applied to this software. |
14 | 18 | * see LICENSE.txt |
@@ -30,6 +34,9 @@ | ||
30 | 34 | |
31 | 35 | #ifndef DSFMT_H |
32 | 36 | #define DSFMT_H |
37 | +#if defined(__cplusplus) | |
38 | +extern "C" { | |
39 | +#endif | |
33 | 40 | |
34 | 41 | #include <stdio.h> |
35 | 42 | #include <assert.h> |
@@ -43,7 +50,7 @@ | ||
43 | 50 | /*----------------- |
44 | 51 | BASIC DEFINITIONS |
45 | 52 | -----------------*/ |
46 | -/** Mersenne Exponent. The period of the sequence | |
53 | +/* Mersenne Exponent. The period of the sequence | |
47 | 54 | * is a multiple of 2^DSFMT_MEXP-1. |
48 | 55 | * #define DSFMT_MEXP 19937 */ |
49 | 56 | /** DSFMT generator has an internal state array of 128-bit integers, |
@@ -85,27 +92,21 @@ | ||
85 | 92 | |
86 | 93 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) |
87 | 94 | # include <inttypes.h> |
88 | -#elif defined(_MSC_VER) | |
89 | -# if !defined(DSFMT_UINT32_DEFINED) && !defined(SFMT_UINT32_DEFINED) | |
90 | -typedef unsigned int uint32_t; | |
91 | -typedef unsigned long long uint64_t; | |
92 | -# define DSFMT_UINT32_DEFINED | |
93 | -# if !defined(inline) | |
94 | -# define inline __inline | |
95 | -# endif | |
96 | -# endif | |
97 | -#elif defined(__BORLANDC__) | |
95 | +#elif defined(_MSC_VER) || defined(__BORLANDC__) | |
98 | 96 | # if !defined(DSFMT_UINT32_DEFINED) && !defined(SFMT_UINT32_DEFINED) |
99 | 97 | typedef unsigned int uint32_t; |
100 | 98 | typedef unsigned __int64 uint64_t; |
99 | +# ifndef UINT64_C | |
100 | +# define UINT64_C(v) (v ## ui64) | |
101 | +# endif | |
101 | 102 | # define DSFMT_UINT32_DEFINED |
102 | -# if !defined(inline) | |
103 | +# if !defined(inline) && !defined(__cplusplus) | |
103 | 104 | # define inline __inline |
104 | 105 | # endif |
105 | 106 | # endif |
106 | 107 | #else |
107 | 108 | # include <inttypes.h> |
108 | -# if !defined(inline) | |
109 | +# if !defined(inline) && !defined(__cplusplus) | |
109 | 110 | # if defined(__GNUC__) |
110 | 111 | # define inline __inline__ |
111 | 112 | # else |
@@ -125,14 +126,9 @@ typedef unsigned __int64 uint64_t; | ||
125 | 126 | #endif |
126 | 127 | |
127 | 128 | #ifndef UINT64_C |
128 | -# if defined(__BORLANDC__) | |
129 | -# define UINT64_C(v) (v ## UI64) | |
130 | -# else | |
131 | -# define UINT64_C(v) (v ## ULL) | |
132 | -# endif | |
129 | +# define UINT64_C(v) (v ## ULL) | |
133 | 130 | #endif |
134 | 131 | |
135 | - | |
136 | 132 | /*------------------------------------------ |
137 | 133 | 128-bit SIMD like data type for standard C |
138 | 134 | ------------------------------------------*/ |
@@ -183,10 +179,6 @@ extern dsfmt_t dsfmt_global_data; | ||
183 | 179 | /** dsfmt mexp for check */ |
184 | 180 | extern const int dsfmt_global_mexp; |
185 | 181 | |
186 | -#ifdef _cplusplus | |
187 | -extern "C" { | |
188 | -#endif | |
189 | - | |
190 | 182 | void dsfmt_gen_rand_all(dsfmt_t *dsfmt); |
191 | 183 | void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size); |
192 | 184 | void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size); |
@@ -194,7 +186,7 @@ void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size); | ||
194 | 186 | void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size); |
195 | 187 | void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp); |
196 | 188 | void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], |
197 | - int key_length, int mexp); | |
189 | + int key_length, int mexp); | |
198 | 190 | const char *dsfmt_get_idstring(void); |
199 | 191 | int dsfmt_get_min_array_size(void); |
200 | 192 |
@@ -202,12 +194,13 @@ int dsfmt_get_min_array_size(void); | ||
202 | 194 | # define DSFMT_PRE_INLINE inline static |
203 | 195 | # define DSFMT_PST_INLINE __attribute__((always_inline)) |
204 | 196 | #elif defined(_MSC_VER) && _MSC_VER >= 1200 |
205 | -# define DSFMT_PRE_INLINE __forceinline | |
197 | +# define DSFMT_PRE_INLINE __forceinline static | |
206 | 198 | # define DSFMT_PST_INLINE |
207 | 199 | #else |
208 | 200 | # define DSFMT_PRE_INLINE inline static |
209 | 201 | # define DSFMT_PST_INLINE |
210 | 202 | #endif |
203 | +DSFMT_PRE_INLINE uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) DSFMT_PST_INLINE; | |
211 | 204 | DSFMT_PRE_INLINE double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) |
212 | 205 | DSFMT_PST_INLINE; |
213 | 206 | DSFMT_PRE_INLINE double dsfmt_genrand_close_open(dsfmt_t *dsfmt) |
@@ -216,6 +209,7 @@ DSFMT_PRE_INLINE double dsfmt_genrand_open_close(dsfmt_t *dsfmt) | ||
216 | 209 | DSFMT_PST_INLINE; |
217 | 210 | DSFMT_PRE_INLINE double dsfmt_genrand_open_open(dsfmt_t *dsfmt) |
218 | 211 | DSFMT_PST_INLINE; |
212 | +DSFMT_PRE_INLINE uint32_t dsfmt_gv_genrand_uint32(void) DSFMT_PST_INLINE; | |
219 | 213 | DSFMT_PRE_INLINE double dsfmt_gv_genrand_close1_open2(void) DSFMT_PST_INLINE; |
220 | 214 | DSFMT_PRE_INLINE double dsfmt_gv_genrand_close_open(void) DSFMT_PST_INLINE; |
221 | 215 | DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_close(void) DSFMT_PST_INLINE; |
@@ -230,11 +224,31 @@ DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close1_open2(double array[], int size) | ||
230 | 224 | DSFMT_PST_INLINE; |
231 | 225 | DSFMT_PRE_INLINE void dsfmt_gv_init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; |
232 | 226 | DSFMT_PRE_INLINE void dsfmt_gv_init_by_array(uint32_t init_key[], |
233 | - int key_length) DSFMT_PST_INLINE; | |
227 | + int key_length) DSFMT_PST_INLINE; | |
234 | 228 | DSFMT_PRE_INLINE void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) |
235 | 229 | DSFMT_PST_INLINE; |
236 | 230 | DSFMT_PRE_INLINE void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], |
237 | - int key_length) DSFMT_PST_INLINE; | |
231 | + int key_length) DSFMT_PST_INLINE; | |
232 | + | |
233 | +/** | |
234 | + * This function generates and returns unsigned 32-bit integer. | |
235 | + * This is slower than SFMT, only for convenience usage. | |
236 | + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called | |
237 | + * before this function. | |
238 | + * @param dsfmt dsfmt internal state data | |
239 | + * @return unsigned 32-bit integer pseudorandom number | |
240 | + */ | |
241 | +inline static uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) { | |
242 | + uint32_t r; | |
243 | + uint64_t *psfmt64 = &dsfmt->status[0].u[0]; | |
244 | + | |
245 | + if (dsfmt->idx >= DSFMT_N64) { | |
246 | + dsfmt_gen_rand_all(dsfmt); | |
247 | + dsfmt->idx = 0; | |
248 | + } | |
249 | + r = psfmt64[dsfmt->idx++] & 0xffffffffU; | |
250 | + return r; | |
251 | +} | |
238 | 252 | |
239 | 253 | /** |
240 | 254 | * This function generates and returns double precision pseudorandom |
@@ -250,17 +264,27 @@ inline static double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) { | ||
250 | 264 | double *psfmt64 = &dsfmt->status[0].d[0]; |
251 | 265 | |
252 | 266 | if (dsfmt->idx >= DSFMT_N64) { |
253 | - dsfmt_gen_rand_all(dsfmt); | |
254 | - dsfmt->idx = 0; | |
267 | + dsfmt_gen_rand_all(dsfmt); | |
268 | + dsfmt->idx = 0; | |
255 | 269 | } |
256 | 270 | r = psfmt64[dsfmt->idx++]; |
257 | 271 | return r; |
258 | 272 | } |
259 | 273 | |
260 | 274 | /** |
275 | + * This function generates and returns unsigned 32-bit integer. | |
276 | + * This is slower than SFMT, only for convenience usage. | |
277 | + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called | |
278 | + * before this function. This function uses \b global variables. | |
279 | + * @return unsigned 32-bit integer pseudorandom number | |
280 | + */ | |
281 | +inline static uint32_t dsfmt_gv_genrand_uint32(void) { | |
282 | + return dsfmt_genrand_uint32(&dsfmt_global_data); | |
283 | +} | |
284 | + | |
285 | +/** | |
261 | 286 | * This function generates and returns double precision pseudorandom |
262 | - * number which distributes uniformly in the range [1, 2). This is | |
263 | - * the primitive and faster than generating numbers in other ranges. | |
287 | + * number which distributes uniformly in the range [1, 2). | |
264 | 288 | * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called |
265 | 289 | * before this function. This function uses \b global variables. |
266 | 290 | * @return double precision floating point pseudorandom number |
@@ -326,13 +350,13 @@ inline static double dsfmt_gv_genrand_open_close(void) { | ||
326 | 350 | inline static double dsfmt_genrand_open_open(dsfmt_t *dsfmt) { |
327 | 351 | double *dsfmt64 = &dsfmt->status[0].d[0]; |
328 | 352 | union { |
329 | - double d; | |
330 | - uint64_t u; | |
353 | + double d; | |
354 | + uint64_t u; | |
331 | 355 | } r; |
332 | 356 | |
333 | 357 | if (dsfmt->idx >= DSFMT_N64) { |
334 | - dsfmt_gen_rand_all(dsfmt); | |
335 | - dsfmt->idx = 0; | |
358 | + dsfmt_gen_rand_all(dsfmt); | |
359 | + dsfmt->idx = 0; | |
336 | 360 | } |
337 | 361 | r.d = dsfmt64[dsfmt->idx++]; |
338 | 362 | r.u |= 1; |
@@ -441,7 +465,7 @@ inline static void dsfmt_gv_init_gen_rand(uint32_t seed) { | ||
441 | 465 | * @param key_length the length of init_key. |
442 | 466 | */ |
443 | 467 | inline static void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], |
444 | - int key_length) { | |
468 | + int key_length) { | |
445 | 469 | dsfmt_chk_init_by_array(dsfmt, init_key, key_length, DSFMT_MEXP); |
446 | 470 | } |
447 | 471 |
@@ -605,7 +629,7 @@ inline static void fill_array_close1_open2(double array[], int size) { | ||
605 | 629 | } |
606 | 630 | #endif /* DSFMT_DO_NOT_USE_OLD_NAMES */ |
607 | 631 | |
608 | -#ifdef _cplusplus | |
632 | +#if defined(__cplusplus) | |
609 | 633 | } |
610 | 634 | #endif |
611 | 635 |
@@ -149,10 +149,10 @@ | ||
149 | 149 | <Target title="VC12"> |
150 | 150 | <Option output="psychlopswin32vc12.lib" prefix_auto="0" extension_auto="0" /> |
151 | 151 | <Option working_dir="" /> |
152 | - <Option object_output=".objs_vc10" /> | |
152 | + <Option object_output=".objs_vc12" /> | |
153 | 153 | <Option deps_output=".deps_bcc" /> |
154 | 154 | <Option type="2" /> |
155 | - <Option compiler="microsoft_visual_c_2013" /> | |
155 | + <Option compiler="copy_of_microsoft_visual_c_2010" /> | |
156 | 156 | <Option projectCompilerOptionsRelation="1" /> |
157 | 157 | <Option projectLinkerOptionsRelation="1" /> |
158 | 158 | <Option projectResourceIncludeDirsRelation="2" /> |
@@ -161,7 +161,7 @@ | ||
161 | 161 | <Add option="/GA" /> |
162 | 162 | <Add option="/EHs" /> |
163 | 163 | <Add option="/GR" /> |
164 | - <Add option="/Ox" /> | |
164 | + <Add option="/W4" /> | |
165 | 165 | <Add option="/DPSYCHLOPS_PLATFORM_WIN32GL" /> |
166 | 166 | </Compiler> |
167 | 167 | <Linker> |
@@ -145,7 +145,7 @@ | ||
145 | 145 | <Option output="Psychlops_win32cblibtest_vc12.exe" prefix_auto="0" extension_auto="0" /> |
146 | 146 | <Option object_output=".objs_vc10" /> |
147 | 147 | <Option type="1" /> |
148 | - <Option compiler="microsoft_visual_c_2013_vc12" /> | |
148 | + <Option compiler="copy_of_microsoft_visual_c_2010" /> | |
149 | 149 | <Option projectCompilerOptionsRelation="1" /> |
150 | 150 | <Compiler> |
151 | 151 | <Add option="/GA" /> |