減色プログラム
Revision | 9259c64445832fc2f3412b0a37b5be396376d1e6 (tree) |
---|---|
Time | 2011-05-22 02:25:58 |
Author | berupon <berupon@gmai...> |
Committer | berupon |
optimized
added AMD64 platform
@@ -101,22 +101,12 @@ struct Color4d | ||
101 | 101 | } |
102 | 102 | |
103 | 103 | double& operator[] (int idx) { |
104 | - switch (idx) { | |
105 | - case 0: return v[0].m128d_f64[0]; | |
106 | - case 1: return v[0].m128d_f64[1]; | |
107 | - case 2: return v[1].m128d_f64[0]; | |
108 | - case 3: return v[1].m128d_f64[1]; | |
109 | - } | |
104 | + return ((double*)&v)[idx]; | |
110 | 105 | } |
111 | 106 | const double& operator[] (int idx) const { |
112 | - switch (idx) { | |
113 | - case 0: return v[0].m128d_f64[0]; | |
114 | - case 1: return v[0].m128d_f64[1]; | |
115 | - case 2: return v[1].m128d_f64[0]; | |
116 | - case 3: return v[1].m128d_f64[1]; | |
117 | - } | |
107 | + return ((double*)&v)[idx]; | |
118 | 108 | } |
119 | - | |
109 | + | |
120 | 110 | double norm_squared() { |
121 | 111 | double result = 0; |
122 | 112 | for (int i=0; i<3; i++) { |
@@ -6,20 +6,26 @@ EndProject | ||
6 | 6 | Global |
7 | 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
8 | 8 | Debug|Win32 = Debug|Win32 |
9 | + Debug|x64 = Debug|x64 | |
9 | 10 | Release|Win32 = Release|Win32 |
11 | + Release|x64 = Release|x64 | |
10 | 12 | EndGlobalSection |
11 | 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution |
12 | 14 | {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Debug|Win32.ActiveCfg = Debug|Win32 |
13 | 15 | {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Debug|Win32.Build.0 = Debug|Win32 |
16 | + {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Debug|x64.ActiveCfg = Debug|x64 | |
17 | + {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Debug|x64.Build.0 = Debug|x64 | |
14 | 18 | {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Release|Win32.ActiveCfg = Release|Win32 |
15 | 19 | {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Release|Win32.Build.0 = Release|Win32 |
20 | + {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Release|x64.ActiveCfg = Release|x64 | |
21 | + {21F9C24D-7A00-425D-B59C-E1CD45C445A6}.Release|x64.Build.0 = Release|x64 | |
16 | 22 | EndGlobalSection |
17 | 23 | GlobalSection(SolutionProperties) = preSolution |
18 | 24 | HideSolutionNode = FALSE |
19 | 25 | EndGlobalSection |
20 | 26 | GlobalSection(ExtensibilityGlobals) = postSolution |
21 | - AMDCaPersistentConfig = Release|Win32 | |
22 | - AMDCaPersistentStartup = color_quantizer | |
23 | 27 | AMDCaProjectFile = C:\projects\color_quantizer\CodeAnalyst\color_quantizer.caw |
28 | + AMDCaPersistentStartup = color_quantizer | |
29 | + AMDCaPersistentConfig = Release|Win32 | |
24 | 30 | EndGlobalSection |
25 | 31 | EndGlobal |
@@ -12,6 +12,9 @@ | ||
12 | 12 | <Platform |
13 | 13 | Name="Win32" |
14 | 14 | /> |
15 | + <Platform | |
16 | + Name="x64" | |
17 | + /> | |
15 | 18 | </Platforms> |
16 | 19 | <ToolFiles> |
17 | 20 | </ToolFiles> |
@@ -170,6 +173,162 @@ | ||
170 | 173 | Name="VCPostBuildEventTool" |
171 | 174 | /> |
172 | 175 | </Configuration> |
176 | + <Configuration | |
177 | + Name="Debug|x64" | |
178 | + OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)" | |
179 | + IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" | |
180 | + ConfigurationType="1" | |
181 | + CharacterSet="1" | |
182 | + > | |
183 | + <Tool | |
184 | + Name="VCPreBuildEventTool" | |
185 | + /> | |
186 | + <Tool | |
187 | + Name="VCCustomBuildTool" | |
188 | + /> | |
189 | + <Tool | |
190 | + Name="VCXMLDataGeneratorTool" | |
191 | + /> | |
192 | + <Tool | |
193 | + Name="VCWebServiceProxyGeneratorTool" | |
194 | + /> | |
195 | + <Tool | |
196 | + Name="VCMIDLTool" | |
197 | + TargetEnvironment="3" | |
198 | + /> | |
199 | + <Tool | |
200 | + Name="VCCLCompilerTool" | |
201 | + Optimization="0" | |
202 | + AdditionalIncludeDirectories="./" | |
203 | + PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS" | |
204 | + MinimalRebuild="true" | |
205 | + BasicRuntimeChecks="3" | |
206 | + RuntimeLibrary="3" | |
207 | + UsePrecompiledHeader="2" | |
208 | + WarningLevel="3" | |
209 | + DebugInformationFormat="3" | |
210 | + ForcedIncludeFiles="common.h" | |
211 | + /> | |
212 | + <Tool | |
213 | + Name="VCManagedResourceCompilerTool" | |
214 | + /> | |
215 | + <Tool | |
216 | + Name="VCResourceCompilerTool" | |
217 | + /> | |
218 | + <Tool | |
219 | + Name="VCPreLinkEventTool" | |
220 | + /> | |
221 | + <Tool | |
222 | + Name="VCLinkerTool" | |
223 | + LinkIncremental="2" | |
224 | + GenerateDebugInformation="true" | |
225 | + SubSystem="1" | |
226 | + TargetMachine="17" | |
227 | + /> | |
228 | + <Tool | |
229 | + Name="VCALinkTool" | |
230 | + /> | |
231 | + <Tool | |
232 | + Name="VCManifestTool" | |
233 | + /> | |
234 | + <Tool | |
235 | + Name="VCXDCMakeTool" | |
236 | + /> | |
237 | + <Tool | |
238 | + Name="VCBscMakeTool" | |
239 | + /> | |
240 | + <Tool | |
241 | + Name="VCFxCopTool" | |
242 | + /> | |
243 | + <Tool | |
244 | + Name="VCAppVerifierTool" | |
245 | + /> | |
246 | + <Tool | |
247 | + Name="VCPostBuildEventTool" | |
248 | + /> | |
249 | + </Configuration> | |
250 | + <Configuration | |
251 | + Name="Release|x64" | |
252 | + OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)" | |
253 | + IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" | |
254 | + ConfigurationType="1" | |
255 | + CharacterSet="1" | |
256 | + WholeProgramOptimization="1" | |
257 | + > | |
258 | + <Tool | |
259 | + Name="VCPreBuildEventTool" | |
260 | + /> | |
261 | + <Tool | |
262 | + Name="VCCustomBuildTool" | |
263 | + /> | |
264 | + <Tool | |
265 | + Name="VCXMLDataGeneratorTool" | |
266 | + /> | |
267 | + <Tool | |
268 | + Name="VCWebServiceProxyGeneratorTool" | |
269 | + /> | |
270 | + <Tool | |
271 | + Name="VCMIDLTool" | |
272 | + TargetEnvironment="3" | |
273 | + /> | |
274 | + <Tool | |
275 | + Name="VCCLCompilerTool" | |
276 | + Optimization="2" | |
277 | + EnableIntrinsicFunctions="true" | |
278 | + FavorSizeOrSpeed="1" | |
279 | + AdditionalIncludeDirectories="./" | |
280 | + PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS" | |
281 | + RuntimeLibrary="0" | |
282 | + BufferSecurityCheck="false" | |
283 | + EnableFunctionLevelLinking="true" | |
284 | + FloatingPointModel="2" | |
285 | + UsePrecompiledHeader="2" | |
286 | + BrowseInformation="1" | |
287 | + WarningLevel="3" | |
288 | + DebugInformationFormat="3" | |
289 | + ForcedIncludeFiles="common.h" | |
290 | + /> | |
291 | + <Tool | |
292 | + Name="VCManagedResourceCompilerTool" | |
293 | + /> | |
294 | + <Tool | |
295 | + Name="VCResourceCompilerTool" | |
296 | + /> | |
297 | + <Tool | |
298 | + Name="VCPreLinkEventTool" | |
299 | + /> | |
300 | + <Tool | |
301 | + Name="VCLinkerTool" | |
302 | + LinkIncremental="1" | |
303 | + GenerateDebugInformation="true" | |
304 | + SubSystem="1" | |
305 | + OptimizeReferences="2" | |
306 | + EnableCOMDATFolding="2" | |
307 | + RandomizedBaseAddress="1" | |
308 | + TargetMachine="17" | |
309 | + /> | |
310 | + <Tool | |
311 | + Name="VCALinkTool" | |
312 | + /> | |
313 | + <Tool | |
314 | + Name="VCManifestTool" | |
315 | + /> | |
316 | + <Tool | |
317 | + Name="VCXDCMakeTool" | |
318 | + /> | |
319 | + <Tool | |
320 | + Name="VCBscMakeTool" | |
321 | + /> | |
322 | + <Tool | |
323 | + Name="VCFxCopTool" | |
324 | + /> | |
325 | + <Tool | |
326 | + Name="VCAppVerifierTool" | |
327 | + /> | |
328 | + <Tool | |
329 | + Name="VCPostBuildEventTool" | |
330 | + /> | |
331 | + </Configuration> | |
173 | 332 | </Configurations> |
174 | 333 | <References> |
175 | 334 | </References> |
@@ -214,6 +373,22 @@ | ||
214 | 373 | UsePrecompiledHeader="1" |
215 | 374 | /> |
216 | 375 | </FileConfiguration> |
376 | + <FileConfiguration | |
377 | + Name="Debug|x64" | |
378 | + > | |
379 | + <Tool | |
380 | + Name="VCCLCompilerTool" | |
381 | + UsePrecompiledHeader="1" | |
382 | + /> | |
383 | + </FileConfiguration> | |
384 | + <FileConfiguration | |
385 | + Name="Release|x64" | |
386 | + > | |
387 | + <Tool | |
388 | + Name="VCCLCompilerTool" | |
389 | + UsePrecompiledHeader="1" | |
390 | + /> | |
391 | + </FileConfiguration> | |
217 | 392 | </File> |
218 | 393 | </Filter> |
219 | 394 | <Filter |
@@ -280,6 +455,22 @@ | ||
280 | 455 | UsePrecompiledHeader="0" |
281 | 456 | /> |
282 | 457 | </FileConfiguration> |
458 | + <FileConfiguration | |
459 | + Name="Debug|x64" | |
460 | + > | |
461 | + <Tool | |
462 | + Name="VCCLCompilerTool" | |
463 | + UsePrecompiledHeader="0" | |
464 | + /> | |
465 | + </FileConfiguration> | |
466 | + <FileConfiguration | |
467 | + Name="Release|x64" | |
468 | + > | |
469 | + <Tool | |
470 | + Name="VCCLCompilerTool" | |
471 | + UsePrecompiledHeader="0" | |
472 | + /> | |
473 | + </FileConfiguration> | |
283 | 474 | </File> |
284 | 475 | <File |
285 | 476 | RelativePath=".\ReadImage\File.h" |
@@ -308,6 +499,22 @@ | ||
308 | 499 | UsePrecompiledHeader="0" |
309 | 500 | /> |
310 | 501 | </FileConfiguration> |
502 | + <FileConfiguration | |
503 | + Name="Debug|x64" | |
504 | + > | |
505 | + <Tool | |
506 | + Name="VCCLCompilerTool" | |
507 | + UsePrecompiledHeader="0" | |
508 | + /> | |
509 | + </FileConfiguration> | |
510 | + <FileConfiguration | |
511 | + Name="Release|x64" | |
512 | + > | |
513 | + <Tool | |
514 | + Name="VCCLCompilerTool" | |
515 | + UsePrecompiledHeader="0" | |
516 | + /> | |
517 | + </FileConfiguration> | |
311 | 518 | </File> |
312 | 519 | <File |
313 | 520 | RelativePath=".\ReadImage\ReadImage.h" |
@@ -144,15 +144,13 @@ int _tmain(int argc, _TCHAR* argv[]) | ||
144 | 144 | for (int j=0; j<3; j++) { |
145 | 145 | double w = exp(-sqrt((double)((i-1)*(i-1) + (j-1)*(j-1)))/(stddev*stddev)); |
146 | 146 | filter3_weights[i][j] = Color(w,w,w,0); |
147 | - sum += 3 * w; | |
147 | + sum += w; | |
148 | 148 | } |
149 | 149 | } |
150 | - sum /= 3; | |
150 | + double invSum = 1.0 / sum; | |
151 | 151 | for (int i=0; i<3; i++) { |
152 | 152 | for (int j=0; j<3; j++) { |
153 | - for (int k=0; k<3; k++) { | |
154 | - filter3_weights[i][j][k] /= sum; | |
155 | - } | |
153 | + filter3_weights[i][j] *= invSum; | |
156 | 154 | } |
157 | 155 | } |
158 | 156 | sum = 0.0; |
@@ -160,15 +158,13 @@ int _tmain(int argc, _TCHAR* argv[]) | ||
160 | 158 | for (int j=0; j<5; j++) { |
161 | 159 | double w = exp(-sqrt((double)((i-2)*(i-2) + (j-2)*(j-2)))/(stddev*stddev)); |
162 | 160 | filter5_weights[i][j] = Color(w,w,w,0); |
163 | - sum += 3 * w; | |
161 | + sum += w; | |
164 | 162 | } |
165 | 163 | } |
166 | - sum /= 3; | |
164 | + invSum = 1.0 / sum; | |
167 | 165 | for (int i=0; i<5; i++) { |
168 | 166 | for (int j=0; j<5; j++) { |
169 | - for (int k=0; k<3; k++) { | |
170 | - filter5_weights[i][j][k] /= sum; | |
171 | - } | |
167 | + filter5_weights[i][j] *= invSum; | |
172 | 168 | } |
173 | 169 | } |
174 | 170 |
@@ -199,9 +195,10 @@ int _tmain(int argc, _TCHAR* argv[]) | ||
199 | 195 | for (int x=0; x<width; x++) { |
200 | 196 | const uint8_t idx = quantized_image[y][x]; |
201 | 197 | Color col = palette[idx]; |
202 | - c[2] = (unsigned char)(255*col[0]); | |
203 | - c[1] = (unsigned char)(255*col[1]); | |
204 | - c[0] = (unsigned char)(255*col[2]); | |
198 | + col *= 255.0; | |
199 | + c[2] = (unsigned char)(col[0]); | |
200 | + c[1] = (unsigned char)(col[1]); | |
201 | + c[0] = (unsigned char)(col[2]); | |
205 | 202 | fwrite(c, 3, 1, out); |
206 | 203 | } |
207 | 204 | } |
@@ -74,9 +74,9 @@ void random_permutation( | ||
74 | 74 | vector<int>& result |
75 | 75 | ) |
76 | 76 | { |
77 | - result.clear(); | |
77 | + result.resize(count); | |
78 | 78 | for (size_t i=0; i<count; ++i) { |
79 | - result.push_back(i); | |
79 | + result[i] = i; | |
80 | 80 | } |
81 | 81 | random_shuffle(result.begin(), result.end()); |
82 | 82 | } |
@@ -89,10 +89,11 @@ void random_permutation_2d( | ||
89 | 89 | { |
90 | 90 | vector<int> perm1d; |
91 | 91 | random_permutation(width*height, perm1d); |
92 | - while (!perm1d.empty()) { | |
93 | - int idx = perm1d.back(); | |
94 | - perm1d.pop_back(); | |
95 | - result.push_back(pair<int,int>(idx % width, idx / width)); | |
92 | + const size_t sz = perm1d.size(); | |
93 | + result.resize(sz); | |
94 | + for (size_t i=0; i<sz; ++i) { | |
95 | + int idx = perm1d[sz-1-i]; | |
96 | + result[i] = pair<int,int>(idx % width, idx / width); | |
96 | 97 | } |
97 | 98 | } |
98 | 99 |
@@ -291,17 +292,11 @@ void compute_initial_s( | ||
291 | 292 | const Image& b |
292 | 293 | ) |
293 | 294 | { |
294 | - init_image(s); | |
295 | 295 | size_t palette_size = s.width_; |
296 | 296 | int coarse_width = coarse_variables.width_; |
297 | 297 | int coarse_height = coarse_variables.height_; |
298 | 298 | int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2; |
299 | - | |
300 | - printf( | |
301 | - "%s started %d %d %d\n", | |
302 | - __FUNCTION__, palette_size, coarse_width, coarse_height | |
303 | - ); | |
304 | - | |
299 | + | |
305 | 300 | Color center_b = b_value(b,0,0,0,0); |
306 | 301 | Color zero_vector; |
307 | 302 | zero_vector.zero(); |
@@ -519,10 +514,12 @@ void spatial_color_quant( | ||
519 | 514 | size_t iters_at_current_level = 0; |
520 | 515 | bool skip_palette_maintenance = false; |
521 | 516 | Image s(num_colors, num_colors); |
517 | + init_image(s); | |
522 | 518 | compute_initial_s(s, coarse_variables, *b_vec[coarse_level]); |
523 | 519 | Image* j_palette_sum = |
524 | 520 | new Image(coarse_variables.width_, coarse_variables.height_); |
525 | 521 | compute_initial_j_palette_sum(*j_palette_sum, coarse_variables, palette, num_colors); |
522 | + vector<double> meanfield_logs(num_colors), meanfields(num_colors); | |
526 | 523 | while (coarse_level >= 0 || temperature > final_temperature) { |
527 | 524 | // Need to reseat this reference in case we changed p_coarse_variables |
528 | 525 | Array3D<double>& coarse_variables = *p_coarse_variables; |
@@ -545,7 +542,6 @@ void spatial_color_quant( | ||
545 | 542 | while (!visit_queue.empty()) { |
546 | 543 | // If we get to 10% above initial size, just revisit them all |
547 | 544 | if (visit_queue.size() > coarse_variables.width_*coarse_variables.height_*11.0/10) { |
548 | - visit_queue.clear(); | |
549 | 545 | random_permutation_2d(coarse_variables.width_, coarse_variables.height_, visit_queue); |
550 | 546 | } |
551 | 547 |
@@ -557,11 +553,12 @@ void spatial_color_quant( | ||
557 | 553 | Color p_i; |
558 | 554 | p_i.zero(); |
559 | 555 | for (int y=0; y<b.height_; ++y) { |
556 | + int j_y = y - center_y + i_y; | |
557 | + if (j_y < 0 || j_y >= coarse_variables.height_) continue; | |
560 | 558 | for (int x=0; x<b.width_; ++x) { |
561 | 559 | int j_x = x - center_x + i_x; |
562 | - int j_y = y - center_y + i_y; | |
563 | 560 | if (i_x == j_x && i_y == j_y) continue; |
564 | - if (j_x < 0 || j_y < 0 || j_x >= coarse_variables.width_ || j_y >= coarse_variables.height_) continue; | |
561 | + if (j_x < 0 || j_x >= coarse_variables.width_) continue; | |
565 | 562 | Color b_ij = b_value(b, i_x, i_y, j_x, j_y); |
566 | 563 | Color j_pal = (*j_palette_sum)[j_y][j_x]; |
567 | 564 | p_i += b_ij * j_pal; |
@@ -570,7 +567,6 @@ void spatial_color_quant( | ||
570 | 567 | p_i *= 2.0; |
571 | 568 | p_i += a[i_y][i_x]; |
572 | 569 | |
573 | - vector<double> meanfield_logs, meanfields; | |
574 | 570 | double max_meanfield_log = -numeric_limits<double>::infinity(); |
575 | 571 | double meanfield_sum = 0.0; |
576 | 572 | for (size_t v=0; v<num_colors; ++v) { |
@@ -580,14 +576,15 @@ void spatial_color_quant( | ||
580 | 576 | // will choose a value that makes the maximum e^100. |
581 | 577 | Color p_i2; p_i2 = p_i; |
582 | 578 | double m = -(palette[v].dot_product(p_i2 + middle_b.direct_product(palette[v]))) / temperature; |
583 | - meanfield_logs.push_back(m); | |
579 | + meanfield_logs[v] = m; | |
584 | 580 | if (m > max_meanfield_log) { |
585 | 581 | max_meanfield_log = m; |
586 | 582 | } |
587 | 583 | } |
588 | 584 | for (size_t v=0; v<num_colors; ++v) { |
589 | - meanfields.push_back(exp(meanfield_logs[v]-max_meanfield_log+100)); | |
590 | - meanfield_sum += meanfields.back(); | |
585 | + double d = exp(meanfield_logs[v]-max_meanfield_log+100); | |
586 | + meanfields[v] = d; | |
587 | + meanfield_sum += d; | |
591 | 588 | } |
592 | 589 | if (meanfield_sum == 0) { |
593 | 590 | cout << "Fatal error: Meanfield sum underflowed. Please contact developer." << endl; |
@@ -10,6 +10,7 @@ | ||
10 | 10 | #include <stdio.h> |
11 | 11 | #include <tchar.h> |
12 | 12 | |
13 | +#define _SECURE_SCL 0 | |
13 | 14 | |
14 | 15 | |
15 | 16 | // TODO: プログラムに必要な追加ヘッダーをここで参照してください。 |