Develop and Download Open Source Software

Browse Subversion Repository

Contents of /RanCodeAdp.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 14 - (show annotations) (download) (as text)
Mon Sep 7 08:07:15 2009 UTC (14 years, 7 months ago) by berupon
File MIME type: text/x-chdr
File size: 15238 byte(s)
bzip2からRangeCoderに切り替えて圧縮率向上。
1 // RanCode.cpp : Defines the entry point for the console application.
2 //
3 //This is DEMO version of arithmetic/range encoder written for research purposes by
4 //Andrew Polar (www.ezcodesample.com, Jan. 10, 2007). The algorithm was published by
5 //G.N.N. Martin. In March 1979. Video & Data Recording Conference, Southampton, UK.
6 //The program was tested for many statistically different data samples, however you
7 //use it on own risk. Author do not accept any responsibility for use or misuse of
8 //this program. Any data sample that cause crash or wrong result can be sent to author
9 //for research. The e-mail may be obtained by WHOIS www.ezcodesample.com.
10 //The correction of July 03, 2007. The processing of non-zero minimum has been added.
11 //User can pass data with non-zero minimum value, however the min/max limits must be
12 //specified in function call.
13 //The correction of August 17, 2007. The additional means of computational statbility
14 //has been added. As it was discovered and investigated by Bill Dorsey in beta-testing
15 //the addition operation must have treatment for carry propagation. The solution is
16 //to normalize data to vary from 1 to max-min+1 and use value 0 as a means of output
17 //of the head of operation_buffer bits. This zero value is used as synchronization
18 //flag for output buffer and simply excluded from decoded stream.
19 //Last correction 09/07/2007 by Andrew Polar, making range coder adoptive
20 //The missing functionality is big-endian processor capabilities.
21
22 #include <stdio.h>
23 #include <math.h>
24 #include <stdlib.h>
25 #include <memory.h>
26 #include <time.h>
27 //#include <windows.h>
28
29 ///////////////////////////////////////////////////////////////////////
30 // Test data preparation
31 ///////////////////////////////////////////////////////////////////////
32
33 double entropy24(int* data, int size) {
34
35 int max_size = 1 << 24;
36 int min, counter;
37 int* buffer;
38 double entropy;
39 double log2 = log(2.0);
40 double prob;
41
42 min = data[0];
43 for (counter=0; counter<size; counter++) {
44 if (data[counter] < min)
45 min = data[counter];
46 }
47
48 for (counter=0; counter<size; counter++) {
49 data[counter] -= min;
50 }
51
52 buffer = (int*)malloc(max_size*sizeof(int));
53 memset(buffer, 0x00, max_size*sizeof(int));
54 for (counter=0; counter<size; counter++) {
55 buffer[data[counter]]++;
56 }
57
58 entropy = 0.0;
59 for (counter=0; counter<max_size; counter++) {
60 if (buffer[counter] > 0) {
61 prob = (double)buffer[counter];
62 prob /= (double)size;
63 entropy += log(prob)/log2*prob;
64 }
65 }
66 entropy *= -1.0;
67
68 for (counter=0; counter<size; counter++) {
69 data[counter] += min;
70 }
71
72 if (buffer)
73 free(buffer);
74
75 return entropy;
76 }
77
78 double round(double x) {
79 if ((x - floor(x)) >= 0.5)
80 return ceil(x);
81 else
82 return floor(x);
83 }
84
85 double MakeData(int* data, int data_size, int min, int max, int redundancy_factor) {
86
87 int counter, cnt, high, low;
88 double buf;
89
90 if (redundancy_factor <= 1)
91 redundancy_factor = 1;
92
93 if (max <= min)
94 max = min + 1;
95
96 srand((unsigned)time(0));
97 for (counter=0; counter<data_size; counter++) {
98 buf = 0;
99 for (cnt=0; cnt<redundancy_factor; cnt++) {
100 buf += (double)rand();
101 }
102 data[counter] = ((int)buf)/redundancy_factor;
103 }
104
105 low = data[0];
106 high = data[0];
107 for (counter=0; counter<data_size; counter++) {
108 if (data[counter] > high)
109 high = data[counter];
110 if (data[counter] < low)
111 low = data[counter];
112 }
113
114 for (counter=0; counter<data_size; counter++) {
115 buf = (double)(data[counter] - low);
116 buf /= (double)(high - low);
117 buf *= (double)(max - min);
118 buf = round(buf);
119 data[counter] = (int)buf + min;
120 }
121
122 return entropy24(data, data_size);
123 }
124
125 ///////////////////////////////////////////////////////////////////
126 // End of data preparation start of encoding, decoding functions
127 ///////////////////////////////////////////////////////////////////
128
129 const unsigned MAX_BASE = 15; //value is optional but 15 is recommended
130 unsigned current_byte; //position of current byte in result data
131 unsigned char current_bit; //postion of current bit in result data
132
133 //Some look up tables for fast processing
134 static int output_mask[8][32];
135 static unsigned char bytes_plus [8][64];
136
137 static unsigned char edge_mask[8] = {0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
138 static unsigned char shift_table[8] = {24, 16, 8, 0};
139
140 static long long overflow_indicator = 0xffffffffffffffff << (MAX_BASE * 2 - 1);
141
142 void make_look_ups() {
143
144 int sign_mask[8] = {
145 0xffffffff, 0x7fffffff, 0x3fffffff, 0x1fffffff,
146 0x0fffffff, 0x07ffffff, 0x03ffffff, 0x01ffffff
147 };
148
149 for (int i=0; i<8; i++) {
150 for (int j=0; j<32; j++) {
151 output_mask[i][j] = sign_mask[i];
152 output_mask[i][j] >>= (32-i-j);
153 output_mask[i][j] <<= (32-i-j);
154 }
155 }
156
157 for (int i=0; i<8; i++) {
158 for (int j=0; j<64; j++) {
159 bytes_plus[i][j] = j/8;
160 if ((i + j%8) > 7)
161 ++bytes_plus[i][j];
162 }
163 }
164 }
165
166 //The key function that takes product of x*y and convert it to
167 //mantissa and exponent. Mantissa have number of bits equal to
168 //length of the mask.
169 inline int SafeProduct(int x, int y, int mask, int& shift) {
170
171 int p = x * y;
172 if ((p >> shift) > mask) {
173 p >>= shift;
174 while (p > mask) {
175 p >>= 1;
176 ++shift;
177 }
178 }
179 else {
180 while (shift >= 0) {
181 --shift;
182 if ((p >> shift) > mask) {
183 break;
184 }
185 }
186 ++shift;
187 p >>= shift;
188 }
189 return p;
190 }
191
192 inline int readBits(int operation_buffer, int bits, unsigned char* code) {
193
194 unsigned end_byte = current_byte + bytes_plus[current_bit][bits];
195 int buffer = (code[current_byte] & edge_mask[current_bit]);
196 unsigned char total_bits = 8 - current_bit;
197 for (unsigned i=current_byte+1; i<=end_byte; ++i) {
198 (buffer <<= 8) |= code[i];
199 total_bits += 8;
200 }
201 buffer >>= (total_bits - bits);
202 operation_buffer <<= bits;
203 operation_buffer |= buffer;
204 current_byte = end_byte;
205 current_bit = (bits + current_bit)%8;
206 return operation_buffer;
207 }
208
209 inline void writeBits(long long operation_buffer, int bits, unsigned char* result) {
210
211 int buffer = (int)((operation_buffer >> current_bit) >> 32);
212 buffer &= output_mask[current_bit][bits];
213 unsigned bytes_plus2 = bytes_plus[current_bit][bits];
214 current_bit = (bits + current_bit)%8;
215 for (unsigned i=0; i<=bytes_plus2; ++i) {
216 result[current_byte + i] |= (buffer >> shift_table[i]);
217 }
218 current_byte += bytes_plus2;
219 }
220
221 void update_frequencies(int* buffer, int* freq, int* cumulative, int range, int denominator) {
222
223 memcpy(freq, buffer, range*sizeof(int));
224 for (int i=0; i<range; i++)
225 buffer[i] = 1;
226
227 cumulative[0] = 0;
228 for (int i=1; i<range; i++) {
229 cumulative[i] = cumulative[i-1] + freq[i-1];
230 }
231 cumulative[range] = denominator;
232 }
233
234 //The result buffer should be allocated outside the function
235 //The data must be non-negative integers less or equal to max_value
236 //The actual size of compressed buffer is returned in &result_size
237 //Initially result_size must contain size of result buffer
238 void Encode(int* source, int source_size, int max_value, int min_value, unsigned char* result, int& result_size) {
239
240 memset(result, 0x00, result_size);
241 current_byte = 0;
242 current_bit = 0;
243
244 if (min_value != 0) {
245 for (int i=0; i<source_size; i++) {
246 source[i] -= min_value;
247 }
248 }
249
250 //collecting frequencies
251 int range = (max_value + 1) - min_value + 1; //we add one more value for flag
252 int* frequency = (int*)malloc(range*sizeof(int));
253 int* cumulative = (int*)malloc((range+1)*sizeof(int));
254 int* buffer = (int*)malloc(range * sizeof(int));
255
256 int Denominator = source_size + 1; //we increased size by 1
257 unsigned base = 0;
258 while (Denominator > 1) {
259 Denominator >>= 1;
260 base++;
261 }
262 if (base > MAX_BASE)
263 base = MAX_BASE;
264 Denominator = (1 << base);
265
266 for (int i=0; i<range; ++i) {
267 buffer[i] = 1;
268 }
269 int cnt = 1; //start from 1 on purpose
270 for (int i=0; i<Denominator-range; ++i) {
271 buffer[cnt++]++;
272 if (cnt == range)
273 cnt = 1;
274 }
275
276 update_frequencies(buffer, frequency, cumulative, range, Denominator);
277
278 //saving frequencies in result buffer
279 memcpy(result, &range, 4);
280 memcpy(result + 4, &Denominator, 4);
281 current_byte += 8;
282 //data is ready
283
284 //encoding
285 make_look_ups();
286 int mask = 0xFFFFFFFF >> (32 - base);
287 //First 64 bits are not used for data, we use them for saving data_size and
288 //any other 32 bit value that we want to save
289 long long operation_buffer = source_size;
290 operation_buffer <<= 32;
291 //we save minimum value always as positive number and use last
292 //decimal position as sign indicator
293 int saved_min = min_value * 10;
294 if (saved_min < 0) {
295 saved_min = 1 - saved_min;
296 }
297 operation_buffer |= saved_min; //we use another 32 bits of first 64 bit buffer
298 /////////////////////
299 int product = 1;
300 int shift = 0;
301 cnt = 1;
302 for (int i=0; i<source_size; ++i) {
303
304 while (((operation_buffer << (base - shift)) & overflow_indicator) == overflow_indicator) {
305 printf("Possible buffer overflow is corrected at value %d\n", i);
306 //this is zero flag output in order to avoid buffer overflow
307 //rarely happen, cumulative[0] = 0, frequency[0] = 1
308 writeBits(operation_buffer, base-shift, result);
309 operation_buffer <<= (base - shift);
310 operation_buffer += cumulative[0] * product;
311 product = SafeProduct(product, frequency[0], mask, shift);
312 //in result of this operation shift = 0
313 }
314
315 writeBits(operation_buffer, base-shift, result);
316 operation_buffer <<= (base - shift);
317 operation_buffer += cumulative[source[i]+1] * product;
318 product = SafeProduct(product, frequency[source[i]+1], mask, shift);
319
320 //This is adoptive frequency updates
321 buffer[source[i]+1]++;
322 cnt++;
323 if (cnt == (Denominator-range)) {
324 update_frequencies(buffer, frequency, cumulative, range, Denominator);
325 cnt = 1;
326 }
327 }
328 //flushing remained 64 bits
329 writeBits(operation_buffer, 24, result);
330 operation_buffer <<= 24;
331 writeBits(operation_buffer, 24, result);
332 operation_buffer <<= 24;
333 writeBits(operation_buffer, 16, result);
334 operation_buffer <<= 16;
335 result_size = current_byte + 1;
336 //end encoding
337
338 if (min_value != 0) {
339 for (int i=0; i<source_size; i++) {
340 source[i] += min_value;
341 }
342 }
343
344 if (buffer)
345 free(buffer);
346
347 if (cumulative)
348 free(cumulative);
349
350 if (frequency)
351 free(frequency);
352 }
353
354 void prepare_symbols(int* cumulative, int range, int denominator, int* symbol) {
355
356 memset(symbol, 0x00, denominator*sizeof(int));
357 for (int k=0; k<range; ++k) {
358 for (int i=cumulative[k]; i<cumulative[k+1]; i++) {
359 symbol[i] = k;
360 }
361 }
362 }
363
364 //result buffer must be allocated outside of function
365 //result size must contain the correct size of the buffer
366 void Decode(unsigned char* code, int code_size, int* result, int& result_size) {
367
368 current_byte = 0;
369 current_bit = 0;
370
371 //reading frequencies
372 int range = 0;
373 int Denominator = 0;
374 memcpy(&range, code, 4);
375 memcpy(&Denominator, code + 4, 4);
376
377 int* frequency = (int*)malloc(range*sizeof(int));
378 int* cumulative = (int*)malloc((range+1)*sizeof(int));
379 int* buffer = (int*)malloc(range * sizeof(int));
380
381 current_byte += 8;
382 unsigned base = 0;
383 while (Denominator > 1) {
384 Denominator >>= 1;
385 base++;
386 }
387 Denominator = (1 << base);
388
389 for (int i=0; i<range; ++i) {
390 buffer[i] = 1;
391 }
392 int cnt = 1; //start from 1 on purpose
393 for (int i=0; i<Denominator-range; ++i) {
394 buffer[cnt++]++;
395 if (cnt == range)
396 cnt = 1;
397 }
398
399 int* symbol = (int*)malloc(Denominator*sizeof(int));
400 update_frequencies(buffer, frequency, cumulative, range, Denominator);
401 prepare_symbols(cumulative, range, Denominator, symbol);
402 //data is ready
403
404 //decoding
405 make_look_ups();
406 int mask = 0xFFFFFFFF >> (32 - base);
407 long long ID;
408 int product = 1;
409 int shift = 0;
410 int operation_buffer = 0;
411 //we skip first 64 bits they contain size and minimum value
412 operation_buffer = readBits(operation_buffer, 32, code);
413 result_size = (int)operation_buffer; //First 32 bits is data_size
414 operation_buffer = 0;
415 operation_buffer = readBits(operation_buffer, 32, code);
416 int min_value = (int)operation_buffer; //Second 32 bits is minimum value;
417 //we find sign according to our convention
418 if ((min_value % 10) > 0)
419 min_value = - min_value;
420 min_value /= 10;
421 operation_buffer = 0;
422 ////////////////////////////////////////
423 int counter = 0;
424 cnt = 1;
425 while (counter < result_size) {
426 operation_buffer = readBits(operation_buffer, base-shift, code);
427 ID = operation_buffer/product;
428 operation_buffer -= product * cumulative[symbol[ID]];
429 product = SafeProduct(product, frequency[symbol[ID]], mask, shift);
430 result[counter] = symbol[ID] + min_value - 1;
431 if (result[counter] >= min_value) {
432
433 buffer[symbol[ID]]++;
434 cnt++;
435 if (cnt == (Denominator-range)) {
436 update_frequencies(buffer, frequency, cumulative, range, Denominator);
437 prepare_symbols(cumulative, range, Denominator, symbol);
438 cnt = 1;
439 }
440 counter++;
441 }
442 }
443 //end decoding
444
445 if (buffer)
446 free(buffer);
447
448 if (symbol)
449 free(symbol);
450
451 if (cumulative)
452 free(cumulative);
453
454 if (frequency)
455 free(frequency);
456 }
457
458 #if 0
459
460 int main()
461 {
462 printf("Making data for round trip...\n");
463 int data_size = 1800000;
464 int min_data_value = -297;
465 int max_data_value = 4000;
466 int redundancy_factor = 10;
467
468 int* source = (int*)malloc(data_size * sizeof(int));
469 double entropy = MakeData(source, data_size, min_data_value, max_data_value, redundancy_factor);
470
471 int Bytes = (int)(ceil((double)(data_size) * entropy)/8.0);
472 printf("Data size = %d, alphabet = %d\n", data_size, max_data_value - min_data_value + 1);
473 printf("Entropy %5.3f, estimated compressed size %d bytes\n\n", entropy, Bytes);
474
475 SYSTEMTIME st;
476 GetSystemTime(&st);
477 printf("Encoding is started %d %d\n", st.wSecond, st.wMilliseconds);
478
479 int result_size = Bytes + Bytes/2 + (max_data_value - min_data_value) * 2 + 4 + 1024;
480 unsigned char* result = (unsigned char*)malloc(result_size * sizeof(unsigned char));
481
482 //Min and Max data values can be specified approximately they can make
483 //larger data segment, for example Max value can be passed as larger and Min
484 //value can be passed as smaller number. Replace the function below by commented
485 //out example and it will work in the same way.
486 //Encode(source, data_size, max_data_value + 150, min_data_value - 140, result, result_size);
487 Encode(source, data_size, max_data_value, min_data_value, result, result_size);
488
489 GetSystemTime(&st);
490 printf("Encoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
491 printf("Actual size of compressed buffer with frequencies %d bytes\n\n", result_size);
492
493 GetSystemTime(&st);
494 printf("Decoding is started %d %d\n", st.wSecond, st.wMilliseconds);
495 int test_data_size = data_size + data_size/2;
496 int* test_data = (int*)malloc(test_data_size*sizeof(int));
497 Decode(result, result_size, test_data, test_data_size);
498
499 GetSystemTime(&st);
500 printf("Decoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
501
502 bool error_occur = false;
503 if (test_data_size != data_size) {
504 error_occur = true;
505 }
506 else {
507 for (int i=0; i<data_size; i++) {
508 if (test_data[i] != source[i]) {
509 error_occur = true;
510 printf("Mismatch in %d, %d %d\n", i, test_data[i], source[i]);
511 break;
512 }
513 }
514 }
515
516 if (error_occur == false) {
517 printf("Round trip is correct, 100 percent match\n\n");
518 }
519 else {
520 printf("Round trip test failed, data mismatch\n\n");
521 }
522
523 if (test_data)
524 free(test_data);
525
526 if (result)
527 free(result);
528
529 if (source)
530 free(source);
531
532 return 0;
533 }
534
535 #endif

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26