Develop and Download Open Source Software

Browse Subversion Repository

Contents of /RanCode.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 15 - (show annotations) (download) (as text)
Mon Sep 7 14:51:32 2009 UTC (14 years, 8 months ago) by berupon
File MIME type: text/x-chdr
File size: 15209 byte(s)
01領域のフラグをRangeCoderで圧縮
Golomb-Rice Coder撤去

1 // RanCode.cpp : Defines the entry point for the console application.
2 //
3 //This is DEMO version of arithmetic/range encoder written for research purposes by
4 //Andrew Polar (www.ezcodesample.com, Jan. 10, 2007). The algorithm was published by
5 //G.N.N. Martin. In March 1979. Video & Data Recording Conference, Southampton, UK.
6 //The program was tested for many statistically different data samples, however you
7 //use it on own risk. Author do not accept any responsibility for use or misuse of
8 //this program. Any data sample that cause crash or wrong result can be sent to author
9 //for research. The e-mail may be obtained by WHOIS www.ezcodesample.com.
10 //The correction of July 03, 2007. The processing of non-zero minimum has been added.
11 //User can pass data with non-zero minimum value, however the min/max limits must be
12 //specified in function call.
13 //The correction of August 17, 2007. The additional means of computational statbility
14 //has been added. As it was discovered and investigated by Bill Dorsey in beta-testing
15 //the addition operation must have treatment for carry propagation. The solution is
16 //to normalize data to vary from 1 to max-min+1 and use value 0 as a means of output
17 //of the head of operation_buffer bits. This zero value is used as synchronization
18 //flag for output buffer and simply excluded from decoded stream.
19
20 #include <stdio.h>
21 #include <math.h>
22 #include <stdlib.h>
23 #include <memory.h>
24 #include <time.h>
25 //#include <windows.h>
26
27 ///////////////////////////////////////////////////////////////////////
28 // Test data preparation
29 ///////////////////////////////////////////////////////////////////////
30
31 double entropy24(int* data, int size) {
32
33 int max_size = 1 << 24;
34 int min, counter;
35 int* buffer;
36 double entropy;
37 double log2 = log(2.0);
38 double prob;
39
40 min = data[0];
41 for (counter=0; counter<size; counter++) {
42 if (data[counter] < min)
43 min = data[counter];
44 }
45
46 for (counter=0; counter<size; counter++) {
47 data[counter] -= min;
48 }
49
50 buffer = (int*)malloc(max_size*sizeof(int));
51 memset(buffer, 0x00, max_size*sizeof(int));
52 for (counter=0; counter<size; counter++) {
53 buffer[data[counter]]++;
54 }
55
56 entropy = 0.0;
57 for (counter=0; counter<max_size; counter++) {
58 if (buffer[counter] > 0) {
59 prob = (double)buffer[counter];
60 prob /= (double)size;
61 entropy += log(prob)/log2*prob;
62 }
63 }
64 entropy *= -1.0;
65
66 for (counter=0; counter<size; counter++) {
67 data[counter] += min;
68 }
69
70 if (buffer)
71 free(buffer);
72
73 return entropy;
74 }
75
76 double round(double x) {
77 if ((x - floor(x)) >= 0.5)
78 return ceil(x);
79 else
80 return floor(x);
81 }
82
83 double MakeData(int* data, int data_size, int min, int max, int redundancy_factor) {
84
85 int counter, cnt, high, low;
86 double buf;
87
88 if (redundancy_factor <= 1)
89 redundancy_factor = 1;
90
91 if (max <= min)
92 max = min + 1;
93
94 srand((unsigned)time(0));
95 for (counter=0; counter<data_size; counter++) {
96 buf = 0;
97 for (cnt=0; cnt<redundancy_factor; cnt++) {
98 buf += (double)rand();
99 }
100 data[counter] = ((int)buf)/redundancy_factor;
101 }
102
103 low = data[0];
104 high = data[0];
105 for (counter=0; counter<data_size; counter++) {
106 if (data[counter] > high)
107 high = data[counter];
108 if (data[counter] < low)
109 low = data[counter];
110 }
111
112 for (counter=0; counter<data_size; counter++) {
113 buf = (double)(data[counter] - low);
114 buf /= (double)(high - low);
115 buf *= (double)(max - min);
116 buf = round(buf);
117 data[counter] = (int)buf + min;
118 }
119
120 return entropy24(data, data_size);
121 }
122
123 ///////////////////////////////////////////////////////////////////
124 // End of data preparation start of encoding, decoding functions
125 ///////////////////////////////////////////////////////////////////
126
127 const unsigned MAX_BASE = 15; //value is optional but 15 is recommended
128 unsigned current_byte; //position of current byte in result data
129 unsigned char current_bit; //postion of current bit in result data
130
131 //Some look up tables for fast processing
132 static int output_mask[8][32];
133 static unsigned char bytes_plus [8][64];
134
135 static unsigned char edge_mask[8] = {0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
136 static unsigned char shift_table[8] = {24, 16, 8, 0};
137
138 static long long overflow_indicator = 0xffffffffffffffff << (MAX_BASE * 2 - 1);
139
140 void make_look_ups() {
141
142 int sign_mask[8] = {
143 0xffffffff, 0x7fffffff, 0x3fffffff, 0x1fffffff,
144 0x0fffffff, 0x07ffffff, 0x03ffffff, 0x01ffffff
145 };
146
147 for (int i=0; i<8; i++) {
148 for (int j=0; j<32; j++) {
149 output_mask[i][j] = sign_mask[i];
150 output_mask[i][j] >>= (32-i-j);
151 output_mask[i][j] <<= (32-i-j);
152 }
153 }
154
155 for (int i=0; i<8; i++) {
156 for (int j=0; j<64; j++) {
157 bytes_plus[i][j] = j/8;
158 if ((i + j%8) > 7)
159 ++bytes_plus[i][j];
160 }
161 }
162 }
163
164 //The key function that takes product of x*y and convert it to
165 //mantissa and exponent. Mantissa have number of bits equal to
166 //length of the mask.
167 inline int SafeProduct(int x, int y, int mask, int& shift) {
168
169 int p = x * y;
170 if ((p >> shift) > mask) {
171 p >>= shift;
172 while (p > mask) {
173 p >>= 1;
174 ++shift;
175 }
176 }
177 else {
178 while (shift >= 0) {
179 --shift;
180 if ((p >> shift) > mask) {
181 break;
182 }
183 }
184 ++shift;
185 p >>= shift;
186 }
187 return p;
188 }
189
190 inline int readBits(int operation_buffer, int bits, unsigned char* code) {
191
192 unsigned end_byte = current_byte + bytes_plus[current_bit][bits];
193 int buffer = (code[current_byte] & edge_mask[current_bit]);
194 unsigned char total_bits = 8 - current_bit;
195 for (unsigned i=current_byte+1; i<=end_byte; ++i) {
196 (buffer <<= 8) |= code[i];
197 total_bits += 8;
198 }
199 buffer >>= (total_bits - bits);
200 operation_buffer <<= bits;
201 operation_buffer |= buffer;
202 current_byte = end_byte;
203 current_bit = (bits + current_bit)%8;
204 return operation_buffer;
205 }
206
207 inline void writeBits(long long operation_buffer, int bits, unsigned char* result) {
208
209 int buffer = (int)((operation_buffer >> current_bit) >> 32);
210 buffer &= output_mask[current_bit][bits];
211 unsigned bytes_plus2 = bytes_plus[current_bit][bits];
212 current_bit = (bits + current_bit)%8;
213 for (unsigned i=0; i<=bytes_plus2; ++i) {
214 result[current_byte + i] |= (buffer >> shift_table[i]);
215 }
216 current_byte += bytes_plus2;
217 }
218
219 //The result buffer should be allocated outside the function
220 //The data must be non-negative integers less or equal to max_value
221 //The actual size of compressed buffer is returned in &result_size
222 //Initially result_size must contain size of result buffer
223 void Encode(int* source, int source_size, int max_value, int min_value, unsigned char* result, int& result_size) {
224
225 memset(result, 0x00, result_size);
226 current_byte = 0;
227 current_bit = 0;
228
229 if (min_value != 0) {
230 for (int i=0; i<source_size; i++) {
231 source[i] -= min_value;
232 }
233 }
234
235 //collecting frequencies
236 int range = (max_value + 1) - min_value + 1; //we add one more value for flag
237 int* frequency = (int*)malloc(range*sizeof(int));
238 memset(frequency, 0x00, range * sizeof(int));
239 for (int i=0; i<source_size; ++i)
240 ++frequency[source[i]+1]; //we make source vary from 1 to max-min+1
241 frequency[0] = 1; //we use it as flag, virtual 0 value occured once
242
243 //correction by bringing them in certain range
244 int Denominator = source_size + 1; //we increased size by 1
245 unsigned base = 0;
246 while (Denominator > 1) {
247 Denominator >>= 1;
248 base++;
249 }
250 if (base > MAX_BASE)
251 base = MAX_BASE;
252 Denominator = (1 << base);
253
254 double coeff = (double)(Denominator)/((double)(source_size));
255 for (int i=1; i<range; i++) { //we start from 1 on purpose
256 if (frequency[i] > 0) {
257 double ff = (double)(frequency[i]);
258 ff *= coeff;
259 frequency[i] = (int)(ff);
260 if (frequency[i] == 0)
261 frequency[i] = 1;
262 }
263 }
264 int sm = 0;
265 for (int i=0; i<range; i++) {
266 sm += frequency[i];
267 }
268 int diff = Denominator - sm;
269 if (diff > 0) {
270 int mx = frequency[0];
271 int ps = 0;
272 for (int i=0; i<range; i++) {
273 if (frequency[i] > mx) {
274 mx = frequency[i];
275 ps = i;
276 }
277 }
278 frequency[ps] += diff;
279 }
280 if (diff < 0) {
281 int i = 0;
282 while (diff < 0) {
283 if (frequency[i] > 1) {
284 frequency[i]--;
285 diff++;
286 }
287 i++;
288 if (i == (range-1))
289 i = 0;
290 }
291 }
292 //end of correction
293
294 //saving frequencies in result buffer
295 int offset = 0;
296 memcpy(result + offset, &range, 4);
297 offset += 4;
298 for (int m=0; m<range; ++m) {
299 memcpy(result + offset, &frequency[m], 2);
300 offset += 2;
301 }
302 current_byte += offset;
303
304 //making cumulative frequencies
305 int* cumulative = (int*)malloc((range+1)*sizeof(int));
306 memset(cumulative, 0x00, (range+1)*sizeof(int));
307 cumulative[0] = 0;
308 for (int i=1; i<range; i++) {
309 cumulative[i] = cumulative[i-1] + frequency[i-1];
310 }
311 cumulative[range] = Denominator;
312 //data is ready
313
314 //encoding
315 make_look_ups();
316 int mask = 0xFFFFFFFF >> (32 - base);
317 //First 64 bits are not used for data, we use them for saving data_size and
318 //any other 32 bit value that we want to save
319 long long operation_buffer = source_size;
320 operation_buffer <<= 32;
321 //we save minimum value always as positive number and use last
322 //decimal position as sign indicator
323 int saved_min = min_value * 10;
324 if (saved_min < 0) {
325 saved_min = 1 - saved_min;
326 }
327 operation_buffer |= saved_min; //we use another 32 bits of first 64 bit buffer
328 /////////////////////
329 int product = 1;
330 int shift = 0;
331 for (int i=0; i<source_size; ++i) {
332
333 while (((operation_buffer << (base - shift)) & overflow_indicator) == overflow_indicator) {
334 printf("Possible buffer overflow is corrected at value %d\n", i);
335 //this is zero flag output in order to avoid buffer overflow
336 //rarely happen, cumulative[0] = 0, frequency[0] = 1
337 writeBits(operation_buffer, base-shift, result);
338 operation_buffer <<= (base - shift);
339 operation_buffer += cumulative[0] * product;
340 product = SafeProduct(product, frequency[0], mask, shift);
341 //in result of this operation shift = 0
342 }
343
344 writeBits(operation_buffer, base-shift, result);
345 operation_buffer <<= (base - shift);
346 operation_buffer += cumulative[source[i]+1] * product;
347 product = SafeProduct(product, frequency[source[i]+1], mask, shift);
348 }
349 //flushing remained 64 bits
350 writeBits(operation_buffer, 24, result);
351 operation_buffer <<= 24;
352 writeBits(operation_buffer, 24, result);
353 operation_buffer <<= 24;
354 writeBits(operation_buffer, 16, result);
355 operation_buffer <<= 16;
356 result_size = current_byte + 1;
357 //end encoding
358
359 if (min_value != 0) {
360 for (int i=0; i<source_size; i++) {
361 source[i] += min_value;
362 }
363 }
364
365 if (cumulative)
366 free(cumulative);
367
368 if (frequency)
369 free(frequency);
370 }
371
372 //result buffer must be allocated outside of function
373 //result size must contain the correct size of the buffer
374 void Decode(unsigned char* code, int code_size, int* result, int& result_size) {
375
376 current_byte = 0;
377 current_bit = 0;
378
379 //reading frequencies
380 int range = 0;
381 memcpy(&range, code, 4);
382
383 short* frequency = (short*)malloc(range * sizeof(short));
384 memcpy(frequency, code + 4, range * 2);
385 int Denominator = 0;
386 for (int i=0; i<range; ++i) {
387 Denominator += frequency[i];
388 }
389 current_byte += range * 2 + 4;
390 unsigned base = 0;
391 while (Denominator > 1) {
392 Denominator >>= 1;
393 base++;
394 }
395 Denominator = (1 << base);
396
397 //making cumulative frequencies
398 int* cumulative = (int*)malloc((range+1)*sizeof(int));
399 memset(cumulative, 0x00, (range+1)*sizeof(int));
400 cumulative[0] = 0;
401 for (int i=1; i<range; i++) {
402 cumulative[i] = cumulative[i-1] + frequency[i-1];
403 }
404 cumulative[range] = Denominator;
405 //data is ready
406
407 //prepare data for decoding
408 int* symbol = (int*)malloc(Denominator*sizeof(int));
409 memset(symbol, 0x00, Denominator*sizeof(int));
410 for (int k=0; k<range; ++k) {
411 for (int i=cumulative[k]; i<cumulative[k+1]; i++) {
412 symbol[i] = k;
413 }
414 }
415 //data is ready
416
417 //decoding
418 make_look_ups();
419 int mask = 0xFFFFFFFF >> (32 - base);
420 long long ID;
421 int product = 1;
422 int shift = 0;
423 int operation_buffer = 0;
424 //we skip first 64 bits they contain size and minimum value
425 operation_buffer = readBits(operation_buffer, 32, code);
426 result_size = (int)operation_buffer; //First 32 bits is data_size
427 operation_buffer = 0;
428 operation_buffer = readBits(operation_buffer, 32, code);
429 int min_value = (int)operation_buffer; //Second 32 bits is minimum value;
430 //we find sign according to our convention
431 if ((min_value % 10) > 0)
432 min_value = - min_value;
433 min_value /= 10;
434 operation_buffer = 0;
435 ////////////////////////////////////////
436 int cnt = 0;
437 while (cnt < result_size) {
438 operation_buffer = readBits(operation_buffer, base-shift, code);
439 ID = operation_buffer/product;
440 assert(0<=ID && ID<=Denominator);
441 operation_buffer -= product * cumulative[symbol[ID]];
442 product = SafeProduct(product, frequency[symbol[ID]], mask, shift);
443 result[cnt] = symbol[ID] + min_value - 1;
444 if (result[cnt] >= min_value)
445 cnt++;
446 }
447 //end decoding
448
449 if (symbol)
450 free(symbol);
451
452 if (cumulative)
453 free(cumulative);
454
455 if (frequency)
456 free(frequency);
457 }
458
459 #if 0
460
461 int main()
462 {
463 printf("Making data for round trip...\n");
464 int data_size = 1800000;
465 int min_data_value = -297;
466 int max_data_value = 4000;
467 int redundancy_factor = 10;
468
469 int* source = (int*)malloc(data_size * sizeof(int));
470 double entropy = MakeData(source, data_size, min_data_value, max_data_value, redundancy_factor);
471
472 int Bytes = (int)(ceil((double)(data_size) * entropy)/8.0);
473 printf("Data size = %d, alphabet = %d\n", data_size, max_data_value - min_data_value + 1);
474 printf("Entropy %5.3f, estimated compressed size %d bytes\n\n", entropy, Bytes);
475
476 SYSTEMTIME st;
477 GetSystemTime(&st);
478 printf("Encoding is started %d %d\n", st.wSecond, st.wMilliseconds);
479
480 int result_size = Bytes + Bytes/2 + (max_data_value - min_data_value) * 2 + 4 + 1024;
481 unsigned char* result = (unsigned char*)malloc(result_size * sizeof(unsigned char));
482
483 //Min and Max data values can be specified approximately they can make
484 //larger data segment, for example Max value can be passed as larger and Min
485 //value can be passed as smaller number. Replace the function below by commented
486 //out example and it will work in the same way.
487 //Encode(source, data_size, max_data_value + 150, min_data_value - 140, result, result_size);
488 Encode(source, data_size, max_data_value, min_data_value, result, result_size);
489
490 GetSystemTime(&st);
491 printf("Encoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
492 printf("Actual size of compressed buffer with frequencies %d bytes\n\n", result_size);
493
494 GetSystemTime(&st);
495 printf("Decoding is started %d %d\n", st.wSecond, st.wMilliseconds);
496 int test_data_size = data_size + data_size/2;
497 int* test_data = (int*)malloc(test_data_size*sizeof(int));
498 Decode(result, result_size, test_data, test_data_size);
499
500 GetSystemTime(&st);
501 printf("Decoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
502
503 bool error_occur = false;
504 if (test_data_size != data_size) {
505 error_occur = true;
506 }
507 else {
508 for (int i=0; i<data_size; i++) {
509 if (test_data[i] != source[i]) {
510 error_occur = true;
511 printf("Mismatch in %d, %d %d\n", i, test_data[i], source[i]);
512 break;
513 }
514 }
515 }
516
517 if (error_occur == false) {
518 printf("Round trip is correct, 100 percent match\n\n");
519 }
520 else {
521 printf("Round trip test failed, data mismatch\n\n");
522 }
523
524 if (test_data)
525 free(test_data);
526
527 if (result)
528 free(result);
529
530 if (source)
531 free(source);
532
533 return 0;
534 }
535
536 #endif

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26