• R/O
  • SSH
  • HTTPS

jyeipegyuu: Commit


Commit MetaInfo

Revision14 (tree)
Time2009-09-07 17:07:15
Authorberupon

Log Message

bzip2からRangeCoderに切り替えて圧縮率向上。

Change Summary

Incremental Difference

--- Compressor/BZip2Compressor.cpp (revision 13)
+++ Compressor/BZip2Compressor.cpp (nonexistent)
@@ -1,31 +0,0 @@
1-#include "stdafx.h"
2-
3-#include "BZip2Compressor.h"
4-
5-#include "bzlib.h"
6-#pragma comment(lib, "libbz2.lib")
7-
8-size_t BZip2Compressor::Compress(
9- const unsigned char* src,
10- size_t srcLen,
11- unsigned char* dest,
12- size_t destLen
13-)
14-{
15- size_t compressedLen = destLen;
16- int ret = BZ2_bzBuffToBuffCompress((char*)dest, &compressedLen, (char*)src, srcLen, 9, 0, 30);
17- return compressedLen;
18-}
19-
20-size_t BZip2Compressor::Decompress(
21- const unsigned char* src,
22- size_t srcLen,
23- unsigned char* dest,
24- size_t destLen
25-)
26-{
27- size_t decompressedLen = destLen;
28- int ret = BZ2_bzBuffToBuffDecompress((char*)dest, &decompressedLen, (char*)src, srcLen, 0, 0);
29- return decompressedLen;
30-}
31-
--- Compressor/bzlib.h (revision 13)
+++ Compressor/bzlib.h (nonexistent)
@@ -1,282 +0,0 @@
1-
2-/*-------------------------------------------------------------*/
3-/*--- Public header file for the library. ---*/
4-/*--- bzlib.h ---*/
5-/*-------------------------------------------------------------*/
6-
7-/* ------------------------------------------------------------------
8- This file is part of bzip2/libbzip2, a program and library for
9- lossless, block-sorting data compression.
10-
11- bzip2/libbzip2 version 1.0.5 of 10 December 2007
12- Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13-
14- Please read the WARNING, DISCLAIMER and PATENTS sections in the
15- README file.
16-
17- This program is released under the terms of the license contained
18- in the file LICENSE.
19- ------------------------------------------------------------------ */
20-
21-
22-#ifndef _BZLIB_H
23-#define _BZLIB_H
24-
25-#ifdef __cplusplus
26-extern "C" {
27-#endif
28-
29-#define BZ_RUN 0
30-#define BZ_FLUSH 1
31-#define BZ_FINISH 2
32-
33-#define BZ_OK 0
34-#define BZ_RUN_OK 1
35-#define BZ_FLUSH_OK 2
36-#define BZ_FINISH_OK 3
37-#define BZ_STREAM_END 4
38-#define BZ_SEQUENCE_ERROR (-1)
39-#define BZ_PARAM_ERROR (-2)
40-#define BZ_MEM_ERROR (-3)
41-#define BZ_DATA_ERROR (-4)
42-#define BZ_DATA_ERROR_MAGIC (-5)
43-#define BZ_IO_ERROR (-6)
44-#define BZ_UNEXPECTED_EOF (-7)
45-#define BZ_OUTBUFF_FULL (-8)
46-#define BZ_CONFIG_ERROR (-9)
47-
48-typedef
49- struct {
50- char *next_in;
51- unsigned int avail_in;
52- unsigned int total_in_lo32;
53- unsigned int total_in_hi32;
54-
55- char *next_out;
56- unsigned int avail_out;
57- unsigned int total_out_lo32;
58- unsigned int total_out_hi32;
59-
60- void *state;
61-
62- void *(*bzalloc)(void *,int,int);
63- void (*bzfree)(void *,void *);
64- void *opaque;
65- }
66- bz_stream;
67-
68-
69-#ifndef BZ_IMPORT
70-#define BZ_EXPORT
71-#endif
72-
73-#ifndef BZ_NO_STDIO
74-/* Need a definitition for FILE */
75-#include <stdio.h>
76-#endif
77-
78-#ifdef _WIN32
79-# include <windows.h>
80-# ifdef small
81- /* windows.h define small to char */
82-# undef small
83-# endif
84-# ifdef BZ_EXPORT
85-# define BZ_API(func) WINAPI func
86-# define BZ_EXTERN extern
87-# else
88- /* import windows dll dynamically */
89-# define BZ_API(func) (WINAPI * func)
90-# define BZ_EXTERN
91-# endif
92-#else
93-# define BZ_API(func) func
94-# define BZ_EXTERN extern
95-#endif
96-
97-
98-/*-- Core (low-level) library functions --*/
99-
100-BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
101- bz_stream* strm,
102- int blockSize100k,
103- int verbosity,
104- int workFactor
105- );
106-
107-BZ_EXTERN int BZ_API(BZ2_bzCompress) (
108- bz_stream* strm,
109- int action
110- );
111-
112-BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
113- bz_stream* strm
114- );
115-
116-BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
117- bz_stream *strm,
118- int verbosity,
119- int small
120- );
121-
122-BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
123- bz_stream* strm
124- );
125-
126-BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
127- bz_stream *strm
128- );
129-
130-
131-
132-/*-- High(er) level library functions --*/
133-
134-#ifndef BZ_NO_STDIO
135-#define BZ_MAX_UNUSED 5000
136-
137-typedef void BZFILE;
138-
139-BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
140- int* bzerror,
141- FILE* f,
142- int verbosity,
143- int small,
144- void* unused,
145- int nUnused
146- );
147-
148-BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
149- int* bzerror,
150- BZFILE* b
151- );
152-
153-BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
154- int* bzerror,
155- BZFILE* b,
156- void** unused,
157- int* nUnused
158- );
159-
160-BZ_EXTERN int BZ_API(BZ2_bzRead) (
161- int* bzerror,
162- BZFILE* b,
163- void* buf,
164- int len
165- );
166-
167-BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
168- int* bzerror,
169- FILE* f,
170- int blockSize100k,
171- int verbosity,
172- int workFactor
173- );
174-
175-BZ_EXTERN void BZ_API(BZ2_bzWrite) (
176- int* bzerror,
177- BZFILE* b,
178- void* buf,
179- int len
180- );
181-
182-BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
183- int* bzerror,
184- BZFILE* b,
185- int abandon,
186- unsigned int* nbytes_in,
187- unsigned int* nbytes_out
188- );
189-
190-BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
191- int* bzerror,
192- BZFILE* b,
193- int abandon,
194- unsigned int* nbytes_in_lo32,
195- unsigned int* nbytes_in_hi32,
196- unsigned int* nbytes_out_lo32,
197- unsigned int* nbytes_out_hi32
198- );
199-#endif
200-
201-
202-/*-- Utility functions --*/
203-
204-BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
205- char* dest,
206- unsigned int* destLen,
207- char* source,
208- unsigned int sourceLen,
209- int blockSize100k,
210- int verbosity,
211- int workFactor
212- );
213-
214-BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
215- char* dest,
216- unsigned int* destLen,
217- char* source,
218- unsigned int sourceLen,
219- int small,
220- int verbosity
221- );
222-
223-
224-/*--
225- Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
226- to support better zlib compatibility.
227- This code is not _officially_ part of libbzip2 (yet);
228- I haven't tested it, documented it, or considered the
229- threading-safeness of it.
230- If this code breaks, please contact both Yoshioka and me.
231---*/
232-
233-BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
234- void
235- );
236-
237-#ifndef BZ_NO_STDIO
238-BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
239- const char *path,
240- const char *mode
241- );
242-
243-BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
244- int fd,
245- const char *mode
246- );
247-
248-BZ_EXTERN int BZ_API(BZ2_bzread) (
249- BZFILE* b,
250- void* buf,
251- int len
252- );
253-
254-BZ_EXTERN int BZ_API(BZ2_bzwrite) (
255- BZFILE* b,
256- void* buf,
257- int len
258- );
259-
260-BZ_EXTERN int BZ_API(BZ2_bzflush) (
261- BZFILE* b
262- );
263-
264-BZ_EXTERN void BZ_API(BZ2_bzclose) (
265- BZFILE* b
266- );
267-
268-BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
269- BZFILE *b,
270- int *errnum
271- );
272-#endif
273-
274-#ifdef __cplusplus
275-}
276-#endif
277-
278-#endif
279-
280-/*-------------------------------------------------------------*/
281-/*--- end bzlib.h ---*/
282-/*-------------------------------------------------------------*/
--- Compressor/BZip2Compressor.h (revision 13)
+++ Compressor/BZip2Compressor.h (nonexistent)
@@ -1,24 +0,0 @@
1-#pragma once
2-
3-#include "ICompressor.h"
4-
5-class BZip2Compressor : public ICompressor
6-{
7-public:
8- // overrides
9- size_t Compress(
10- const unsigned char* src,
11- size_t srcLen,
12- unsigned char* dest,
13- size_t destLen
14- );
15-
16- size_t Decompress(
17- const unsigned char* src,
18- size_t srcLen,
19- unsigned char* dest,
20- size_t destLen
21- );
22-
23-};
24-
--- Compressor/ICompressor.h (revision 13)
+++ Compressor/ICompressor.h (nonexistent)
@@ -1,20 +0,0 @@
1-#pragma once
2-
3-class ICompressor
4-{
5-public:
6- virtual size_t Compress(
7- const unsigned char* src,
8- size_t srcLen,
9- unsigned char* dest,
10- size_t destLen
11- ) = 0;
12-
13- virtual size_t Decompress(
14- const unsigned char* src,
15- size_t srcLen,
16- unsigned char* dest,
17- size_t destLen
18- ) = 0;
19-};
20-
--- misc.h (revision 13)
+++ misc.h (revision 14)
@@ -1,4 +1,7 @@
11
2+#include "RanCode.h"
3+//#include "RanCodeAdp.h"
4+
25 template <typename T, typename T2>
36 void gather(
47 const T* in, int lineOffsetBytes,
@@ -248,17 +251,19 @@
248251 int zeroRepeatHist[1024*4];
249252 unsigned char riceCoderParam;
250253
254+ size_t zeroOneOnlyAreaHist[2];
255+ size_t nonZeroOneOnlyAreaHist[1024];
256+
257+
251258 size_t srcCount;
252259 size_t signFlagsCount;
253260 size_t initialCompressedLen;
254261 size_t riceCodedLen;
255- size_t compressedLen;
256262 size_t repeationCompressedLen;
257263 size_t totalLen;
258264 };
259265
260266 size_t compressSub(
261- ICompressor& compressor,
262267 int* src,
263268 const unsigned char* pZeroOneInfos,
264269 size_t blockCount, size_t blockSize,
@@ -265,6 +270,7 @@
265270 unsigned char* dest, size_t destLen,
266271 unsigned char* tmp,
267272 unsigned char* tmp2,
273+ unsigned char* tmp3,
268274 CompressInfo& cinfo
269275 )
270276 {
@@ -281,6 +287,56 @@
281287 int* phists = cinfo.phists;
282288 int* mhists = cinfo.mhists;
283289
290+ int initialCompressedLen = srcCount * 4;
291+ Encode(src, srcCount, max, 0, tmp, initialCompressedLen);
292+
293+ int encodedValueSizes[2];
294+ int* from = src;
295+ if (pZeroOneInfos) {
296+ std::vector<int> values;
297+ for (size_t i=0; i<blockCount; ++i) {
298+ if (pZeroOneInfos[i]) {
299+ for (size_t j=0; j<blockSize; ++j) {
300+ int val = src[i*blockSize+j];
301+ cinfo.zeroOneOnlyAreaHist[val]++;
302+ values.push_back(val);
303+ }
304+ }
305+ }
306+ encodedValueSizes[0] = blockCount*blockSize;
307+ Encode(&values[0], values.size(), 1, 0, tmp2, encodedValueSizes[0]);
308+
309+ values.clear();
310+ int maxV = 0;
311+ for (size_t i=0; i<blockCount; ++i) {
312+ if (!pZeroOneInfos[i]) {
313+ for (size_t j=0; j<blockSize; ++j) {
314+ int val = src[i*blockSize+j];
315+ maxV = std::max(maxV, val);
316+ cinfo.nonZeroOneOnlyAreaHist[val]++;
317+ values.push_back(val);
318+ }
319+ }
320+ }
321+ encodedValueSizes[1] = blockCount*blockSize;
322+ Encode(&values[0], values.size(), maxV, 0, tmp3, encodedValueSizes[1]);
323+
324+ int totalSize = encodedValueSizes[0] + encodedValueSizes[1];
325+ size_t len = 0;
326+ if (totalSize <= initialCompressedLen) {
327+ *dest++ = 3;
328+
329+ for (size_t i=0; i<2; ++i) {
330+ *((size_t*)dest) = encodedValueSizes[i];
331+ dest += 4;
332+ memcpy(dest, ((i==0)?tmp2:tmp3), encodedValueSizes[i]);
333+ dest += encodedValueSizes[i];
334+ }
335+
336+ goto label_end;
337+ }
338+
339+ }
284340 int b = 0;
285341
286342 if (max > 2048) {
@@ -301,49 +357,22 @@
301357 b = 0;
302358 }
303359
304- size_t initialCompressedLen = compressor.Compress((const unsigned char*)src, srcCount*4, tmp, -1);
305-
360+ RiceCoder riceCoder(b);
306361 BitWriter bitWriter(tmp2);
307- RiceCoder riceCoder(b);
308-
309- int* from = src;
310- if (pZeroOneInfos) {
311- for (size_t i=0; i<blockCount; ++i) {
312- if (pZeroOneInfos[i]) {
313- for (size_t j=0; j<blockSize; ++j) {
314- bitWriter.putBit(src[i*blockSize+j] == 0);
315- }
316- }
362+ for (size_t i=0; i<blockCount; ++i) {
363+ for (size_t j=0; j<blockSize; ++j) {
364+ riceCoder.Encode(*from++, bitWriter);
317365 }
318- for (size_t i=0; i<blockCount; ++i) {
319- if (!pZeroOneInfos[i]) {
320- for (size_t j=0; j<blockSize; ++j) {
321- riceCoder.Encode(src[i*blockSize+j], bitWriter);
322- }
323- }
324- }
325- }else {
326- for (size_t i=0; i<blockCount; ++i) {
327- for (size_t j=0; j<blockSize; ++j) {
328- riceCoder.Encode(*from++, bitWriter);
329- }
330- }
331366 }
332367 size_t riceCodedLen = bitWriter.nBytes();
333- size_t compressedLen = compressedLen = compressor.Compress(tmp2, riceCodedLen, dest+6, -1);
334- unsigned char* dest2 = (compressedLen < initialCompressedLen) ? tmp : (dest+6);
335- size_t repeationCompressedLen = repeationCompress(tmp2, riceCodedLen, dest2);
368+ size_t repeationCompressedLen = repeationCompress(tmp2, riceCodedLen, dest+6);
336369
337370 size_t len = 0;
338- if (initialCompressedLen < compressedLen && initialCompressedLen < riceCodedLen && initialCompressedLen < repeationCompressedLen) {
371+ if (initialCompressedLen < riceCodedLen && initialCompressedLen < repeationCompressedLen) {
339372 *dest++ = 1;
340373 *dest++ = RiceCoderFlag::None;
341374 len = initialCompressedLen;
342375 memcpy(dest+4, tmp, len);
343- }else if (compressedLen < riceCodedLen && compressedLen < repeationCompressedLen) {
344- *dest++ = 1;
345- *dest++ = b;
346- len = compressedLen;
347376 }else if (riceCodedLen < repeationCompressedLen) {
348377 *dest++ = 0;
349378 *dest++ = b;
@@ -353,12 +382,16 @@
353382 *dest++ = 2;
354383 *dest++ = b;
355384 len = repeationCompressedLen;
356- memcpy(dest+4, dest2, len);
357385 }
358386 *((size_t*)dest) = len;
359387 dest += 4;
360388 dest += len;
361389
390+ cinfo.riceCoderParam = b;
391+ cinfo.riceCodedLen = riceCodedLen;
392+ cinfo.repeationCompressedLen = repeationCompressedLen;
393+
394+label_end:
362395 size_t destDiff = dest - initialDest;
363396
364397 cinfo.srcCount = srcCount;
@@ -365,11 +398,7 @@
365398 cinfo.mini = mini;
366399 cinfo.maxi = maxi;
367400 cinfo.max = max;
368- cinfo.riceCoderParam = b;
369401 cinfo.initialCompressedLen = initialCompressedLen;
370- cinfo.riceCodedLen = riceCodedLen;
371- cinfo.compressedLen = compressedLen;
372- cinfo.repeationCompressedLen = repeationCompressedLen;
373402 cinfo.totalLen = destDiff;
374403
375404 return destDiff;
@@ -533,7 +562,6 @@
533562 }
534563
535564 size_t compress(
536- ICompressor& compressor,
537565 size_t hBlockCount,
538566 size_t vBlockCount,
539567 const unsigned char* pZeroOneInfos,
@@ -554,7 +582,7 @@
554582 for (size_t i=0; i<8; ++i) {
555583 const unsigned char* p01 = (i < zeroOneLimit) ? 0 : pZeroOneInfos;
556584 size_t blockSize = 1 + i * 2;
557- progress += compressSub(compressor, from, p01, totalBlockCount, blockSize, dest+progress, destLen-progress, tmp1, tmp2, compressInfos[i]);
585+ progress += compressSub(from, p01, totalBlockCount, blockSize, dest+progress, destLen-progress, tmp1, tmp2, tmp3, compressInfos[i]);
558586 from += totalBlockCount * blockSize;
559587 }
560588
@@ -562,10 +590,10 @@
562590 }
563591
564592 size_t decompressSub(
565- ICompressor& compressor,
566593 const unsigned char* src,
567594 const unsigned char* pZeroOneInfos,
568595 unsigned char* tmp,
596+ int* tmp2,
569597 int* dest,
570598 size_t totalBlockCount,
571599 size_t blockSize)
@@ -575,53 +603,74 @@
575603 const unsigned char* initialSrc = src;
576604
577605 unsigned char compressFlag = *src++;
578- unsigned char b = *src++;
579- size_t len = *(size_t*)src;
580- src += 4;
581606
582- size_t len2 = 0;
583- switch (compressFlag) {
584- case 0:
585- memcpy(tmp, src, len);
586- len2 = len;
587- break;
588- case 1:
589- len2 = compressor.Decompress(src, len, tmp, -1);
590- break;
591- case 2:
592- len2 = repeationDecompress(src, len, tmp);
593- break;
594- }
607+ if (compressFlag != 3) {
608+
609+ unsigned char b = *src++;
610+ size_t len = *(size_t*)src;
611+ src += 4;
595612
596- if (b == RiceCoderFlag::None) {
597- memcpy(dest, tmp, len2);
598- }else {
599- RiceCoder riceCoder(b);
600- BitReader bitReader(tmp);
601- if (pZeroOneInfos) {
602- for (size_t i=0; i<totalBlockCount; ++i) {
603- if (pZeroOneInfos[i]) {
604- for (size_t j=0; j<blockSize; ++j) {
605- dest[i*blockSize+j] = bitReader.getBit() ? 0 : 1;
606- }
607- }
608- }
609- for (size_t i=0; i<totalBlockCount; ++i) {
610- if (!pZeroOneInfos[i]) {
611- for (size_t j=0; j<blockSize; ++j) {
612- dest[i*blockSize+j] = riceCoder.Decode(bitReader);
613- }
614- }
615- }
613+ int len2 = 0;
614+ switch (compressFlag) {
615+ case 0:
616+ memcpy(tmp, src, len);
617+ len2 = len;
618+ break;
619+ case 1:
620+ len2 = destLen*4;
621+ Decode((unsigned char*)src, len, (int*)tmp, len2);
622+ break;
623+ case 2:
624+ len2 = repeationDecompress(src, len, tmp);
625+ break;
626+ }
627+ src += len;
628+
629+ if (b == RiceCoderFlag::None) {
630+ memcpy(dest, tmp, len2*4);
616631 }else {
632+ RiceCoder riceCoder(b);
633+ BitReader bitReader(tmp);
617634 for (size_t i=0; i<destLen; ++i) {
618635 dest[i] = riceCoder.Decode(bitReader);
619636 }
620637 }
621- }
622-// showMinus(dest, dest, destLen);
638+ }else {
639+ size_t sizes[2] = {0};
640+ int resultSize = destLen * 4;
623641
624- return 6 + len;
642+ sizes[0] = *(size_t*)src;
643+ src += 4;
644+ memcpy(tmp, src, sizes[0]);
645+ src += sizes[0];
646+ Decode(tmp, sizes[0], tmp2, resultSize);
647+
648+ size_t count = 0;
649+ for (size_t i=0; i<totalBlockCount; ++i) {
650+ if (pZeroOneInfos[i]) {
651+ for (size_t j=0; j<blockSize; ++j) {
652+ dest[i*blockSize+j] = tmp2[count++];
653+ }
654+ }
655+ }
656+
657+ sizes[1] = *(size_t*)src;
658+ src += 4;
659+ memcpy(tmp, src, sizes[1]);
660+ src += sizes[1];
661+ Decode(tmp, sizes[1], tmp2, resultSize);
662+
663+ count = 0;
664+ for (size_t i=0; i<totalBlockCount; ++i) {
665+ if (!pZeroOneInfos[i]) {
666+ for (size_t j=0; j<blockSize; ++j) {
667+ dest[i*blockSize+j] = tmp2[count++];
668+ }
669+ }
670+ }
671+
672+ }
673+ return src - initialSrc;
625674 }
626675
627676 void reorderByFrequency(
@@ -946,7 +995,6 @@
946995 }
947996
948997 size_t decompress(
949- ICompressor& decompressor,
950998 size_t hBlockCount,
951999 size_t vBlockCount,
9521000 const unsigned char* pZeroOneInfos,
@@ -964,7 +1012,7 @@
9641012 for (size_t i=0; i<8; ++i) {
9651013 const unsigned char* p01 = (i < zeroOneLimit) ? 0 : pZeroOneInfos;
9661014 size_t blockSize = 1 + i * 2;
967- from += decompressSub(decompressor, from, p01, tmp, to, totalBlockCount, blockSize);
1015+ from += decompressSub(from, p01, tmp, tmp2, to, totalBlockCount, blockSize);
9681016 to += totalBlockCount * blockSize;
9691017 }
9701018
--- RanCodeAdp.h (nonexistent)
+++ RanCodeAdp.h (revision 14)
@@ -0,0 +1,535 @@
1+// RanCode.cpp : Defines the entry point for the console application.
2+//
3+//This is DEMO version of arithmetic/range encoder written for research purposes by
4+//Andrew Polar (www.ezcodesample.com, Jan. 10, 2007). The algorithm was published by
5+//G.N.N. Martin. In March 1979. Video & Data Recording Conference, Southampton, UK.
6+//The program was tested for many statistically different data samples, however you
7+//use it on own risk. Author do not accept any responsibility for use or misuse of
8+//this program. Any data sample that cause crash or wrong result can be sent to author
9+//for research. The e-mail may be obtained by WHOIS www.ezcodesample.com.
10+//The correction of July 03, 2007. The processing of non-zero minimum has been added.
11+//User can pass data with non-zero minimum value, however the min/max limits must be
12+//specified in function call.
13+//The correction of August 17, 2007. The additional means of computational statbility
14+//has been added. As it was discovered and investigated by Bill Dorsey in beta-testing
15+//the addition operation must have treatment for carry propagation. The solution is
16+//to normalize data to vary from 1 to max-min+1 and use value 0 as a means of output
17+//of the head of operation_buffer bits. This zero value is used as synchronization
18+//flag for output buffer and simply excluded from decoded stream.
19+//Last correction 09/07/2007 by Andrew Polar, making range coder adoptive
20+//The missing functionality is big-endian processor capabilities.
21+
22+#include <stdio.h>
23+#include <math.h>
24+#include <stdlib.h>
25+#include <memory.h>
26+#include <time.h>
27+//#include <windows.h>
28+
29+///////////////////////////////////////////////////////////////////////
30+// Test data preparation
31+///////////////////////////////////////////////////////////////////////
32+
33+double entropy24(int* data, int size) {
34+
35+ int max_size = 1 << 24;
36+ int min, counter;
37+ int* buffer;
38+ double entropy;
39+ double log2 = log(2.0);
40+ double prob;
41+
42+ min = data[0];
43+ for (counter=0; counter<size; counter++) {
44+ if (data[counter] < min)
45+ min = data[counter];
46+ }
47+
48+ for (counter=0; counter<size; counter++) {
49+ data[counter] -= min;
50+ }
51+
52+ buffer = (int*)malloc(max_size*sizeof(int));
53+ memset(buffer, 0x00, max_size*sizeof(int));
54+ for (counter=0; counter<size; counter++) {
55+ buffer[data[counter]]++;
56+ }
57+
58+ entropy = 0.0;
59+ for (counter=0; counter<max_size; counter++) {
60+ if (buffer[counter] > 0) {
61+ prob = (double)buffer[counter];
62+ prob /= (double)size;
63+ entropy += log(prob)/log2*prob;
64+ }
65+ }
66+ entropy *= -1.0;
67+
68+ for (counter=0; counter<size; counter++) {
69+ data[counter] += min;
70+ }
71+
72+ if (buffer)
73+ free(buffer);
74+
75+ return entropy;
76+}
77+
78+double round(double x) {
79+ if ((x - floor(x)) >= 0.5)
80+ return ceil(x);
81+ else
82+ return floor(x);
83+}
84+
85+double MakeData(int* data, int data_size, int min, int max, int redundancy_factor) {
86+
87+ int counter, cnt, high, low;
88+ double buf;
89+
90+ if (redundancy_factor <= 1)
91+ redundancy_factor = 1;
92+
93+ if (max <= min)
94+ max = min + 1;
95+
96+ srand((unsigned)time(0));
97+ for (counter=0; counter<data_size; counter++) {
98+ buf = 0;
99+ for (cnt=0; cnt<redundancy_factor; cnt++) {
100+ buf += (double)rand();
101+ }
102+ data[counter] = ((int)buf)/redundancy_factor;
103+ }
104+
105+ low = data[0];
106+ high = data[0];
107+ for (counter=0; counter<data_size; counter++) {
108+ if (data[counter] > high)
109+ high = data[counter];
110+ if (data[counter] < low)
111+ low = data[counter];
112+ }
113+
114+ for (counter=0; counter<data_size; counter++) {
115+ buf = (double)(data[counter] - low);
116+ buf /= (double)(high - low);
117+ buf *= (double)(max - min);
118+ buf = round(buf);
119+ data[counter] = (int)buf + min;
120+ }
121+
122+ return entropy24(data, data_size);
123+}
124+
125+///////////////////////////////////////////////////////////////////
126+// End of data preparation start of encoding, decoding functions
127+///////////////////////////////////////////////////////////////////
128+
129+const unsigned MAX_BASE = 15; //value is optional but 15 is recommended
130+unsigned current_byte; //position of current byte in result data
131+unsigned char current_bit; //postion of current bit in result data
132+
133+//Some look up tables for fast processing
134+static int output_mask[8][32];
135+static unsigned char bytes_plus [8][64];
136+
137+static unsigned char edge_mask[8] = {0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
138+static unsigned char shift_table[8] = {24, 16, 8, 0};
139+
140+static long long overflow_indicator = 0xffffffffffffffff << (MAX_BASE * 2 - 1);
141+
142+void make_look_ups() {
143+
144+ int sign_mask[8] = {
145+ 0xffffffff, 0x7fffffff, 0x3fffffff, 0x1fffffff,
146+ 0x0fffffff, 0x07ffffff, 0x03ffffff, 0x01ffffff
147+ };
148+
149+ for (int i=0; i<8; i++) {
150+ for (int j=0; j<32; j++) {
151+ output_mask[i][j] = sign_mask[i];
152+ output_mask[i][j] >>= (32-i-j);
153+ output_mask[i][j] <<= (32-i-j);
154+ }
155+ }
156+
157+ for (int i=0; i<8; i++) {
158+ for (int j=0; j<64; j++) {
159+ bytes_plus[i][j] = j/8;
160+ if ((i + j%8) > 7)
161+ ++bytes_plus[i][j];
162+ }
163+ }
164+}
165+
166+//The key function that takes product of x*y and convert it to
167+//mantissa and exponent. Mantissa have number of bits equal to
168+//length of the mask.
169+inline int SafeProduct(int x, int y, int mask, int& shift) {
170+
171+ int p = x * y;
172+ if ((p >> shift) > mask) {
173+ p >>= shift;
174+ while (p > mask) {
175+ p >>= 1;
176+ ++shift;
177+ }
178+ }
179+ else {
180+ while (shift >= 0) {
181+ --shift;
182+ if ((p >> shift) > mask) {
183+ break;
184+ }
185+ }
186+ ++shift;
187+ p >>= shift;
188+ }
189+ return p;
190+}
191+
192+inline int readBits(int operation_buffer, int bits, unsigned char* code) {
193+
194+ unsigned end_byte = current_byte + bytes_plus[current_bit][bits];
195+ int buffer = (code[current_byte] & edge_mask[current_bit]);
196+ unsigned char total_bits = 8 - current_bit;
197+ for (unsigned i=current_byte+1; i<=end_byte; ++i) {
198+ (buffer <<= 8) |= code[i];
199+ total_bits += 8;
200+ }
201+ buffer >>= (total_bits - bits);
202+ operation_buffer <<= bits;
203+ operation_buffer |= buffer;
204+ current_byte = end_byte;
205+ current_bit = (bits + current_bit)%8;
206+ return operation_buffer;
207+}
208+
209+inline void writeBits(long long operation_buffer, int bits, unsigned char* result) {
210+
211+ int buffer = (int)((operation_buffer >> current_bit) >> 32);
212+ buffer &= output_mask[current_bit][bits];
213+ unsigned bytes_plus2 = bytes_plus[current_bit][bits];
214+ current_bit = (bits + current_bit)%8;
215+ for (unsigned i=0; i<=bytes_plus2; ++i) {
216+ result[current_byte + i] |= (buffer >> shift_table[i]);
217+ }
218+ current_byte += bytes_plus2;
219+}
220+
221+void update_frequencies(int* buffer, int* freq, int* cumulative, int range, int denominator) {
222+
223+ memcpy(freq, buffer, range*sizeof(int));
224+ for (int i=0; i<range; i++)
225+ buffer[i] = 1;
226+
227+ cumulative[0] = 0;
228+ for (int i=1; i<range; i++) {
229+ cumulative[i] = cumulative[i-1] + freq[i-1];
230+ }
231+ cumulative[range] = denominator;
232+}
233+
234+//The result buffer should be allocated outside the function
235+//The data must be non-negative integers less or equal to max_value
236+//The actual size of compressed buffer is returned in &result_size
237+//Initially result_size must contain size of result buffer
238+void Encode(int* source, int source_size, int max_value, int min_value, unsigned char* result, int& result_size) {
239+
240+ memset(result, 0x00, result_size);
241+ current_byte = 0;
242+ current_bit = 0;
243+
244+ if (min_value != 0) {
245+ for (int i=0; i<source_size; i++) {
246+ source[i] -= min_value;
247+ }
248+ }
249+
250+ //collecting frequencies
251+ int range = (max_value + 1) - min_value + 1; //we add one more value for flag
252+ int* frequency = (int*)malloc(range*sizeof(int));
253+ int* cumulative = (int*)malloc((range+1)*sizeof(int));
254+ int* buffer = (int*)malloc(range * sizeof(int));
255+
256+ int Denominator = source_size + 1; //we increased size by 1
257+ unsigned base = 0;
258+ while (Denominator > 1) {
259+ Denominator >>= 1;
260+ base++;
261+ }
262+ if (base > MAX_BASE)
263+ base = MAX_BASE;
264+ Denominator = (1 << base);
265+
266+ for (int i=0; i<range; ++i) {
267+ buffer[i] = 1;
268+ }
269+ int cnt = 1; //start from 1 on purpose
270+ for (int i=0; i<Denominator-range; ++i) {
271+ buffer[cnt++]++;
272+ if (cnt == range)
273+ cnt = 1;
274+ }
275+
276+ update_frequencies(buffer, frequency, cumulative, range, Denominator);
277+
278+ //saving frequencies in result buffer
279+ memcpy(result, &range, 4);
280+ memcpy(result + 4, &Denominator, 4);
281+ current_byte += 8;
282+ //data is ready
283+
284+ //encoding
285+ make_look_ups();
286+ int mask = 0xFFFFFFFF >> (32 - base);
287+ //First 64 bits are not used for data, we use them for saving data_size and
288+ //any other 32 bit value that we want to save
289+ long long operation_buffer = source_size;
290+ operation_buffer <<= 32;
291+ //we save minimum value always as positive number and use last
292+ //decimal position as sign indicator
293+ int saved_min = min_value * 10;
294+ if (saved_min < 0) {
295+ saved_min = 1 - saved_min;
296+ }
297+ operation_buffer |= saved_min; //we use another 32 bits of first 64 bit buffer
298+ /////////////////////
299+ int product = 1;
300+ int shift = 0;
301+ cnt = 1;
302+ for (int i=0; i<source_size; ++i) {
303+
304+ while (((operation_buffer << (base - shift)) & overflow_indicator) == overflow_indicator) {
305+ printf("Possible buffer overflow is corrected at value %d\n", i);
306+ //this is zero flag output in order to avoid buffer overflow
307+ //rarely happen, cumulative[0] = 0, frequency[0] = 1
308+ writeBits(operation_buffer, base-shift, result);
309+ operation_buffer <<= (base - shift);
310+ operation_buffer += cumulative[0] * product;
311+ product = SafeProduct(product, frequency[0], mask, shift);
312+ //in result of this operation shift = 0
313+ }
314+
315+ writeBits(operation_buffer, base-shift, result);
316+ operation_buffer <<= (base - shift);
317+ operation_buffer += cumulative[source[i]+1] * product;
318+ product = SafeProduct(product, frequency[source[i]+1], mask, shift);
319+
320+ //This is adoptive frequency updates
321+ buffer[source[i]+1]++;
322+ cnt++;
323+ if (cnt == (Denominator-range)) {
324+ update_frequencies(buffer, frequency, cumulative, range, Denominator);
325+ cnt = 1;
326+ }
327+ }
328+ //flushing remained 64 bits
329+ writeBits(operation_buffer, 24, result);
330+ operation_buffer <<= 24;
331+ writeBits(operation_buffer, 24, result);
332+ operation_buffer <<= 24;
333+ writeBits(operation_buffer, 16, result);
334+ operation_buffer <<= 16;
335+ result_size = current_byte + 1;
336+ //end encoding
337+
338+ if (min_value != 0) {
339+ for (int i=0; i<source_size; i++) {
340+ source[i] += min_value;
341+ }
342+ }
343+
344+ if (buffer)
345+ free(buffer);
346+
347+ if (cumulative)
348+ free(cumulative);
349+
350+ if (frequency)
351+ free(frequency);
352+}
353+
354+void prepare_symbols(int* cumulative, int range, int denominator, int* symbol) {
355+
356+ memset(symbol, 0x00, denominator*sizeof(int));
357+ for (int k=0; k<range; ++k) {
358+ for (int i=cumulative[k]; i<cumulative[k+1]; i++) {
359+ symbol[i] = k;
360+ }
361+ }
362+}
363+
364+//result buffer must be allocated outside of function
365+//result size must contain the correct size of the buffer
366+void Decode(unsigned char* code, int code_size, int* result, int& result_size) {
367+
368+ current_byte = 0;
369+ current_bit = 0;
370+
371+ //reading frequencies
372+ int range = 0;
373+ int Denominator = 0;
374+ memcpy(&range, code, 4);
375+ memcpy(&Denominator, code + 4, 4);
376+
377+ int* frequency = (int*)malloc(range*sizeof(int));
378+ int* cumulative = (int*)malloc((range+1)*sizeof(int));
379+ int* buffer = (int*)malloc(range * sizeof(int));
380+
381+ current_byte += 8;
382+ unsigned base = 0;
383+ while (Denominator > 1) {
384+ Denominator >>= 1;
385+ base++;
386+ }
387+ Denominator = (1 << base);
388+
389+ for (int i=0; i<range; ++i) {
390+ buffer[i] = 1;
391+ }
392+ int cnt = 1; //start from 1 on purpose
393+ for (int i=0; i<Denominator-range; ++i) {
394+ buffer[cnt++]++;
395+ if (cnt == range)
396+ cnt = 1;
397+ }
398+
399+ int* symbol = (int*)malloc(Denominator*sizeof(int));
400+ update_frequencies(buffer, frequency, cumulative, range, Denominator);
401+ prepare_symbols(cumulative, range, Denominator, symbol);
402+ //data is ready
403+
404+ //decoding
405+ make_look_ups();
406+ int mask = 0xFFFFFFFF >> (32 - base);
407+ long long ID;
408+ int product = 1;
409+ int shift = 0;
410+ int operation_buffer = 0;
411+ //we skip first 64 bits they contain size and minimum value
412+ operation_buffer = readBits(operation_buffer, 32, code);
413+ result_size = (int)operation_buffer; //First 32 bits is data_size
414+ operation_buffer = 0;
415+ operation_buffer = readBits(operation_buffer, 32, code);
416+ int min_value = (int)operation_buffer; //Second 32 bits is minimum value;
417+ //we find sign according to our convention
418+ if ((min_value % 10) > 0)
419+ min_value = - min_value;
420+ min_value /= 10;
421+ operation_buffer = 0;
422+ ////////////////////////////////////////
423+ int counter = 0;
424+ cnt = 1;
425+ while (counter < result_size) {
426+ operation_buffer = readBits(operation_buffer, base-shift, code);
427+ ID = operation_buffer/product;
428+ operation_buffer -= product * cumulative[symbol[ID]];
429+ product = SafeProduct(product, frequency[symbol[ID]], mask, shift);
430+ result[counter] = symbol[ID] + min_value - 1;
431+ if (result[counter] >= min_value) {
432+
433+ buffer[symbol[ID]]++;
434+ cnt++;
435+ if (cnt == (Denominator-range)) {
436+ update_frequencies(buffer, frequency, cumulative, range, Denominator);
437+ prepare_symbols(cumulative, range, Denominator, symbol);
438+ cnt = 1;
439+ }
440+ counter++;
441+ }
442+ }
443+ //end decoding
444+
445+ if (buffer)
446+ free(buffer);
447+
448+ if (symbol)
449+ free(symbol);
450+
451+ if (cumulative)
452+ free(cumulative);
453+
454+ if (frequency)
455+ free(frequency);
456+}
457+
458+#if 0
459+
460+int main()
461+{
462+ printf("Making data for round trip...\n");
463+ int data_size = 1800000;
464+ int min_data_value = -297;
465+ int max_data_value = 4000;
466+ int redundancy_factor = 10;
467+
468+ int* source = (int*)malloc(data_size * sizeof(int));
469+ double entropy = MakeData(source, data_size, min_data_value, max_data_value, redundancy_factor);
470+
471+ int Bytes = (int)(ceil((double)(data_size) * entropy)/8.0);
472+ printf("Data size = %d, alphabet = %d\n", data_size, max_data_value - min_data_value + 1);
473+ printf("Entropy %5.3f, estimated compressed size %d bytes\n\n", entropy, Bytes);
474+
475+ SYSTEMTIME st;
476+ GetSystemTime(&st);
477+ printf("Encoding is started %d %d\n", st.wSecond, st.wMilliseconds);
478+
479+ int result_size = Bytes + Bytes/2 + (max_data_value - min_data_value) * 2 + 4 + 1024;
480+ unsigned char* result = (unsigned char*)malloc(result_size * sizeof(unsigned char));
481+
482+ //Min and Max data values can be specified approximately they can make
483+ //larger data segment, for example Max value can be passed as larger and Min
484+ //value can be passed as smaller number. Replace the function below by commented
485+ //out example and it will work in the same way.
486+ //Encode(source, data_size, max_data_value + 150, min_data_value - 140, result, result_size);
487+ Encode(source, data_size, max_data_value, min_data_value, result, result_size);
488+
489+ GetSystemTime(&st);
490+ printf("Encoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
491+ printf("Actual size of compressed buffer with frequencies %d bytes\n\n", result_size);
492+
493+ GetSystemTime(&st);
494+ printf("Decoding is started %d %d\n", st.wSecond, st.wMilliseconds);
495+ int test_data_size = data_size + data_size/2;
496+ int* test_data = (int*)malloc(test_data_size*sizeof(int));
497+ Decode(result, result_size, test_data, test_data_size);
498+
499+ GetSystemTime(&st);
500+ printf("Decoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
501+
502+ bool error_occur = false;
503+ if (test_data_size != data_size) {
504+ error_occur = true;
505+ }
506+ else {
507+ for (int i=0; i<data_size; i++) {
508+ if (test_data[i] != source[i]) {
509+ error_occur = true;
510+ printf("Mismatch in %d, %d %d\n", i, test_data[i], source[i]);
511+ break;
512+ }
513+ }
514+ }
515+
516+ if (error_occur == false) {
517+ printf("Round trip is correct, 100 percent match\n\n");
518+ }
519+ else {
520+ printf("Round trip test failed, data mismatch\n\n");
521+ }
522+
523+ if (test_data)
524+ free(test_data);
525+
526+ if (result)
527+ free(result);
528+
529+ if (source)
530+ free(source);
531+
532+ return 0;
533+}
534+
535+#endif
--- RanCode.h (nonexistent)
+++ RanCode.h (revision 14)
@@ -0,0 +1,535 @@
1+// RanCode.cpp : Defines the entry point for the console application.
2+//
3+//This is DEMO version of arithmetic/range encoder written for research purposes by
4+//Andrew Polar (www.ezcodesample.com, Jan. 10, 2007). The algorithm was published by
5+//G.N.N. Martin. In March 1979. Video & Data Recording Conference, Southampton, UK.
6+//The program was tested for many statistically different data samples, however you
7+//use it on own risk. Author do not accept any responsibility for use or misuse of
8+//this program. Any data sample that cause crash or wrong result can be sent to author
9+//for research. The e-mail may be obtained by WHOIS www.ezcodesample.com.
10+//The correction of July 03, 2007. The processing of non-zero minimum has been added.
11+//User can pass data with non-zero minimum value, however the min/max limits must be
12+//specified in function call.
13+//The correction of August 17, 2007. The additional means of computational statbility
14+//has been added. As it was discovered and investigated by Bill Dorsey in beta-testing
15+//the addition operation must have treatment for carry propagation. The solution is
16+//to normalize data to vary from 1 to max-min+1 and use value 0 as a means of output
17+//of the head of operation_buffer bits. This zero value is used as synchronization
18+//flag for output buffer and simply excluded from decoded stream.
19+
20+#include <stdio.h>
21+#include <math.h>
22+#include <stdlib.h>
23+#include <memory.h>
24+#include <time.h>
25+//#include <windows.h>
26+
27+///////////////////////////////////////////////////////////////////////
28+// Test data preparation
29+///////////////////////////////////////////////////////////////////////
30+
31+double entropy24(int* data, int size) {
32+
33+ int max_size = 1 << 24;
34+ int min, counter;
35+ int* buffer;
36+ double entropy;
37+ double log2 = log(2.0);
38+ double prob;
39+
40+ min = data[0];
41+ for (counter=0; counter<size; counter++) {
42+ if (data[counter] < min)
43+ min = data[counter];
44+ }
45+
46+ for (counter=0; counter<size; counter++) {
47+ data[counter] -= min;
48+ }
49+
50+ buffer = (int*)malloc(max_size*sizeof(int));
51+ memset(buffer, 0x00, max_size*sizeof(int));
52+ for (counter=0; counter<size; counter++) {
53+ buffer[data[counter]]++;
54+ }
55+
56+ entropy = 0.0;
57+ for (counter=0; counter<max_size; counter++) {
58+ if (buffer[counter] > 0) {
59+ prob = (double)buffer[counter];
60+ prob /= (double)size;
61+ entropy += log(prob)/log2*prob;
62+ }
63+ }
64+ entropy *= -1.0;
65+
66+ for (counter=0; counter<size; counter++) {
67+ data[counter] += min;
68+ }
69+
70+ if (buffer)
71+ free(buffer);
72+
73+ return entropy;
74+}
75+
76+double round(double x) {
77+ if ((x - floor(x)) >= 0.5)
78+ return ceil(x);
79+ else
80+ return floor(x);
81+}
82+
83+double MakeData(int* data, int data_size, int min, int max, int redundancy_factor) {
84+
85+ int counter, cnt, high, low;
86+ double buf;
87+
88+ if (redundancy_factor <= 1)
89+ redundancy_factor = 1;
90+
91+ if (max <= min)
92+ max = min + 1;
93+
94+ srand((unsigned)time(0));
95+ for (counter=0; counter<data_size; counter++) {
96+ buf = 0;
97+ for (cnt=0; cnt<redundancy_factor; cnt++) {
98+ buf += (double)rand();
99+ }
100+ data[counter] = ((int)buf)/redundancy_factor;
101+ }
102+
103+ low = data[0];
104+ high = data[0];
105+ for (counter=0; counter<data_size; counter++) {
106+ if (data[counter] > high)
107+ high = data[counter];
108+ if (data[counter] < low)
109+ low = data[counter];
110+ }
111+
112+ for (counter=0; counter<data_size; counter++) {
113+ buf = (double)(data[counter] - low);
114+ buf /= (double)(high - low);
115+ buf *= (double)(max - min);
116+ buf = round(buf);
117+ data[counter] = (int)buf + min;
118+ }
119+
120+ return entropy24(data, data_size);
121+}
122+
123+///////////////////////////////////////////////////////////////////
124+// End of data preparation start of encoding, decoding functions
125+///////////////////////////////////////////////////////////////////
126+
127+const unsigned MAX_BASE = 15; //value is optional but 15 is recommended
128+unsigned current_byte; //position of current byte in result data
129+unsigned char current_bit; //postion of current bit in result data
130+
131+//Some look up tables for fast processing
132+static int output_mask[8][32];
133+static unsigned char bytes_plus [8][64];
134+
135+static unsigned char edge_mask[8] = {0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
136+static unsigned char shift_table[8] = {24, 16, 8, 0};
137+
138+static long long overflow_indicator = 0xffffffffffffffff << (MAX_BASE * 2 - 1);
139+
140+void make_look_ups() {
141+
142+ int sign_mask[8] = {
143+ 0xffffffff, 0x7fffffff, 0x3fffffff, 0x1fffffff,
144+ 0x0fffffff, 0x07ffffff, 0x03ffffff, 0x01ffffff
145+ };
146+
147+ for (int i=0; i<8; i++) {
148+ for (int j=0; j<32; j++) {
149+ output_mask[i][j] = sign_mask[i];
150+ output_mask[i][j] >>= (32-i-j);
151+ output_mask[i][j] <<= (32-i-j);
152+ }
153+ }
154+
155+ for (int i=0; i<8; i++) {
156+ for (int j=0; j<64; j++) {
157+ bytes_plus[i][j] = j/8;
158+ if ((i + j%8) > 7)
159+ ++bytes_plus[i][j];
160+ }
161+ }
162+}
163+
164+//The key function that takes product of x*y and convert it to
165+//mantissa and exponent. Mantissa have number of bits equal to
166+//length of the mask.
167+inline int SafeProduct(int x, int y, int mask, int& shift) {
168+
169+ int p = x * y;
170+ if ((p >> shift) > mask) {
171+ p >>= shift;
172+ while (p > mask) {
173+ p >>= 1;
174+ ++shift;
175+ }
176+ }
177+ else {
178+ while (shift >= 0) {
179+ --shift;
180+ if ((p >> shift) > mask) {
181+ break;
182+ }
183+ }
184+ ++shift;
185+ p >>= shift;
186+ }
187+ return p;
188+}
189+
190+inline int readBits(int operation_buffer, int bits, unsigned char* code) {
191+
192+ unsigned end_byte = current_byte + bytes_plus[current_bit][bits];
193+ int buffer = (code[current_byte] & edge_mask[current_bit]);
194+ unsigned char total_bits = 8 - current_bit;
195+ for (unsigned i=current_byte+1; i<=end_byte; ++i) {
196+ (buffer <<= 8) |= code[i];
197+ total_bits += 8;
198+ }
199+ buffer >>= (total_bits - bits);
200+ operation_buffer <<= bits;
201+ operation_buffer |= buffer;
202+ current_byte = end_byte;
203+ current_bit = (bits + current_bit)%8;
204+ return operation_buffer;
205+}
206+
207+inline void writeBits(long long operation_buffer, int bits, unsigned char* result) {
208+
209+ int buffer = (int)((operation_buffer >> current_bit) >> 32);
210+ buffer &= output_mask[current_bit][bits];
211+ unsigned bytes_plus2 = bytes_plus[current_bit][bits];
212+ current_bit = (bits + current_bit)%8;
213+ for (unsigned i=0; i<=bytes_plus2; ++i) {
214+ result[current_byte + i] |= (buffer >> shift_table[i]);
215+ }
216+ current_byte += bytes_plus2;
217+}
218+
219+//The result buffer should be allocated outside the function
220+//The data must be non-negative integers less or equal to max_value
221+//The actual size of compressed buffer is returned in &result_size
222+//Initially result_size must contain size of result buffer
223+void Encode(int* source, int source_size, int max_value, int min_value, unsigned char* result, int& result_size) {
224+
225+ memset(result, 0x00, result_size);
226+ current_byte = 0;
227+ current_bit = 0;
228+
229+ if (min_value != 0) {
230+ for (int i=0; i<source_size; i++) {
231+ source[i] -= min_value;
232+ }
233+ }
234+
235+ //collecting frequencies
236+ int range = (max_value + 1) - min_value + 1; //we add one more value for flag
237+ int* frequency = (int*)malloc(range*sizeof(int));
238+ memset(frequency, 0x00, range * sizeof(int));
239+ for (int i=0; i<source_size; ++i)
240+ ++frequency[source[i]+1]; //we make source vary from 1 to max-min+1
241+ frequency[0] = 1; //we use it as flag, virtual 0 value occured once
242+
243+ //correction by bringing them in certain range
244+ int Denominator = source_size + 1; //we increased size by 1
245+ unsigned base = 0;
246+ while (Denominator > 1) {
247+ Denominator >>= 1;
248+ base++;
249+ }
250+ if (base > MAX_BASE)
251+ base = MAX_BASE;
252+ Denominator = (1 << base);
253+
254+ double coeff = (double)(Denominator)/((double)(source_size));
255+ for (int i=1; i<range; i++) { //we start from 1 on purpose
256+ if (frequency[i] > 0) {
257+ double ff = (double)(frequency[i]);
258+ ff *= coeff;
259+ frequency[i] = (int)(ff);
260+ if (frequency[i] == 0)
261+ frequency[i] = 1;
262+ }
263+ }
264+ int sm = 0;
265+ for (int i=0; i<range; i++) {
266+ sm += frequency[i];
267+ }
268+ int diff = Denominator - sm;
269+ if (diff > 0) {
270+ int mx = frequency[0];
271+ int ps = 0;
272+ for (int i=0; i<range; i++) {
273+ if (frequency[i] > mx) {
274+ mx = frequency[i];
275+ ps = i;
276+ }
277+ }
278+ frequency[ps] += diff;
279+ }
280+ if (diff < 0) {
281+ int i = 0;
282+ while (diff < 0) {
283+ if (frequency[i] > 1) {
284+ frequency[i]--;
285+ diff++;
286+ }
287+ i++;
288+ if (i == (range-1))
289+ i = 0;
290+ }
291+ }
292+ //end of correction
293+
294+ //saving frequencies in result buffer
295+ int offset = 0;
296+ memcpy(result + offset, &range, 4);
297+ offset += 4;
298+ for (int m=0; m<range; ++m) {
299+ memcpy(result + offset, &frequency[m], 2);
300+ offset += 2;
301+ }
302+ current_byte += offset;
303+
304+ //making cumulative frequencies
305+ int* cumulative = (int*)malloc((range+1)*sizeof(int));
306+ memset(cumulative, 0x00, (range+1)*sizeof(int));
307+ cumulative[0] = 0;
308+ for (int i=1; i<range; i++) {
309+ cumulative[i] = cumulative[i-1] + frequency[i-1];
310+ }
311+ cumulative[range] = Denominator;
312+ //data is ready
313+
314+ //encoding
315+ make_look_ups();
316+ int mask = 0xFFFFFFFF >> (32 - base);
317+ //First 64 bits are not used for data, we use them for saving data_size and
318+ //any other 32 bit value that we want to save
319+ long long operation_buffer = source_size;
320+ operation_buffer <<= 32;
321+ //we save minimum value always as positive number and use last
322+ //decimal position as sign indicator
323+ int saved_min = min_value * 10;
324+ if (saved_min < 0) {
325+ saved_min = 1 - saved_min;
326+ }
327+ operation_buffer |= saved_min; //we use another 32 bits of first 64 bit buffer
328+ /////////////////////
329+ int product = 1;
330+ int shift = 0;
331+ for (int i=0; i<source_size; ++i) {
332+
333+ while (((operation_buffer << (base - shift)) & overflow_indicator) == overflow_indicator) {
334+ printf("Possible buffer overflow is corrected at value %d\n", i);
335+ //this is zero flag output in order to avoid buffer overflow
336+ //rarely happen, cumulative[0] = 0, frequency[0] = 1
337+ writeBits(operation_buffer, base-shift, result);
338+ operation_buffer <<= (base - shift);
339+ operation_buffer += cumulative[0] * product;
340+ product = SafeProduct(product, frequency[0], mask, shift);
341+ //in result of this operation shift = 0
342+ }
343+
344+ writeBits(operation_buffer, base-shift, result);
345+ operation_buffer <<= (base - shift);
346+ operation_buffer += cumulative[source[i]+1] * product;
347+ product = SafeProduct(product, frequency[source[i]+1], mask, shift);
348+ }
349+ //flushing remained 64 bits
350+ writeBits(operation_buffer, 24, result);
351+ operation_buffer <<= 24;
352+ writeBits(operation_buffer, 24, result);
353+ operation_buffer <<= 24;
354+ writeBits(operation_buffer, 16, result);
355+ operation_buffer <<= 16;
356+ result_size = current_byte + 1;
357+ //end encoding
358+
359+ if (min_value != 0) {
360+ for (int i=0; i<source_size; i++) {
361+ source[i] += min_value;
362+ }
363+ }
364+
365+ if (cumulative)
366+ free(cumulative);
367+
368+ if (frequency)
369+ free(frequency);
370+}
371+
372+//result buffer must be allocated outside of function
373+//result size must contain the correct size of the buffer
374+void Decode(unsigned char* code, int code_size, int* result, int& result_size) {
375+
376+ current_byte = 0;
377+ current_bit = 0;
378+
379+ //reading frequencies
380+ int range = 0;
381+ memcpy(&range, code, 4);
382+
383+ short* frequency = (short*)malloc(range * sizeof(short));
384+ memcpy(frequency, code + 4, range * 2);
385+ int Denominator = 0;
386+ for (int i=0; i<range; ++i) {
387+ Denominator += frequency[i];
388+ }
389+ current_byte += range * 2 + 4;
390+ unsigned base = 0;
391+ while (Denominator > 1) {
392+ Denominator >>= 1;
393+ base++;
394+ }
395+ Denominator = (1 << base);
396+
397+ //making cumulative frequencies
398+ int* cumulative = (int*)malloc((range+1)*sizeof(int));
399+ memset(cumulative, 0x00, (range+1)*sizeof(int));
400+ cumulative[0] = 0;
401+ for (int i=1; i<range; i++) {
402+ cumulative[i] = cumulative[i-1] + frequency[i-1];
403+ }
404+ cumulative[range] = Denominator;
405+ //data is ready
406+
407+ //prepare data for decoding
408+ int* symbol = (int*)malloc(Denominator*sizeof(int));
409+ memset(symbol, 0x00, Denominator*sizeof(int));
410+ for (int k=0; k<range; ++k) {
411+ for (int i=cumulative[k]; i<cumulative[k+1]; i++) {
412+ symbol[i] = k;
413+ }
414+ }
415+ //data is ready
416+
417+ //decoding
418+ make_look_ups();
419+ int mask = 0xFFFFFFFF >> (32 - base);
420+ long long ID;
421+ int product = 1;
422+ int shift = 0;
423+ int operation_buffer = 0;
424+ //we skip first 64 bits they contain size and minimum value
425+ operation_buffer = readBits(operation_buffer, 32, code);
426+ result_size = (int)operation_buffer; //First 32 bits is data_size
427+ operation_buffer = 0;
428+ operation_buffer = readBits(operation_buffer, 32, code);
429+ int min_value = (int)operation_buffer; //Second 32 bits is minimum value;
430+ //we find sign according to our convention
431+ if ((min_value % 10) > 0)
432+ min_value = - min_value;
433+ min_value /= 10;
434+ operation_buffer = 0;
435+ ////////////////////////////////////////
436+ int cnt = 0;
437+ while (cnt < result_size) {
438+ operation_buffer = readBits(operation_buffer, base-shift, code);
439+ ID = operation_buffer/product;
440+ operation_buffer -= product * cumulative[symbol[ID]];
441+ product = SafeProduct(product, frequency[symbol[ID]], mask, shift);
442+ result[cnt] = symbol[ID] + min_value - 1;
443+ if (result[cnt] >= min_value)
444+ cnt++;
445+ }
446+ //end decoding
447+
448+ if (symbol)
449+ free(symbol);
450+
451+ if (cumulative)
452+ free(cumulative);
453+
454+ if (frequency)
455+ free(frequency);
456+}
457+
458+#if 0
459+
460+int main()
461+{
462+ printf("Making data for round trip...\n");
463+ int data_size = 1800000;
464+ int min_data_value = -297;
465+ int max_data_value = 4000;
466+ int redundancy_factor = 10;
467+
468+ int* source = (int*)malloc(data_size * sizeof(int));
469+ double entropy = MakeData(source, data_size, min_data_value, max_data_value, redundancy_factor);
470+
471+ int Bytes = (int)(ceil((double)(data_size) * entropy)/8.0);
472+ printf("Data size = %d, alphabet = %d\n", data_size, max_data_value - min_data_value + 1);
473+ printf("Entropy %5.3f, estimated compressed size %d bytes\n\n", entropy, Bytes);
474+
475+ SYSTEMTIME st;
476+ GetSystemTime(&st);
477+ printf("Encoding is started %d %d\n", st.wSecond, st.wMilliseconds);
478+
479+ int result_size = Bytes + Bytes/2 + (max_data_value - min_data_value) * 2 + 4 + 1024;
480+ unsigned char* result = (unsigned char*)malloc(result_size * sizeof(unsigned char));
481+
482+ //Min and Max data values can be specified approximately they can make
483+ //larger data segment, for example Max value can be passed as larger and Min
484+ //value can be passed as smaller number. Replace the function below by commented
485+ //out example and it will work in the same way.
486+ //Encode(source, data_size, max_data_value + 150, min_data_value - 140, result, result_size);
487+ Encode(source, data_size, max_data_value, min_data_value, result, result_size);
488+
489+ GetSystemTime(&st);
490+ printf("Encoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
491+ printf("Actual size of compressed buffer with frequencies %d bytes\n\n", result_size);
492+
493+ GetSystemTime(&st);
494+ printf("Decoding is started %d %d\n", st.wSecond, st.wMilliseconds);
495+ int test_data_size = data_size + data_size/2;
496+ int* test_data = (int*)malloc(test_data_size*sizeof(int));
497+ Decode(result, result_size, test_data, test_data_size);
498+
499+ GetSystemTime(&st);
500+ printf("Decoding is finished %d %d\n\n", st.wSecond, st.wMilliseconds);
501+
502+ bool error_occur = false;
503+ if (test_data_size != data_size) {
504+ error_occur = true;
505+ }
506+ else {
507+ for (int i=0; i<data_size; i++) {
508+ if (test_data[i] != source[i]) {
509+ error_occur = true;
510+ printf("Mismatch in %d, %d %d\n", i, test_data[i], source[i]);
511+ break;
512+ }
513+ }
514+ }
515+
516+ if (error_occur == false) {
517+ printf("Round trip is correct, 100 percent match\n\n");
518+ }
519+ else {
520+ printf("Round trip test failed, data mismatch\n\n");
521+ }
522+
523+ if (test_data)
524+ free(test_data);
525+
526+ if (result)
527+ free(result);
528+
529+ if (source)
530+ free(source);
531+
532+ return 0;
533+}
534+
535+#endif
--- main.cpp (revision 13)
+++ main.cpp (revision 14)
@@ -13,7 +13,6 @@
1313 using namespace boost;
1414 #include <boost/cstdint.hpp>
1515
16-#include "Compressor/BZip2Compressor.h"
1716 #include "misc.h"
1817 #include "decode.h"
1918 #include "encode.h"
@@ -58,8 +57,6 @@
5857 const size_t vBlockCount = height / 8 + ((height % 8) ? 1 : 0);
5958 const size_t totalBlockCount = hBlockCount * vBlockCount;
6059
61- BZip2Compressor compressor;
62-
6360 size_t storageSize = work.size()*4*1.1+600;
6461 std::vector<unsigned char> work3(storageSize);
6562 std::vector<unsigned char> work4(storageSize);
@@ -80,7 +77,7 @@
8077
8178 // TODO: to record stream signature "jyeipegyuu\0"
8279 // TODO: to record streams
83-
80+
8481 int* pWork = &work[0];
8582 int* pWork2 = &work2[0];
8683
@@ -91,7 +88,6 @@
9188
9289 reorderByFrequency(hBlockCount, vBlockCount, pWork, pWork2);
9390
94- // TODO: to add option to disable paeth prediction
9591 unsigned char enablePaethPrediction = 1;
9692 *dest++ = enablePaethPrediction;
9793 if (enablePaethPrediction) {
@@ -106,8 +102,6 @@
106102 std::vector<unsigned char> zeroOneInfos(totalBlockCount);
107103 unsigned char* pZeroOneInfos = &zeroOneInfos[0];
108104
109- // TODO: to record quantizing zero one limit setting
110-
111105 // quantizing zero one flags
112106 *dest++ = zeroOneLimit;
113107 if (zeroOneLimit != 0) {
@@ -122,7 +116,7 @@
122116 }else {
123117 pZeroOneInfos = 0;
124118 }
125- dest += compress(compressor, hBlockCount, vBlockCount, pZeroOneInfos, zeroOneLimit, compressInfos, pWork2, (unsigned char*)pWork, &work3[0], &work4[0], dest, encoded.size());
119+ dest += compress(hBlockCount, vBlockCount, pZeroOneInfos, zeroOneLimit, compressInfos, pWork2, (unsigned char*)pWork, &work3[0], &work4[0], dest, encoded.size());
126120
127121 // TODO: to record DCT coefficients sign predictor setting
128122
@@ -171,7 +165,7 @@
171165 }else {
172166 pZeroOneFlags = 0;
173167 }
174- src += decompress(compressor, hBlockCount, vBlockCount, pZeroOneFlags, zeroOneLimit, src, compressedLen, &tmp[0], pWork, pWork2, destLen);
168+ src += decompress(hBlockCount, vBlockCount, pZeroOneFlags, zeroOneLimit, src, compressedLen, &tmp[0], pWork, pWork2, destLen);
175169
176170 // sign flags
177171 uint32_t signFlagBytes = *(uint32_t*)src;
Show on old repository browser