/*
 * Copyright (C) 2009-2012 TSUBAKIMOTO Hiroya <z0rac@users.sourceforge.jp>
 *
 * This software comes with ABSOLUTELY NO WARRANTY; for details of
 * the license terms, see the LICENSE.txt file included with the program.
 */
| 7 |
#include <string> |
| 8 |
#include <cstdio> |
| 9 |
#include <cstdlib> |
| 10 |
|
| 11 |
using namespace std; |
| 12 |
|
| 13 |
/** crc32 - CRC-32 calculator
 *
 * Table-driven CRC-32 using the reflected polynomial 0xedb88320, an initial
 * register value of all ones and a final bitwise inversion.
 *
 * The single file-local instance `crc` is used as a function object:
 * `crc(name)` returns the checksum of every byte of `name`.
 *
 * Note: the lookup table is filled by the constructor at run time, so `crc`
 * is only usable once this translation unit's static objects are constructed.
 */
namespace {
  class crc32 {
    unsigned _tab[256];  // _tab[b]: register value after shifting byte b through
  public:
    /** Build the 256-entry lookup table, one entry per possible byte value. */
    crc32()
    {
      for (unsigned i = 0; i < 256; ++i) {
        unsigned v = i;
        for (int c = 8; c--;) {
          // Shift out the low bit; fold in the polynomial when that bit was set.
          v = ((v & 1) ? 0xedb88320U : 0U) ^ (v >> 1);
        }
        _tab[i] = v;
      }
    }

    /** Compute the checksum of a string.
     *
     * @param s  bytes to hash; the whole string is processed, including any
     *           embedded NUL characters.
     * @return   the CRC-32 of s (0 for an empty string).
     */
    unsigned operator()(const std::string& s) const
    {
      unsigned v = ~0U;
      for (std::string::const_iterator p = s.begin(); p != s.end(); ++p) {
        // Go through unsigned char so a negative plain char cannot sign-extend.
        const unsigned byte = static_cast<unsigned char>(*p);
        v = _tab[(v ^ byte) & 255] ^ (v >> 8);
      }
      return ~v;
    }
  };
  const crc32 crc;
}
| 41 |
|
| 42 |
#if MAIN |
| 43 |
|
| 44 |
#include <iostream> |
| 45 |
#include <iomanip> |
| 46 |
#include <list> |
| 47 |
|
| 48 |
namespace {
  /** elem - one entry of the charset table built by the generator.
   *
   * hash  checksum of the charset name (the sort key of the table)
   * cp    code page number associated with the name
   * cs    charset name
   */
  struct elem {
    unsigned hash;
    unsigned cp;
    std::string cs;

    /** Create an empty entry; the numeric fields are zeroed. */
    elem() : hash(0), cp(0) {}

    /** Create an entry from its three fields. */
    elem(unsigned hash, unsigned cp, const std::string& cs)
      : hash(hash), cp(cp), cs(cs) {}
  };
}
| 58 |
|
| 59 |
int |
| 60 |
main() |
| 61 |
{ |
| 62 |
static const char ws[] = " \t"; |
| 63 |
list<elem> ls; |
| 64 |
while (cin) { |
| 65 |
string s; |
| 66 |
getline(cin, s); |
| 67 |
string::size_type i = s.find_first_of(';'); |
| 68 |
if (i != string::npos) s.erase(i); |
| 69 |
i = s.find_first_not_of(ws); |
| 70 |
if (i == string::npos) continue; |
| 71 |
string::size_type n = s.find_first_not_of("0123456789", i); |
| 72 |
if (n == string::npos || n == i) continue; |
| 73 |
string cp(s, i, n - i); |
| 74 |
i = s.find_first_not_of(ws, n); |
| 75 |
if (i == string::npos || i == n) continue; |
| 76 |
n = s.find_first_of(ws, i); |
| 77 |
string cs(s, i, n - i); |
| 78 |
for (string::iterator p = cs.begin(); p != cs.end(); ++p) { |
| 79 |
*p = static_cast<char>(toupper(*p)); |
| 80 |
} |
| 81 |
unsigned hash = crc(cs); |
| 82 |
list<elem>::iterator p = ls.begin(); |
| 83 |
while (p != ls.end() && hash > p->hash) ++p; |
| 84 |
if (p == ls.end() || p->hash != hash) { |
| 85 |
char* e; |
| 86 |
ls.insert(p, elem(hash, strtoul(cp.c_str(), &e, 10), cs)); |
| 87 |
} else { |
| 88 |
cerr << "conflict: " << cs << "(" << cp << ") and " |
| 89 |
<< p->cs << "(" << p->cp << ")" << endl; |
| 90 |
} |
| 91 |
} |
| 92 |
for (int cp = 1; cp < 65536; ++cp) { |
| 93 |
char no[10]; |
| 94 |
sprintf(no, "%d", cp); |
| 95 |
static const char* prefix[] = { |
| 96 |
"WINDOWS-", "X-CP", "CP", "CP0", "CP00" |
| 97 |
}; |
| 98 |
int i = sizeof(prefix) / sizeof(prefix[0]); |
| 99 |
i -= (cp >= 1000) + (cp >= 10000); |
| 100 |
while (i--) { |
| 101 |
string cs = string(prefix[i]) + no; |
| 102 |
unsigned hash = crc(cs); |
| 103 |
list<elem>::const_iterator p = ls.begin(); |
| 104 |
while (p != ls.end() && hash > p->hash) ++p; |
| 105 |
if (p != ls.end() && p->hash == hash && p->cs != cs) { |
| 106 |
cerr << "conflict: " << cs << "(" << cp << ") and " |
| 107 |
<< p->cs << "(" << p->cp << ")" << endl; |
| 108 |
} |
| 109 |
} |
| 110 |
} |
| 111 |
cout << "// This file was created by codepage.exe." << endl |
| 112 |
<< endl |
| 113 |
<< "static const unsigned hash[] = {" << endl |
| 114 |
<< hex << setfill('0'); |
| 115 |
for (list<elem>::iterator p = ls.begin(); p != ls.end(); ++p) { |
| 116 |
cout << " 0x" << setw(8) << p->hash << ",\t// " << p->cs << endl; |
| 117 |
} |
| 118 |
cout << "};" << endl |
| 119 |
<< endl |
| 120 |
<< "static const unsigned short codepage[] = {" << endl |
| 121 |
<< dec << setfill(' '); |
| 122 |
for (list<elem>::iterator p = ls.begin(); p != ls.end(); ++p) { |
| 123 |
cout << " " << setw(8) << p->cp << ",\t// " << p->cs << endl; |
| 124 |
} |
| 125 |
cout << "};" << endl; |
| 126 |
return 0; |
| 127 |
} |
| 128 |
|
| 129 |
#else |
| 130 |
|
| 131 |
/** codepage - map a charset name to a code page number.
 *
 * The name is hashed and looked up in the `hash[]` / `codepage[]` tables
 * pulled in from "codepage.h" (presumably the output of this file's MAIN
 * build - confirm against the build scripts).  When the checksum is not in
 * the table, names of the form "WINDOWS-<digits>", "CP<digits>" or
 * "X-CP<digits>" yield the number formed by the digits.
 *
 * The comparison is case-sensitive and only checksums are compared, so a name
 * whose checksum collides with a table entry is reported as that entry.
 *
 * @param charset  charset name; NOTE(review): callers are presumably expected
 *                 to pass it in upper case, as the generator upper-cases names
 *                 before hashing - verify at the call sites.
 * @return the code page number, or 0 when the name is not recognised.
 */
unsigned
codepage(const std::string& charset)
{
#include "codepage.h"
  const unsigned k = crc(charset);
  // hash[] is sorted in ascending *unsigned* order, so the keys must be
  // compared as unsigned values: the sign of their difference is wrong
  // whenever two hashes are 2^31 or more apart.
  std::size_t lo = 0;
  std::size_t hi = sizeof(hash) / sizeof(hash[0]);
  while (lo < hi) {
    const std::size_t mid = lo + (hi - lo) / 2;
    if (k == hash[mid]) return codepage[mid];
    if (k < hash[mid]) hi = mid;
    else lo = mid + 1;
  }

  // Fallback: split the name into a prefix and a trailing run of digits.
  const std::string::size_type last = charset.find_last_not_of("0123456789");
  if (last == std::string::npos || last + 1 >= charset.size()) {
    return 0;  // empty, all digits, or no trailing digits
  }
  const std::string prefix(charset, 0, last + 1);
  if (prefix == "WINDOWS-" || prefix == "CP" || prefix == "X-CP") {
    return static_cast<unsigned>(
      std::strtoul(charset.c_str() + last + 1, NULL, 10));
  }
  return 0;
}
| 154 |
|
| 155 |
#endif |