| 1 |
/*
 * Copyright (C) 2009-2014 TSUBAKIMOTO Hiroya <z0rac@users.sourceforge.jp>
 *
 * This software comes with ABSOLUTELY NO WARRANTY; for details of
 * the license terms, see the LICENSE.txt file included with the program.
 */
| 7 |
#include "mailbox.h" |
| 8 |
#include "win32.h" |
| 9 |
#include <cassert> |
| 10 |
#include <cctype> |
| 11 |
|
| 12 |
// Debug helpers. In _DEBUG builds DBG(s) keeps the statement s and LOG(s)
// streams s to cout; in all other builds both expand to nothing.
#ifndef _DEBUG
#define DBG(s)
#define LOG(s)
#else
#include <iostream>
#define DBG(s) s
#define LOG(s) (cout << s)
#endif
| 20 |
|
| 21 |
#if USE_ICONV |
| 22 |
/** u8conv - iconv wrapper
 */
| 24 |
namespace { |
| 25 |
extern "C" { |
| 26 |
typedef void* iconv_t; |
| 27 |
typedef iconv_t (*libiconv_open)(const char*, const char*); |
| 28 |
typedef size_t (*libiconv)(iconv_t, const char**, size_t*, char**, size_t*); |
| 29 |
typedef int (*libiconv_close)(iconv_t); |
| 30 |
} |
| 31 |
#define FUNC(name) name(_dll(#name)) |
| 32 |
|
| 33 |
class u8conv { |
| 34 |
static win32::dll _dll; |
| 35 |
iconv_t _cd; |
| 36 |
string _charset; |
| 37 |
libiconv_open _open; |
| 38 |
libiconv_close _close; |
| 39 |
libiconv _iconv; |
| 40 |
public: |
| 41 |
u8conv() : _cd(iconv_t(-1)), _open(NULL) {} |
| 42 |
~u8conv() { if (*this) FUNC(libiconv_close)(_cd); } |
| 43 |
u8conv& charset(const string& charset); |
| 44 |
u8conv& reset(); |
| 45 |
operator bool() const { return _cd != iconv_t(-1); } |
| 46 |
string operator()(const string& text); |
| 47 |
}; |
| 48 |
win32::dll u8conv::_dll("iconv.dll"); |
| 49 |
} |
| 50 |
|
| 51 |
/*
 * Select the source charset of subsequent conversions.
 * Nothing happens when the DLL object tests false.  The entry points are
 * resolved on first use.  An empty name, or the name already selected,
 * keeps the current descriptor; any other name closes the old descriptor
 * and opens a new one converting from that charset to UTF-8.
 * Returns *this.
 */
u8conv&
u8conv::charset(const string& charset)
{
    if (!_dll) return *this;
    if (!_open) {
        _iconv = FUNC(libiconv);
        _close = FUNC(libiconv_close);
        _open = FUNC(libiconv_open);
    }
    if (charset.empty() || charset == _charset) return *this;
    if (_cd != iconv_t(-1)) {
        _close(_cd);
        _cd = iconv_t(-1);
    }
    _charset = charset;
    _cd = _open("UTF-8", charset.c_str());
    return *this;
}
| 68 |
|
| 69 |
/*
 * Call the converter with all-NULL buffers when a descriptor is open
 * (the iconv convention for returning to the initial conversion state).
 * Returns *this.
 */
u8conv&
u8conv::reset()
{
    const bool opened = _cd != iconv_t(-1);
    if (opened) _iconv(_cd, NULL, NULL, NULL, NULL);
    return *this;
}
| 75 |
|
| 76 |
/*
 * Convert text to UTF-8 with the current descriptor.
 * Throws the input string itself when no descriptor is open.
 * The input is converted through a 128-byte scratch buffer; the loop ends
 * when a call succeeds or when a call produces no output at all.
 */
string
u8conv::operator()(const string& text)
{
    if (!*this) throw text;
    string converted;
    const char* src = text.c_str();
    size_t left = text.size();
    for (;;) {
        char chunk[128];
        char* dst = chunk;
        size_t room = sizeof(chunk);
        const size_t rc = _iconv(_cd, &src, &left, &dst, &room);
        const size_t produced = sizeof(chunk) - room;
        if (produced == 0) break;
        converted.append(chunk, produced);
        // Only a failed call (e.g. scratch buffer full) warrants another round.
        if (rc != size_t(-1)) break;
    }
    return converted;
}
| 94 |
|
| 95 |
#undef FUNC |
| 96 |
#else // !USE_ICONV |
| 97 |
/** u8conv - convert multibyte text to UTF-8
 */
| 99 |
namespace { |
| 100 |
extern "C" { |
| 101 |
typedef HRESULT (WINAPI* ConvertINetMultiByteToUnicode) |
| 102 |
(LPDWORD, DWORD, LPCSTR, LPINT, LPWSTR, LPINT); |
| 103 |
} |
| 104 |
#define FUNC(name) name(_dll(#name, NULL)) |
| 105 |
|
| 106 |
class u8conv { |
| 107 |
static win32::dll _dll; |
| 108 |
static ConvertINetMultiByteToUnicode _mb2u; |
| 109 |
string _charset; |
| 110 |
UINT _codepage; |
| 111 |
DWORD _mode; |
| 112 |
public: |
| 113 |
u8conv() : _codepage(0) {} |
| 114 |
u8conv& charset(const string& charset); |
| 115 |
u8conv& reset() { _mode = 0; return *this; } |
| 116 |
operator bool() const { return _codepage != 0; } |
| 117 |
string operator()(const string& text); |
| 118 |
}; |
| 119 |
win32::dll u8conv::_dll("mlang.dll"); |
| 120 |
ConvertINetMultiByteToUnicode |
| 121 |
u8conv::_mb2u = FUNC(ConvertINetMultiByteToUnicode); |
| 122 |
#undef FUNC |
| 123 |
} |
| 124 |
|
| 125 |
extern unsigned codepage(const string&); |
| 126 |
|
| 127 |
/*
 * Select the source charset of subsequent conversions.
 * An empty name, or the name already selected, changes nothing.
 * Otherwise the name is remembered, its code page is obtained from
 * codepage(), and the conversion mode is cleared.
 * Returns *this.
 */
u8conv&
u8conv::charset(const string& charset)
{
    if (charset.empty() || charset == _charset) return *this;
    _charset = charset;
    _codepage = codepage(charset);
    _mode = 0;
    return *this;
}
| 137 |
|
| 138 |
/*
 * Convert text from the selected code page to UTF-8.
 * Throws the input string itself when no code page is selected or when
 * the conversion fails.
 * Without the MLang entry point the conversion goes through win32::wstr;
 * otherwise ConvertINetMultiByteToUnicode is called twice: once to learn
 * the required length and once to convert.
 */
string
u8conv::operator()(const string& text)
{
    if (!*this) throw text;
    if (!_mb2u) {
        win32::wstr ws(text, _codepage);
        if (!ws) throw text;
        return ws.mbstr(CP_UTF8);
    }
    // The sizing pass works on a scratch copy of the mode word so that the
    // real pass below starts from the same conversion state.
    DWORD probe = _mode;
    int n = 0;
    if (_mb2u(&probe, _codepage, text.c_str(), NULL, NULL, &n) != S_OK) throw text;
    win32::textbuf<WCHAR> buf(n + 1);
    // A failure here would leave buf unfilled, so it is reported like a
    // failure of the sizing pass instead of being ignored.
    if (_mb2u(&_mode, _codepage, text.c_str(), NULL, buf.data, &n) != S_OK) throw text;
    buf.data[n] = 0;
    return win32::wstr::mbstr(buf.data, CP_UTF8);
}
| 154 |
#endif // !USE_ICONV |
| 155 |
|
| 156 |
/*
 * Functions of the class mail.
 */
| 159 |
bool |
| 160 |
mail::header(const string& headers) |
| 161 |
{ |
| 162 |
bool read = false; |
| 163 |
decoder de(headers); |
| 164 |
while (de) { |
| 165 |
switch (de.field("SUBJECT\0FROM\0DATE\0STATUS\0")) { |
| 166 |
case 0: _subject = decoder(de.field()).unstructured(); break; |
| 167 |
case 1: _from = decoder(de.field()).address(); break; |
| 168 |
case 2: _date = decoder(de.field()).date(); break; |
| 169 |
case 3: read = de.field().find_first_of('R') != string::npos; break; |
| 170 |
} |
| 171 |
} |
| 172 |
return read; |
| 173 |
} |
| 174 |
|
| 175 |
/*
 * Functions of the class mail::decoder
 */
| 178 |
int |
| 179 |
mail::decoder::field(const char* names) |
| 180 |
{ |
| 181 |
_field.clear(); |
| 182 |
while (*this) { |
| 183 |
string::size_type i = findf(":\015"); |
| 184 |
if (i == string::npos) { |
| 185 |
_next = _s.size(); |
| 186 |
break; |
| 187 |
} |
| 188 |
if (_s[i] == ':') { |
| 189 |
string name = uppercase(i++); |
| 190 |
int n = 0; |
| 191 |
const char* np = names; |
| 192 |
for (; *np && name != np; np += strlen(np) + 1) ++n; |
| 193 |
if (*np) { |
| 194 |
_next = i; |
| 195 |
string s; |
| 196 |
do { |
| 197 |
i = _s.find("\015\012", _next); |
| 198 |
s.append(_s, _next, i - _next); |
| 199 |
_next = i == string::npos ? _s.size() : i + 2; |
| 200 |
} while (*this && (_s[_next] == ' ' || _s[_next] == '\t')); |
| 201 |
_field = trim(s); |
| 202 |
return n; |
| 203 |
} |
| 204 |
} |
| 205 |
do { |
| 206 |
i = _s.find("\015\012", i); |
| 207 |
i = i != string::npos ? i + 2 : _s.size(); |
| 208 |
} while (i < _s.size() && (_s[i] == ' ' || _s[i] == '\t')); |
| 209 |
_next = i; |
| 210 |
} |
| 211 |
return -1; |
| 212 |
} |
| 213 |
|
| 214 |
/*
 * Parse an address list.
 * Returns a pair: first is a display string for the first address,
 * second is the whole field with encoded words decoded.
 *
 * For the first address (up to the first comma) four parts are gathered
 * in the pattern "$0 <$1> $2 ($3)".  The display string is $0 when both
 * $0 and $1 are present; otherwise the comment $3 when there is one;
 * otherwise whichever of $0 / $1 is present.
 */
pair<string, string>
mail::decoder::address()
{
    enum { BEFORE, ANGLE, AFTER, COMMENT, PARTS };
    string part[PARTS];
    string whole;
    bool leading = true;    // still inside the first address
    int cur = BEFORE;
    while (*this) {
        // Plain text up to the next special character.
        string t = eword(findf("\"(,<>[\\"));
        whole += t;
        if (leading) part[cur] += t;
        // The special token itself.
        t = token(true);
        if (t.empty()) break;
        const char lead = t[0];
        if (lead == '(' || lead == '"') {
            whole += eword(t, false);
        } else {
            whole += t;
        }
        if (!leading) continue;
        if (lead == ',') {
            leading = false;
        } else if (lead == '<') {
            cur = ANGLE;
        } else if (lead == '>') {
            cur = AFTER;
        } else if (lead == '[') {
            part[cur] += t;
        } else if (lead == '(') {
            // Drop the enclosing parentheses.
            part[COMMENT] = eword(t.substr(1, t.size() - 2), true);
        } else if (lead == '"') {
            // Drop the enclosing quotes.
            part[cur] += eword(t.substr(1, t.size() - 2), true);
        } else if (lead == '\\') {
            part[cur] += t.substr(1);
        }
    }
    for (int k = 0; k < PARTS; ++k) part[k] = trim(part[k]);

    int pick = BEFORE;
    if (part[ANGLE].empty()) {
        if (!part[COMMENT].empty()) pick = COMMENT;
    } else if (part[BEFORE].empty()) {
        pick = part[COMMENT].empty() ? ANGLE : COMMENT;
    }
    return pair<string, string>(part[pick], whole);
}
| 245 |
|
| 246 |
time_t |
| 247 |
mail::decoder::date() |
| 248 |
{ |
| 249 |
struct tm tms = { 0 }; |
| 250 |
{ |
| 251 |
string day = token(), month = token(); |
| 252 |
if (month == ",") day = token(), month = token(); |
| 253 |
if (!digit(day, tms.tm_mday)) return time_t(-1); |
| 254 |
static const char mn[][4] = { |
| 255 |
"Jan", "Feb", "Mar", "Apr", "May", "Jun", |
| 256 |
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" |
| 257 |
}; |
| 258 |
int i = sizeof(mn) / sizeof(*mn); |
| 259 |
while (i-- && month != mn[i]) continue; |
| 260 |
if (i < 0) return time_t(-1); |
| 261 |
tms.tm_mon = i; |
| 262 |
} |
| 263 |
if (!digit(token(), tms.tm_year)) return time_t(-1); |
| 264 |
tms.tm_year -= 1900; |
| 265 |
if (!digit(token(), tms.tm_hour) || |
| 266 |
token() != ":" || |
| 267 |
!digit(token(), tms.tm_min)) return time_t(-1); |
| 268 |
string zone = token(); |
| 269 |
if (zone == ":") { |
| 270 |
if (!digit(token(), tms.tm_sec)) return time_t(-1); |
| 271 |
zone = token(); |
| 272 |
} |
| 273 |
|
| 274 |
time_t gmt = mktime(&tms); |
| 275 |
if (gmt == time_t(-1)) return time_t(-1); |
| 276 |
struct tm* gm = gmtime(&gmt); |
| 277 |
if (!gm) return time_t(-1); |
| 278 |
gmt += tms.tm_sec - gm->tm_sec; |
| 279 |
gmt += (tms.tm_min - gm->tm_min) * 60; |
| 280 |
gmt += (tms.tm_hour - gm->tm_hour) * 3600; |
| 281 |
if (tms.tm_mday != gm->tm_mday) gmt += 86400; |
| 282 |
|
| 283 |
int delta = 0; |
| 284 |
if (!zone.empty()) { |
| 285 |
if (zone[0] == '+' || zone[0] == '-') { |
| 286 |
if (!digit(zone.substr(1), delta)) return time_t(-1); |
| 287 |
if (zone[0] == '-') delta = -delta; |
| 288 |
} else if (zone.size() == 3 && zone[2] == 'T' && |
| 289 |
(zone[1] == 'S' || zone[1] == 'D')) { |
| 290 |
static const char z[] = "ECMP"; |
| 291 |
for (int i = 0; !delta && z[i]; ++i) { |
| 292 |
if (zone[0] == z[i]) delta = (i + int(zone[1] == 'S') + 4) * -100; |
| 293 |
} |
| 294 |
} |
| 295 |
} |
| 296 |
return gmt - (delta / 100 * 60 + delta % 100) * 60; |
| 297 |
} |
| 298 |
|
| 299 |
/*
 * Return text without leading and trailing spaces and tabs.
 * A string consisting only of blanks yields an empty string.
 */
string
mail::decoder::trim(const string& text)
{
    static const char blanks[] = " \t";
    const string::size_type head = text.find_first_not_of(blanks);
    if (head == string::npos) return string();
    const string::size_type tail = text.find_last_not_of(blanks);
    return text.substr(head, tail - head + 1);
}
| 307 |
|
| 308 |
/*
 * Decode the encoded words between the current position and `to`
 * (clamped to the end of the input) and move the current position there.
 * Returns an empty string, without moving, when `to` is not ahead of the
 * current position.
 */
string
mail::decoder::eword(string::size_type to)
{
    const string::size_type from = _next;
    if (from >= to) return string();
    _next = min(to, _s.size());
    return eword(_s, from, _next);
}
| 314 |
|
| 315 |
/*
 * Decode the encoded words in text, leaving backslash escapes alone.
 * The runs between escapes are decoded; each escape (a backslash and the
 * character after it) is copied as is, or without its backslash when
 * unescape is true.
 */
string
mail::decoder::eword(const string& text, bool unescape)
{
    string out;
    string::size_type from = 0;
    while (from < text.size()) {
        const string::size_type bs = text.find('\\', from);
        out += eword(text, from, bs);
        if (bs == string::npos) break;
        // Resume after the escaped character (or at the end when the
        // backslash is the last character).
        from = bs < text.size() - 1 ? bs + 2 : text.size();
        const string::size_type keep = unescape ? bs + 1 : bs;
        out.append(text, keep, from - keep);
    }
    return out;
}
| 329 |
|
| 330 |
/*
 * Decode the encoded words ("=?charset?B-or-Q?data?=") found in
 * text[pos, end) and return the result as UTF-8.
 * Text that is not an encoded word is copied unchanged, except that
 * blanks standing only between two decoded words are dropped.  A word
 * that cannot be decoded or converted is left in its encoded form.
 */
string
mail::decoder::eword(const string& text,
                     string::size_type pos, string::size_type end)
{
    u8conv conv;
    string result;
    if (end > text.size()) end = text.size();
    for (string::size_type i = pos; i < end;) {
        i = text.find("=?", i);
        if (i >= end) break;
        i += 2;
        // q[0], q[1], q[2]: start of the charset, encoding and data parts.
        string::size_type q[3];
        {
            string::size_type p = i;
            int n = 0;
            for (; n < 3; ++n) {
                q[n] = p;
                p = text.find_first_of("? \t", p);
                if (p >= end || text[p++] != '?') break;
            }
            // Not a complete word: look for the next "=?" after this one.
            if (n < 3 || p == end || text[p++] != '=') continue;
            i = p;
        }
        // toupper() requires a value representable as unsigned char.
        int c = toupper(static_cast<unsigned char>(text[q[1]]));
        if (q[2] - q[1] != 2 || (c != 'B' && c != 'Q')) continue;
        // Copy pending text only when it holds something besides blanks.
        if (text.find_first_not_of(" \t", pos) < q[0] - 2) {
            result.append(text, pos, q[0] - pos - 2), pos = q[0] - 2;
            conv.reset();
        }
        try {
            string s(text, q[2], i - q[2] - 2);
            s = (c == 'B' ? decodeB : decodeQ)(s);
            string cs = uppercase(text.substr(q[0], q[1] - q[0] - 1));
            result += cs == "UTF-8" ? (conv.reset(), s) : conv.charset(cs)(s);
            pos = i;
        } catch (...) {
            // Deliberate: pos is not advanced, so the undecodable word is
            // copied verbatim with the following pending text.
        }
    }
    if (pos < end) result.append(text, pos, end - pos);
    return result;
}
| 370 |
|
| 371 |
/*
 * Decode base64 ("B" encoding) text.
 * Throws -1 when the length is not a multiple of four, when a character
 * outside the base64 alphabet appears (including a NUL byte), or when
 * '=' padding is malformed or not in the final group.
 */
string
mail::decoder::decodeB(const string& text)
{
    static const char b64[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    if (text.size() & 3) throw -1;
    string result;
    result.reserve((text.size() >> 2) * 3);
    for (string::size_type at = 0; at < text.size(); at += 4) {
        const char* p = text.c_str() + at;
        unsigned v = 0;
        int i;
        for (i = 0; i < 4; ++i) {
            // strchr() would match the terminator of b64 for a NUL byte,
            // so NUL is rejected explicitly.
            const char* pos = p[i] ? strchr(b64, p[i]) : NULL;
            if (!pos) break;
            v = (v << 6) | unsigned(pos - b64);
        }
        if (i < 4) {
            // Padding: at least two data characters, last group only,
            // and nothing but '=' after the data.
            if (i < 2 || at + 4 != text.size()) throw -1;
            for (int t = i; t < 4; ++t) {
                if (p[t] != '=') throw -1;
                v <<= 6;
            }
        }
        char b[] = { char(v >> 16), char(v >> 8), char(v) };
        // i data characters carry i - 1 whole bytes.
        result.append(b, i - 1);
    }
    return result;
}
| 399 |
|
| 400 |
/*
 * Decode "Q" encoded text: '_' becomes a space and "=XX" becomes the byte
 * with hexadecimal value XX; everything else is copied.
 * Throws -1 when '=' is not followed by exactly two hexadecimal digits.
 */
string
mail::decoder::decodeQ(const string& text)
{
    string result;
    for (string::size_type i = 0; i < text.size();) {
        string::size_type t = i;
        i = text.find_first_of("_=", i);
        if (i != t) result.append(text, t, i - t);
        if (i == string::npos) break;
        char c = char(0x20);
        if (text[i++] == '=') {
            if (i >= text.size() - 1) throw -1;
            // Check the digits explicitly: strtoul() alone would also
            // accept a leading blank or sign, e.g. "= A" or "=+A".
            if (!isxdigit(static_cast<unsigned char>(text[i])) ||
                !isxdigit(static_cast<unsigned char>(text[i + 1]))) throw -1;
            const char s[] = { text[i], text[i + 1], 0 };
            c = char(strtoul(s, NULL, 16));
            i += 2;
        }
        result += c;
    }
    return result;
}
| 422 |
|
| 423 |
/*
 * Return the next token of a structured field and move past it.
 * Leading blanks are skipped.  A token is a quoted string, a
 * parenthesized comment (nesting allowed), a bracketed domain, a
 * backslash escape, a single special character, or a run of ordinary
 * characters; a run containing '.' extends over the dots (dot-atom).
 * Comments are returned only when comment is true; otherwise they are
 * skipped.  Returns an empty string at the end of the input.
 */
string
mail::decoder::token(bool comment)
{
    while (*this) {
        const string::size_type start = _s.find_first_not_of(" \t", _next);
        if (start == string::npos) {
            _next = _s.size();
            break;
        }
        string::size_type stop = findf(" \t\"(),.:;<>@[\\]", start);
        const bool special =
            stop == start || (stop != string::npos && _s[stop] == '.');
        if (special) {
            const char lead = _s[stop];
            if (lead == '"') {              // quoted-text
                stop = findq("\"", stop + 1);
            } else if (lead == '(') {       // comment
                int depth = 1;
                while (depth) {
                    stop = findq("()", stop + 1);
                    if (stop == string::npos) break;
                    depth += _s[stop] == '(' ? 1 : -1;
                }
            } else if (lead == '.') {       // dot-atom
                stop = findf(" \t\"(),:;<>@[\\]", stop + 1);
                if (stop != string::npos) --stop;
            } else if (lead == '[') {       // domain
                stop = findq("]", stop + 1);
            } else if (lead == '\\') {      // escape
                ++stop;
            }
            // Step past the closing character.
            if (stop < _s.size()) ++stop;
        }
        if (stop == string::npos) stop = _s.size();
        _next = stop;
        if (comment || _s[start] != '(') return _s.substr(start, stop - start);
    }
    return string();
}
| 464 |
|
| 465 |
/*
 * Functions of the class tokenizer
 */
| 468 |
bool |
| 469 |
tokenizer::digit(const string& s, int& value) |
| 470 |
{ |
| 471 |
if (s.empty()) return false; |
| 472 |
char* e; |
| 473 |
value = strtol(s.c_str(), &e, 10); |
| 474 |
return !*e; |
| 475 |
} |
| 476 |
|
| 477 |
/*
 * Return s with every character passed through toupper().
 */
string
tokenizer::uppercase(string s)
{
    for (string::iterator p = s.begin(); p != s.end(); ++p) {
        // toupper() is only defined for values representable as unsigned
        // char (or EOF); a plain char above 0x7F may be negative.
        *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
    }
    return s;
}
| 485 |
|
| 486 |
/*
 * Find the first character from s at or after pos that is not part of a
 * backslash escape.  A backslash and the character following it are
 * skipped together.  Returns string::npos when there is none.
 */
string::size_type
tokenizer::findq(const char* s, string::size_type pos) const
{
    const string stops = string(s) + '\\';
    while (pos < _s.size()) {
        pos = findf(stops.c_str(), pos);
        if (pos == string::npos || _s[pos] != '\\') return pos;
        pos += 2;
    }
    return string::npos;
}