Revision | 4622eaaf58dded56e9bffed81baf77d2e062754f (tree) |
---|---|
Time | 2008-12-28 17:33:05 |
Author | henoheno <henoheno> |
Committer | henoheno |
$Id: spam.php,v 1.207 2008/12/27 15:21:41 henoheno Exp $
* delimiter_reverse(): Return FALSE on invalid arguments. Added test cases
* is_ip(): Added rough IPv6 detection
* get_blocklist(): Added a comment and blank lines
* get_blocklist_add(): Changed the regex delimiter from '/' to '#'
@@ -1,5 +1,5 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam.php,v 1.32 2007/08/18 14:47:32 henoheno Exp $ | |
2 | +// $Id: spam.php,v 1.33 2008/12/28 08:33:05 henoheno Exp $ | |
3 | 3 | // Copyright (C) 2006-2007 PukiWiki Developers Team |
4 | 4 | // License: GPL v2 or (at your option) any later version |
5 | 5 | // |
@@ -70,13 +70,46 @@ function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys = | ||
70 | 70 | } |
71 | 71 | } |
72 | 72 | |
73 | +// Data structure: Create an array whose elements all _refer_to_only_one_ value | |
74 | +function one_value_array($num = 0, $value = NULL) | |
75 | +{ | |
76 | + $num = max(0, intval($num)); | |
77 | + $array = array(); | |
78 | + | |
79 | + for ($i = 0; $i < $num; $i++) { | |
80 | + $array[] = & $value; | |
81 | + } | |
82 | + | |
83 | + return $array; | |
84 | +} | |
85 | + | |
73 | 86 | // Reverse $string with specified delimiter |
74 | -function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = '.') | |
87 | +function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = NULL) | |
75 | 88 | { |
76 | - if (! is_string($string) || ! is_string($from_delim) || ! is_string($to_delim)) | |
77 | - return $string; | |
89 | + $to_null = ($to_delim === NULL); | |
78 | 90 | |
79 | - // com.example.bar.foo | |
91 | + if (! is_string($from_delim) || (! $to_null && ! is_string($to_delim))) { | |
92 | + return FALSE; | |
93 | + } | |
94 | + if (is_array($string)) { | |
95 | + // Map, Recurse | |
96 | + $count = count($string); | |
97 | + $from = one_value_array($count, $from_delim); | |
98 | + if ($to_null) { | |
99 | + // Note: array_map() vanishes all keys | |
100 | + return array_map('delimiter_reverse', $string, $from); | |
101 | + } else { | |
102 | + $to = one_value_array($count, $to_delim); | |
103 | + // Note: array_map() vanishes all keys | |
104 | + return array_map('delimiter_reverse', $string, $from, $to); | |
105 | + } | |
106 | + } | |
107 | + if (! is_string($string)) { | |
108 | + return FALSE; | |
109 | + } | |
110 | + | |
111 | + // Returns com.example.bar.foo | |
112 | + if ($to_null) $to_delim = & $from_delim; | |
80 | 113 | return implode($to_delim, array_reverse(explode($from_delim, $string))); |
81 | 114 | } |
82 | 115 |
@@ -85,13 +118,18 @@ function ksort_by_domain(& $array) | ||
85 | 118 | { |
86 | 119 | $sort = array(); |
87 | 120 | foreach(array_keys($array) as $key) { |
88 | - $sort[delimiter_reverse($key)] = $key; | |
121 | + $reversed = delimiter_reverse($key); | |
122 | + if ($reversed !== FALSE) { | |
123 | + $sort[$reversed] = $key; | |
124 | + } | |
89 | 125 | } |
90 | 126 | ksort($sort, SORT_STRING); |
127 | + | |
91 | 128 | $result = array(); |
92 | 129 | foreach($sort as $key) { |
93 | 130 | $result[$key] = & $array[$key]; |
94 | 131 | } |
132 | + | |
95 | 133 | $array = $result; |
96 | 134 | } |
97 | 135 |
@@ -317,8 +355,10 @@ function generate_host_regex($string = '', $divider = '/') | ||
317 | 355 | { |
318 | 356 | if (! is_string($string)) return ''; |
319 | 357 | |
320 | - if (mb_strpos($string, '.') === FALSE) | |
358 | + if (mb_strpos($string, '.') === FALSE) { | |
359 | + // localhost | |
321 | 360 | return generate_glob_regex($string, $divider); |
361 | + } | |
322 | 362 | |
323 | 363 | if (is_ip($string)) { |
324 | 364 | // IPv4 |
@@ -327,10 +367,13 @@ function generate_host_regex($string = '', $divider = '/') | ||
327 | 367 | // FQDN or something |
328 | 368 | $part = explode('.', $string, 2); |
329 | 369 | if ($part[0] == '') { |
330 | - $part[0] = '(?:.*\.)?'; // And all related FQDN | |
370 | + // .example.org | |
371 | + $part[0] = '(?:.*\.)?'; | |
331 | 372 | } else if ($part[0] == '*') { |
332 | - $part[0] = '.*\.'; // All subdomains/hosts only | |
373 | + // *.example.org | |
374 | + $part[0] = '.*\.'; | |
333 | 375 | } else { |
376 | + // example.org, etc | |
334 | 377 | return generate_glob_regex($string, $divider); |
335 | 378 | } |
336 | 379 | $part[1] = generate_glob_regex($part[1], $divider); |
@@ -339,20 +382,26 @@ function generate_host_regex($string = '', $divider = '/') | ||
339 | 382 | } |
340 | 383 | |
341 | 384 | // Rough hostname checker |
342 | -// [OK] 192.168. | |
343 | -// TODO: Strict digit, 0x, CIDR, IPv6 | |
385 | +// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G' | |
344 | 386 | function is_ip($string = '') |
345 | 387 | { |
388 | + if (! is_string($string)) return FALSE; | |
389 | + | |
390 | + if (strpos($string, ':') !== FALSE) { | |
391 | + return 6; // Seems IPv6 | |
392 | + } | |
393 | + | |
346 | 394 | if (preg_match('/^' . |
347 | 395 | '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . |
348 | - '(?:[0-9]{1,3}\.){1,3}' . '$/', | |
396 | + '(?:[0-9]{1,3}\.){1,3}' . '$/', | |
349 | 397 | $string)) { |
350 | 398 | return 4; // Seems IPv4(dot-decimal) |
351 | - } else { | |
352 | - return 0; // Seems not IP | |
353 | 399 | } |
400 | + | |
401 | + return FALSE; // Seems not IP | |
354 | 402 | } |
355 | 403 | |
404 | +// Load SPAM_INI_FILE and return parsed one | |
356 | 405 | function get_blocklist($list = '') |
357 | 406 | { |
358 | 407 | static $regexes; |
@@ -366,6 +415,7 @@ function get_blocklist($list = '') | ||
366 | 415 | $regexes = array(); |
367 | 416 | if (file_exists(SPAM_INI_FILE)) { |
368 | 417 | $blocklist = array(); |
418 | + | |
369 | 419 | include(SPAM_INI_FILE); |
370 | 420 | // $blocklist['list'] = array( |
371 | 421 | // //'goodhost' => FALSE; |
@@ -375,11 +425,19 @@ function get_blocklist($list = '') | ||
375 | 425 | // '*.blogspot.com', // Blog services's subdomains (only) |
376 | 426 | // 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#', |
377 | 427 | // ); |
378 | - foreach(array('pre', 'list') as $special) { | |
428 | + | |
429 | + foreach(array( | |
430 | + 'pre', | |
431 | + 'list', | |
432 | + ) as $special) { | |
433 | + | |
379 | 434 | if (! isset($blocklist[$special])) continue; |
435 | + | |
380 | 436 | $regexes[$special] = $blocklist[$special]; |
437 | + | |
381 | 438 | foreach(array_keys($blocklist[$special]) as $_list) { |
382 | 439 | if (! isset($blocklist[$_list])) continue; |
440 | + | |
383 | 441 | foreach ($blocklist[$_list] as $key => $value) { |
384 | 442 | if (is_array($value)) { |
385 | 443 | $regexes[$_list][$key] = array(); |
@@ -390,6 +448,7 @@ function get_blocklist($list = '') | ||
390 | 448 | get_blocklist_add($regexes[$_list], $key, $value); |
391 | 449 | } |
392 | 450 | } |
451 | + | |
393 | 452 | unset($blocklist[$_list]); |
394 | 453 | } |
395 | 454 | } |
@@ -397,21 +456,21 @@ function get_blocklist($list = '') | ||
397 | 456 | } |
398 | 457 | |
399 | 458 | if ($list === '') { |
400 | - return $regexes; // ALL | |
459 | + return $regexes; // ALL of | |
401 | 460 | } else if (isset($regexes[$list])) { |
402 | - return $regexes[$list]; | |
461 | + return $regexes[$list]; // A part of | |
403 | 462 | } else { |
404 | - return array(); | |
463 | + return array(); // Found nothing | |
405 | 464 | } |
406 | 465 | } |
407 | 466 | |
408 | -// Subroutine of get_blocklist() | |
409 | -function get_blocklist_add(& $array, $key = 0, $value = '*.example.org') | |
467 | +// Subroutine of get_blocklist(): Add new regex to the $array | |
468 | +function get_blocklist_add(& $array, $key = 0, $value = '*.example.org/path/to/file.html') | |
410 | 469 | { |
411 | 470 | if (is_string($key)) { |
412 | - $array[$key] = & $value; // Treat $value as a regex | |
471 | + $array[$key] = & $value; // Treat $value as a regex for FQDN(host)s | |
413 | 472 | } else { |
414 | - $array[$value] = '/^' . generate_host_regex($value, '/') . '$/i'; | |
473 | + $array[$value] = '#^' . generate_host_regex($value, '#') . '$#i'; | |
415 | 474 | } |
416 | 475 | } |
417 | 476 |