• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revision: 4622eaaf58dded56e9bffed81baf77d2e062754f (tree)
Time: 2008-12-28 17:33:05
Author: henoheno <henoheno>
Committer: henoheno

Log Message

$Id: spam.php,v 1.207 2008/12/27 15:21:41 henoheno Exp $

* delimiter_reverse(): Return FALSE with invalid argument. Added test cases
* is_ip(): IPv6 (rough)
* get_blocklist(): Added a comment, and blank lines
* get_blocklist_add(): Regex separator '/' => '#'

Change Summary

Incremental Difference

--- a/lib/spam.php
+++ b/lib/spam.php
@@ -1,5 +1,5 @@
11 <?php
2-// $Id: spam.php,v 1.32 2007/08/18 14:47:32 henoheno Exp $
2+// $Id: spam.php,v 1.33 2008/12/28 08:33:05 henoheno Exp $
33 // Copyright (C) 2006-2007 PukiWiki Developers Team
44 // License: GPL v2 or (at your option) any later version
55 //
@@ -70,13 +70,46 @@ function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys =
7070 }
7171 }
7272
73+// Data structure: Create an array they _refer_only_one_ value
74+function one_value_array($num = 0, $value = NULL)
75+{
76+ $num = max(0, intval($num));
77+ $array = array();
78+
79+ for ($i = 0; $i < $num; $i++) {
80+ $array[] = & $value;
81+ }
82+
83+ return $array;
84+}
85+
7386 // Reverse $string with specified delimiter
74-function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = '.')
87+function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = NULL)
7588 {
76- if (! is_string($string) || ! is_string($from_delim) || ! is_string($to_delim))
77- return $string;
89+ $to_null = ($to_delim === NULL);
7890
79- // com.example.bar.foo
91+ if (! is_string($from_delim) || (! $to_null && ! is_string($to_delim))) {
92+ return FALSE;
93+ }
94+ if (is_array($string)) {
95+ // Map, Recurse
96+ $count = count($string);
97+ $from = one_value_array($count, $from_delim);
98+ if ($to_null) {
99+ // Note: array_map() vanishes all keys
100+ return array_map('delimiter_reverse', $string, $from);
101+ } else {
102+ $to = one_value_array($count, $to_delim);
103+ // Note: array_map() vanishes all keys
104+ return array_map('delimiter_reverse', $string, $from, $to);
105+ }
106+ }
107+ if (! is_string($string)) {
108+ return FALSE;
109+ }
110+
111+ // Returns com.example.bar.foo
112+ if ($to_null) $to_delim = & $from_delim;
80113 return implode($to_delim, array_reverse(explode($from_delim, $string)));
81114 }
82115
@@ -85,13 +118,18 @@ function ksort_by_domain(& $array)
85118 {
86119 $sort = array();
87120 foreach(array_keys($array) as $key) {
88- $sort[delimiter_reverse($key)] = $key;
121+ $reversed = delimiter_reverse($key);
122+ if ($reversed !== FALSE) {
123+ $sort[$reversed] = $key;
124+ }
89125 }
90126 ksort($sort, SORT_STRING);
127+
91128 $result = array();
92129 foreach($sort as $key) {
93130 $result[$key] = & $array[$key];
94131 }
132+
95133 $array = $result;
96134 }
97135
@@ -317,8 +355,10 @@ function generate_host_regex($string = '', $divider = '/')
317355 {
318356 if (! is_string($string)) return '';
319357
320- if (mb_strpos($string, '.') === FALSE)
358+ if (mb_strpos($string, '.') === FALSE) {
359+ // localhost
321360 return generate_glob_regex($string, $divider);
361+ }
322362
323363 if (is_ip($string)) {
324364 // IPv4
@@ -327,10 +367,13 @@ function generate_host_regex($string = '', $divider = '/')
327367 // FQDN or something
328368 $part = explode('.', $string, 2);
329369 if ($part[0] == '') {
330- $part[0] = '(?:.*\.)?'; // And all related FQDN
370+ // .example.org
371+ $part[0] = '(?:.*\.)?';
331372 } else if ($part[0] == '*') {
332- $part[0] = '.*\.'; // All subdomains/hosts only
373+ // *.example.org
374+ $part[0] = '.*\.';
333375 } else {
376+ // example.org, etc
334377 return generate_glob_regex($string, $divider);
335378 }
336379 $part[1] = generate_glob_regex($part[1], $divider);
@@ -339,20 +382,26 @@ function generate_host_regex($string = '', $divider = '/')
339382 }
340383
341384 // Rough hostname checker
342-// [OK] 192.168.
343-// TODO: Strict digit, 0x, CIDR, IPv6
385+// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G'
344386 function is_ip($string = '')
345387 {
388+ if (! is_string($string)) return FALSE;
389+
390+ if (strpos($string, ':') !== FALSE) {
391+ return 6; // Seems IPv6
392+ }
393+
346394 if (preg_match('/^' .
347395 '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .
348- '(?:[0-9]{1,3}\.){1,3}' . '$/',
396+ '(?:[0-9]{1,3}\.){1,3}' . '$/',
349397 $string)) {
350398 return 4; // Seems IPv4(dot-decimal)
351- } else {
352- return 0; // Seems not IP
353399 }
400+
401+ return FALSE; // Seems not IP
354402 }
355403
404+// Load SPAM_INI_FILE and return parsed one
356405 function get_blocklist($list = '')
357406 {
358407 static $regexes;
@@ -366,6 +415,7 @@ function get_blocklist($list = '')
366415 $regexes = array();
367416 if (file_exists(SPAM_INI_FILE)) {
368417 $blocklist = array();
418+
369419 include(SPAM_INI_FILE);
370420 // $blocklist['list'] = array(
371421 // //'goodhost' => FALSE;
@@ -375,11 +425,19 @@ function get_blocklist($list = '')
375425 // '*.blogspot.com', // Blog services's subdomains (only)
376426 // 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#',
377427 // );
378- foreach(array('pre', 'list') as $special) {
428+
429+ foreach(array(
430+ 'pre',
431+ 'list',
432+ ) as $special) {
433+
379434 if (! isset($blocklist[$special])) continue;
435+
380436 $regexes[$special] = $blocklist[$special];
437+
381438 foreach(array_keys($blocklist[$special]) as $_list) {
382439 if (! isset($blocklist[$_list])) continue;
440+
383441 foreach ($blocklist[$_list] as $key => $value) {
384442 if (is_array($value)) {
385443 $regexes[$_list][$key] = array();
@@ -390,6 +448,7 @@ function get_blocklist($list = '')
390448 get_blocklist_add($regexes[$_list], $key, $value);
391449 }
392450 }
451+
393452 unset($blocklist[$_list]);
394453 }
395454 }
@@ -397,21 +456,21 @@ function get_blocklist($list = '')
397456 }
398457
399458 if ($list === '') {
400- return $regexes; // ALL
459+ return $regexes; // ALL of
401460 } else if (isset($regexes[$list])) {
402- return $regexes[$list];
461+ return $regexes[$list]; // A part of
403462 } else {
404- return array();
463+ return array(); // Found nothing
405464 }
406465 }
407466
408-// Subroutine of get_blocklist()
409-function get_blocklist_add(& $array, $key = 0, $value = '*.example.org')
467+// Subroutine of get_blocklist(): Add new regex to the $array
468+function get_blocklist_add(& $array, $key = 0, $value = '*.example.org/path/to/file.html')
410469 {
411470 if (is_string($key)) {
412- $array[$key] = & $value; // Treat $value as a regex
471+ $array[$key] = & $value; // Treat $value as a regex for FQDN(host)s
413472 } else {
414- $array[$value] = '/^' . generate_host_regex($value, '/') . '$/i';
473+ $array[$value] = '#^' . generate_host_regex($value, '#') . '$#i';
415474 }
416475 }
417476