ArcGETのソースコード
| Revision | 7fe9490c636e47251014ecfc87c7dc6fa6aa0ffc (tree) |
|---|---|
| Time | 2014-04-09 02:53:11 |
| Author | mikari <mikari@wolf...> |
| Committer | mikari |
不要なコメント削除
| @@ -47,7 +47,7 @@ class ArcGET | ||
| 47 | 47 | @sleepTime = 20.0 |
| 48 | 48 | @commitCount = 16 |
| 49 | 49 | @savePath = if param[:path] then param[:path] else "./save" end |
| 50 | - @saveDB = if param[:db] then param[:db] else "./url.db3" end | |
| 50 | + @saveDB = if param[:db] then param[:db] else "./url.db3" end | |
| 51 | 51 | @pendingURI = param[:pendingURI] |
| 52 | 52 | @pendingURI = nil if @pendingURI.to_s == "" |
| 53 | 53 | @list = URLtable.new(param={:path => @savePath, :db => @saveDB}) |
| @@ -177,10 +177,7 @@ class ArcGET | ||
| 177 | 177 | end |
| 178 | 178 | |
| 179 | 179 | def check_root(add) |
| 180 | - #p add.url.to_s | |
| 181 | - #p add.referrer.to_s | |
| 182 | 180 | @rootExp.each do |exp| |
| 183 | - # p exp | |
| 184 | 181 | return true if exp.match(add.url.to_s) |
| 185 | 182 | return true if exp.match(add.referrer.to_s) |
| 186 | 183 | end |
| @@ -201,15 +198,11 @@ class ArcGET | ||
| 201 | 198 | def add_nextpage_sub(path, dat) |
| 202 | 199 | return unless path |
| 203 | 200 | |
| 204 | - #p path | |
| 205 | 201 | path = CGI::unescapeHTML(path.to_s) |
| 206 | 202 | return if /^(?:javascript|mailto|data|file|tel):/ni.match(path) |
| 207 | - #p "##" | |
| 208 | 203 | uri = dat.uri |
| 209 | 204 | begin |
| 210 | - # p path | |
| 211 | 205 | path = URI.parse( path.gsub(/[\x00-\x1F\x80-\xFF]/n){|x| '%'+x.unpack('H2')[0] } ) |
| 212 | - # p path | |
| 213 | 206 | rescue URI::InvalidURIError, URI::InvalidComponentError |
| 214 | 207 | # p "INV #{path}" |
| 215 | 208 | return |
| @@ -224,15 +217,11 @@ class ArcGET | ||
| 224 | 217 | if check_cgiroot(add) then |
| 225 | 218 | add.linkCountCGI = dat.linkCountCGI+1 |
| 226 | 219 | end |
| 227 | - ##add.message = path #for debug | |
| 228 | 220 | |
| 229 | - #p "L #{newuri} #{add.linkCount}:#{add.linkCountCGI}" | |
| 230 | 221 | return if add.linkCountCGI > @cgiMAXlink |
| 231 | 222 | |
| 232 | 223 | isroot = check_root(add) |
| 233 | - #p isroot | |
| 234 | 224 | if isroot and (add.linkCount <= @rootMAXlink) then |
| 235 | - # p "U" | |
| 236 | 225 | if @rootExp[0].match(add.url) or (@cgirootExp[0] and @cgirootExp[0].match(add.url)) then |
| 237 | 226 | add.priority = add.priority | 0x40000000 |
| 238 | 227 | end |
| @@ -244,31 +233,9 @@ class ArcGET | ||
| 244 | 233 | } |
| 245 | 234 | end |
| 246 | 235 | end |
| 247 | - #p path | |
| 248 | 236 | end |
| 249 | 237 | |
| 250 | 238 | def add_nextpage(dat,response) |
| 251 | -=begin | |
| 252 | - text = response.body | |
| 253 | - #p text | |
| 254 | - scriptmode = false | |
| 255 | - exp = /(<script|<\/script)\b|\b(?:href|src)(?:\s*=\s*"([^\x22]*)"|=([^\x22\x27> ]+))|\burl\(([^\x29]*)\)/ni | |
| 256 | - text.scan( exp ) do |t| | |
| 257 | - curr=t.shift | |
| 258 | - if curr=='<script' | |
| 259 | - scriptmode = true | |
| 260 | - end | |
| 261 | - if curr=='</script' | |
| 262 | - scriptmode = false | |
| 263 | - end | |
| 264 | - | |
| 265 | - if !scriptmode then | |
| 266 | - add_nextpage_sub(t[0], dat) | |
| 267 | - add_nextpage_sub(t[1], dat) | |
| 268 | - add_nextpage_sub(t[2], dat) | |
| 269 | - end | |
| 270 | - end | |
| 271 | -=end | |
| 272 | 239 | text = response.body |
| 273 | 240 | scriptmode = false |
| 274 | 241 | exp = /(<script|<\/script)\b|\b(href|src|value)(?:\s*=\s*"([^\x22]*)"|=([^\x22\x27> ]+))|\burl\(([^\x29]*)\)/ni |
| @@ -5,9 +5,6 @@ require 'fileutils' | ||
| 5 | 5 | require 'uri' |
| 6 | 6 | require 'pathname' |
| 7 | 7 | |
| 8 | -#TABLE_PATH = "./url.db3" | |
| 9 | - | |
| 10 | - | |
| 11 | 8 | |
| 12 | 9 | class URLtable |
| 13 | 10 | Waiting = 0 |
| @@ -345,10 +342,6 @@ SQL | ||
| 345 | 342 | read_sub(cond,data) |
| 346 | 343 | ) |
| 347 | 344 | end |
| 348 | -# sql = SELECT_SQLBODY | |
| 349 | -# sql += ' order by priority desc limit 1;' | |
| 350 | -# result = @db.query( sql, [] ) | |
| 351 | -# return notNil( toRow(result) ) | |
| 352 | 345 | end |
| 353 | 346 | |
| 354 | 347 | def rest |