• R/O
  • SSH
  • HTTPS

shm-rss: Commit


Commit MetaInfo

Revision: 5 (tree)
Time: 2014-05-08 20:07:59
Author: tamomo

Log Message

Tried to make it easier to read...

Change Summary

Incremental Difference

--- trunk/shm.rb (revision 4)
+++ trunk/shm.rb (revision 5)
@@ -3,58 +3,60 @@
33 require 'open-uri'
44 require 'rss'
55
6-debug = true if ARGV[0] == "-d"
6+debug = true if ARGV[0] == '-d'
77
88 url = 'http://www.st.ryukoku.ac.jp/~kjm/security/memo/'
9-url = './index.html' if debug
10-puts "opening #{url}" if debug
11-doc = Nokogiri::HTML(open(url))
9+url = './index.html' if debug ### 何度もアクセスすると悪いので
10+ttl = '60' ### cron の設定に合わせて分単位で指定
11+out = 'shm.rss'
1212
13-### FIXME: any better ways there?
14-doc.css('a[href^="/~kjm/"]').each do |anc|
15- anc['href'] = 'http://www.st.ryukoku.ac.jp' + anc['href']
16- puts "prefixed: #{anc['href']}" if debug
17-end
13+open(url) do |html|
14+ doc = Nokogiri::HTML(html) ### 最悪でも new するのかな
1815
19-### Which version should we use?
20-rss = RSS::Maker.make("2.0") do |xml|
21- xml.channel.title = doc.title
22- xml.channel.link = url
23- xml.channel.description = doc.css('div.NORMAL').first.children
24- p xml.channel if debug
16+ ### 相対パスを絶対パスに。格好いい方法ないのかな
17+ doc.css('a[href^="/~kjm/"]').each do |anc|
18+ anc['href'] = 'http://www.st.ryukoku.ac.jp' + anc['href']
19+ puts "prefixed: #{anc['href']}" if debug
20+ end
2521
26- doc.css('a.NU').each do |link|
27- next if link.parent.name == "h2"
28- puts "processing: #{link}" if debug
29- i = xml.items.new_item
30- ### a"》", span" ", content
31- i.title = link.next.next.content
32- i.link = link['href']
33- if link.parent.name == "p"
34- ### Normal short items
35- i.description = link.parent.parent.children
36- elsif link.parent.name == "h3"
37- ### "various", "tuiki" etc
38- i.description = link.parent.next.next
39- else
40- i.description = "Something wrong"
22+ rss = RSS::Maker.make('2.0') do |xml|
23+ xml.channel.title = doc.title
24+ xml.channel.link = url
25+ p xml.channel if debug
26+
27+ ### 「追いかけてみるテストです」のあたりにしてみた
28+ xml.channel.description = doc.css('div.NORMAL').first.children
29+
30+ doc.css('a.NU').each do |link|
31+ next if link.parent.name == "h2" ### その中にまた a.NU がある
32+
33+ puts "processing: #{link}" if debug
34+ i = xml.items.new_item
35+ ### "》" の次が空 span で、その次がリンクかな
36+ i.title = link.next.next.content
37+ i.link = link['href']
38+ if link.parent.name == 'p' ### 大部分の一行もの
39+ i.description = link.parent.parent.children
40+ elsif link.parent.name == 'h3' ### 「いろいろ」とか「追記」
41+ i.description = link.parent.next.next
42+ else
43+ i.description = '(HTML のパースに失敗しました)'
44+ end
45+ ### アンカーから日付だけ取得するハック
46+ i.date = Time.parse(/#([0-9]{8})/.match(link['href'])[1])
47+
48+ if debug
49+ puts " #{link.parent.name}: Title: #{i.title}"
50+ puts " Link: #{i.link}"
51+ puts " Date: #{i.date}"
52+ puts "" ### description は長いから出力しない
53+ end
4154 end
42- i.date = Time.parse(/#([0-9]{8})/.match(link['href'])[1])
4355
44- if debug
45- puts " #{link.parent.name}: Title: #{i.title}"
46- puts " Link: #{i.link}"
47- puts " Date: #{i.date}"
48- puts "" ### description is too long to put here
49- end
56+ xml.channel.ttl = ttl
5057 end
5158
52- ### TTL depends on your cron settings
53- xml.channel.ttl = "60" ### (in minutes)
59+ File.open(out, 'w') do |f|
60+ f.write(rss.to_s)
61+ end
5462 end
55-
56-### Lazy: should check before writing
57-File.open("shm.rss", "w") do |f|
58- f.write(rss.to_s)
59-end
60-
Show on old repository browser