• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

レコメンドIndexをScalaで


Commit MetaInfo

Revision: 1059eedb662cc5e753c0f819406b29100947e5c6 (tree)
Time: 2011-03-03 16:51:34
Author: tachiki <tachiki@p-wi...>
Committer: tachiki

Log Message

Blog本文のparserを追加

Change Summary

Incremental Difference

--- a/src/com/parrotstudio/recommend/ro/RecommendWorker.scala
+++ b/src/com/parrotstudio/recommend/ro/RecommendWorker.scala
@@ -5,7 +5,7 @@ import scala.actors.Actor
55 import com.parrotstudio.recommend.ro.model._
66
77 class RecommendWorker(val index: Int) extends Actor with RecommendMessage {
8- val parser = """.*<title>Angel, alone.*孤独な天使.*-(.*)-</title>.*<div id="more">(.*?)</div>.*""".r
8+ val parser = """.*<title>Angel, alone.*孤独な天使.*-(.*)-</title>.*<div class="_body">(.*?)</div>.*<div id="more">(.*?)</div>.*""".r
99
1010 def act {
1111 loop {
@@ -28,7 +28,7 @@ class RecommendWorker(val index: Int) extends Actor with RecommendMessage {
2828
2929 def parse(p: Page): Article = {
3030 p.page match {
31- case parser(title, ext) => Article(normalize(title), "本文", normalize(ext))
31+ case parser(title, body, ext) => Article(normalize(title), normalize(body), normalize(ext))
3232 case _ => new Article()
3333 }
3434 }
@@ -43,7 +43,33 @@ class RecommendWorker(val index: Int) extends Actor with RecommendMessage {
4343 }
4444
4545 private def normalize(text: String): String = {
46- text.trim.replaceAll("<br.*?/?>", "\n").replaceAll("<a.+href=.*?>", "").replaceAll("</a>", "")
46+ val funcs = List(
47+ replace_line_tag(_),
48+ remove_link_tag(_),
49+ remove_img_tag(_),
50+ remove_form_tag(_),
51+ replace_webclap_comment(_))
52+
53+ Function.chain(funcs)(text.trim)
54+ }
55+
56+ private def replace_line_tag(text: String): String = {
57+ text.replaceAll("<br.*?/?>", "\n")
58+ }
59+
60+ private def remove_link_tag(text: String): String = {
61+ text.replaceAll("<a.+href=.*?>", "").replaceAll("</a>", "")
4762 }
4863
64+ private def remove_img_tag(text: String): String = {
65+ text.replaceAll("<img.+src=.*?>", "")
66+ }
67+
68+ private def remove_form_tag(text: String): String = {
69+ text.replaceAll("<form.*?>", "").replaceAll("</form>", "")
70+ }
71+
72+ private def replace_webclap_comment(text: String): String = {
73+ text.replaceAll("<input type=submit value=(.*?)>", "$1")
74+ }
4975 }
\ No newline at end of file