レコメンドIndexをScalaで
| Revision | a04dc4d87185ba8294596d53814aa6574fa50508 (tree) |
|---|---|
| Time | 2011-02-24 16:16:01 |
| Author | tachiki <tachiki@p-wi...> |
| Committer | tachiki |
URLから本文を切り出す部分の仮コードを追加
| @@ -0,0 +1,14 @@ | ||
| 1 | +package com.parrotstudio.recommend.ro | |
| 2 | + | |
| 3 | +import com.parrotstudio.recommend.ro.model._ | |
| 4 | + | |
| 5 | +object RecommendIndexer { /* Provisional driver: pushes one hard-coded blog URL through a RecommendWorker and prints the final reply. */ | |
| 6 | + def main(args: Array[String]): Unit = { /* args is never read; the URL and encoding are fixed below. */ | |
| 7 | + val worker = new RecommendWorker(0) /* 0 is the worker's index constructor argument. */ | |
| 8 | + worker.start /* Starts the actor so it can receive the messages sent below. */ | |
| 9 | + val page = worker !! URL("http://parrot.blog21.fc2.com/blog-entry-2141.html", "EUC-JP") /* `!!` sends the message and hands back a future for the reply; presumably the reply is the downloaded Page - confirm against RecommendWorker.act. */ | |
| 10 | + //println(page()) | |
| 11 | + val art = worker !! page() /* page() waits for the first reply, which is then sent back to the same worker as the next message; presumably this yields the parsed Article - confirm against RecommendWorker.act. */ | |
| 12 | + println(art()) /* Waits for the second reply and prints it. NOTE(review): nothing here stops the worker afterwards - confirm whether the process is expected to exit on its own. */ | |
| 13 | + } | |
| 14 | +} |
| @@ -1,9 +1,11 @@ | ||
| 1 | 1 | package com.parrotstudio.recommend.ro |
| 2 | 2 | |
| 3 | +import scala.io.Source | |
| 3 | 4 | import scala.actors.Actor |
| 4 | 5 | import com.parrotstudio.recommend.ro.model._ |
| 5 | 6 | |
| 6 | 7 | class RecommendWorker(val index: Int) extends Actor with RecommendMessage { |
| 8 | + val parser = """.*<div id="more">(.*?)</div>.*""".r | |
| 7 | 9 | |
| 8 | 10 | def act { |
| 9 | 11 | loop { |
| @@ -19,13 +21,16 @@ class RecommendWorker(val index: Int) extends Actor with RecommendMessage { | ||
| 19 | 21 | } |
| 20 | 22 | |
| 21 | 23 | def download(u: URL): Page = { |
| 22 | - | |
| 23 | - Page("hoge") | |
| 24 | + val html = Source.fromURL(u.url, u.encoding).getLines | |
| 25 | + // TODO ファイルに保存してページ名称を返す | |
| 26 | + Page(html.mkString) | |
| 24 | 27 | } |
| 25 | 28 | |
| 26 | 29 | def parse(p: Page): Article = { |
| 27 | - | |
| 28 | - Article("hoge") | |
| 30 | + p.page match { | |
| 31 | + case parser(art) => Article("本文", art) | |
| 32 | + case _ => Article("", "") | |
| 33 | + } | |
| 29 | 34 | } |
| 30 | 35 | |
| 31 | 36 | def analysis(a: Article): Terms = { |
| @@ -1,5 +1,5 @@ | ||
| 1 | 1 | package com.parrotstudio.recommend.ro.model |
| 2 | 2 | |
| 3 | -case class Article(val body: String) { | |
| 3 | +case class Article(val body: String, val extend: String) { | |
| 4 | 4 | |
| 5 | 5 | } |
| @@ -1,5 +1,5 @@ | ||
| 1 | 1 | package com.parrotstudio.recommend.ro.model |
| 2 | 2 | |
| 3 | -case class Page(val pageName: String) { | |
| 3 | +case class Page(val page: String) { | |
| 4 | 4 | |
| 5 | 5 | } |
| @@ -1,5 +1,5 @@ | ||
| 1 | 1 | package com.parrotstudio.recommend.ro.model |
| 2 | 2 | |
| 3 | -case class URL(val url:String) { | |
| 3 | +case class URL(val url:String, val encoding:String) { | |
| 4 | 4 | |
| 5 | 5 | } |
| \ No newline at end of file |