作業部屋の使い方を試しています。
xmlファイルコンバート
| @@ -1,314 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package webScraping.utility; | |
| 24 | - | |
| 25 | -import webScraping.core.SearchData; | |
| 26 | -import java.io.File; | |
| 27 | -import java.io.FileNotFoundException; | |
| 28 | -import java.io.FileOutputStream; | |
| 29 | -import java.io.IOException; | |
| 30 | -import java.util.logging.Level; | |
| 31 | -import java.util.logging.Logger; | |
| 32 | -import javax.xml.parsers.DocumentBuilder; | |
| 33 | -import javax.xml.parsers.DocumentBuilderFactory; | |
| 34 | -import javax.xml.parsers.ParserConfigurationException; | |
| 35 | -import javax.xml.transform.Transformer; | |
| 36 | -import javax.xml.transform.TransformerConfigurationException; | |
| 37 | -import javax.xml.transform.TransformerException; | |
| 38 | -import javax.xml.transform.TransformerFactory; | |
| 39 | -import javax.xml.transform.dom.DOMSource; | |
| 40 | -import javax.xml.transform.stream.StreamResult; | |
| 41 | -import org.w3c.dom.DOMImplementation; | |
| 42 | -import org.w3c.dom.Document; | |
| 43 | -import org.w3c.dom.Element; | |
| 44 | -import org.w3c.dom.Node; | |
| 45 | -import org.w3c.dom.NodeList; | |
| 46 | -import org.xml.sax.SAXException; | |
| 47 | - | |
| 48 | -/** | |
| 49 | - * 検索データ読込・保存. | |
| 50 | - * @author kgto | |
| 51 | - */ | |
| 52 | -public class SearchDataRW { | |
| 53 | - /* ---------------------------------------------------------------------- * | |
| 54 | - * フィールド | |
| 55 | - * ---------------------------------------------------------------------- */ | |
| 56 | - private String UrlAdress; | |
| 57 | - | |
| 58 | - DocumentBuilder builder; | |
| 59 | - public Document document; | |
| 60 | - Element root; | |
| 61 | - | |
| 62 | - /* ---------------------------------------------------------------------- * | |
| 63 | - * コンストラクタ | |
| 64 | - * ---------------------------------------------------------------------- */ | |
| 65 | - public SearchDataRW() { | |
| 66 | - try { | |
| 67 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 68 | - builder = factory.newDocumentBuilder(); | |
| 69 | - | |
| 70 | - } catch (ParserConfigurationException ex) { | |
| 71 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 72 | - } | |
| 73 | - } | |
| 74 | - | |
| 75 | - /* ---------------------------------------------------------------------- * | |
| 76 | - * Setter | |
| 77 | - * ---------------------------------------------------------------------- */ | |
| 78 | - public void seturl(String UrlAdress) { | |
| 79 | - this.UrlAdress = UrlAdress; | |
| 80 | - } | |
| 81 | - | |
| 82 | - /* ---------------------------------------------------------------------- * | |
| 83 | - * Getter | |
| 84 | - * ---------------------------------------------------------------------- */ | |
| 85 | - public String geturl() { | |
| 86 | - return UrlAdress; | |
| 87 | - } | |
| 88 | - | |
| 89 | - /* ---------------------------------------------------------------------- * | |
| 90 | - * メソッド | |
| 91 | - * ---------------------------------------------------------------------- */ | |
| 92 | - /** | |
| 93 | - * 保存. | |
| 94 | - * @param file | |
| 95 | - */ | |
| 96 | - public void save(File file) { | |
| 97 | - saveUrl(UrlAdress); | |
| 98 | - saveSearchList(); | |
| 99 | - write(file); | |
| 100 | - } | |
| 101 | - | |
| 102 | - /** | |
| 103 | - * 読込. | |
| 104 | - * @param file | |
| 105 | - */ | |
| 106 | - public void load(File file) { | |
| 107 | - read(file); | |
| 108 | - loadUrl(); | |
| 109 | - loadSearchList(); | |
| 110 | - } | |
| 111 | - | |
| 112 | - /* ---------------------------------------------------------------------- */ | |
| 113 | - | |
| 114 | - void loadUrl() { | |
| 115 | - NodeList nodelist = root.getElementsByTagName("url"); | |
| 116 | - Node node = nodelist.item(0); | |
| 117 | - UrlAdress = node.getFirstChild().getNodeValue(); | |
| 118 | - } | |
| 119 | - | |
| 120 | - public void loadSearchList() { | |
| 121 | - SearchData.clear(); | |
| 122 | - | |
| 123 | - NodeList nodelist = root.getElementsByTagName("searchlist"); | |
| 124 | - for(int i = 0; i < nodelist.getLength(); i++) { | |
| 125 | - Node childnode = nodelist.item(i); | |
| 126 | - | |
| 127 | - boolean sdatflg = false; | |
| 128 | - SearchData sdat = new SearchData(); | |
| 129 | - for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 130 | - if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 131 | - String tag = child.getNodeName(); | |
| 132 | - String rtn = ""; | |
| 133 | - if(child.getFirstChild() != null) { | |
| 134 | - rtn = child.getFirstChild().getNodeValue(); | |
| 135 | - } | |
| 136 | - switch (tag) { | |
| 137 | - case "item" : | |
| 138 | - sdat.setitem(rtn); | |
| 139 | - sdatflg = true; | |
| 140 | - break; | |
| 141 | - case "htmltag" : | |
| 142 | - sdat.setHtmltag(rtn); | |
| 143 | - sdatflg = true; | |
| 144 | - break; | |
| 145 | - case "htmlid" : | |
| 146 | - sdat.setHtmlid(rtn); | |
| 147 | - sdatflg = true; | |
| 148 | - break; | |
| 149 | - case "htmlclass" : | |
| 150 | - sdat.setHtmlclass(rtn); | |
| 151 | - sdatflg = true; | |
| 152 | - break; | |
| 153 | - case "around" : | |
| 154 | - sdat.setaround(rtn); | |
| 155 | - sdatflg = true; | |
| 156 | - break; | |
| 157 | - case "regexp" : | |
| 158 | - sdat.setregexp(rtn); | |
| 159 | - sdatflg = true; | |
| 160 | - break; | |
| 161 | - } | |
| 162 | - } | |
| 163 | - } | |
| 164 | - if(sdatflg) SearchData.add(sdat); | |
| 165 | - } | |
| 166 | - } | |
| 167 | - | |
| 168 | - public String loadMsg404() { | |
| 169 | - StringBuilder strbuf = new StringBuilder(); | |
| 170 | - NodeList nodelist = root.getElementsByTagName("msg404"); | |
| 171 | - for(int i = 0; i < nodelist.getLength(); i++) { | |
| 172 | - Node childnode = nodelist.item(i); | |
| 173 | - String str = childnode.getFirstChild().getNodeValue(); | |
| 174 | - if(strbuf.length() > 0) { | |
| 175 | - strbuf.append("\n"); | |
| 176 | - } | |
| 177 | - strbuf.append(str); | |
| 178 | - } | |
| 179 | - return strbuf.toString(); | |
| 180 | - } | |
| 181 | - | |
| 182 | - public Element loadElement(String elementTagName) { | |
| 183 | - NodeList nodelist = root.getElementsByTagName(elementTagName); | |
| 184 | - Element element = (Element)nodelist.item(0); | |
| 185 | - | |
| 186 | - return element; | |
| 187 | - } | |
| 188 | - | |
| 189 | - /* ---------------------------------------------------------------------- */ | |
| 190 | - | |
| 191 | - void saveUrl(String urladdress) { | |
| 192 | - checkdoc(); | |
| 193 | - removeElement("url"); // 既にElementが存在してた場合、一度削除 | |
| 194 | - | |
| 195 | - Element url = document.createElement("url"); | |
| 196 | - url.appendChild(document.createTextNode(urladdress)); | |
| 197 | - root.appendChild(url); | |
| 198 | - } | |
| 199 | - | |
| 200 | - void saveSearchList() { | |
| 201 | - checkdoc(); | |
| 202 | - removeElement("searchlist"); // 既にElementが存在してた場合、一度削除 | |
| 203 | - | |
| 204 | - int count = 0; | |
| 205 | - for(int i = 0; i < SearchData.size(); i++) { | |
| 206 | - SearchData sdat = SearchData.get(i); | |
| 207 | - | |
| 208 | - Element cslist = document.createElement("searchlist"); | |
| 209 | - cslist.setAttribute("listNo", String.valueOf(++count)); | |
| 210 | - | |
| 211 | - addChild(cslist, "item", sdat.getitem()); | |
| 212 | - addChild(cslist, "htmltag", sdat.getHtmltag()); | |
| 213 | - addChild(cslist, "htmlid", sdat.getHtmlid()); | |
| 214 | - addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
| 215 | - addChild(cslist, "around", sdat.getaround()); | |
| 216 | - addChild(cslist, "regexp", sdat.getregexp()); | |
| 217 | - | |
| 218 | - root.appendChild(cslist); | |
| 219 | - } | |
| 220 | - } | |
| 221 | - | |
| 222 | - void saveMsg404(String msg) { | |
| 223 | - checkdoc(); | |
| 224 | - removeElement("msg404"); // 既にElementが存在してた場合、一度削除 | |
| 225 | - | |
| 226 | - String[] msgs = msg.split("\n"); | |
| 227 | - int count = 0; | |
| 228 | - for(String msgOne : msgs) { | |
| 229 | - Element msgElement = document.createElement("msg404"); | |
| 230 | - msgElement.setAttribute("No", String.valueOf(++count)); | |
| 231 | - msgElement.appendChild(document.createTextNode(msgOne)); | |
| 232 | - | |
| 233 | - root.appendChild(msgElement); | |
| 234 | - } | |
| 235 | - } | |
| 236 | - | |
| 237 | - public void saveElement(Element element) { | |
| 238 | - checkdoc(); | |
| 239 | - removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除 | |
| 240 | - | |
| 241 | - root.appendChild(element); | |
| 242 | - } | |
| 243 | - | |
| 244 | - /* ---------------------------------------------------------------------- */ | |
| 245 | - | |
| 246 | - private void addChild(Element cslist, String keyword, String data) { | |
| 247 | - if(!data.isEmpty()) { | |
| 248 | - Element element = document.createElement(keyword); | |
| 249 | - element.appendChild(document.createTextNode(data)); | |
| 250 | - cslist.appendChild(element); | |
| 251 | - } | |
| 252 | - } | |
| 253 | - | |
| 254 | - private void removeElement(String elementTagName) { | |
| 255 | - int nodeSize; | |
| 256 | - do { | |
| 257 | - NodeList nodelist = document.getElementsByTagName(elementTagName); | |
| 258 | - nodeSize = nodelist.getLength(); | |
| 259 | - for(int i = 0; i < nodelist.getLength(); i++) { | |
| 260 | - Node node = nodelist.item(i); | |
| 261 | - root.removeChild(node); | |
| 262 | - } | |
| 263 | - } while(nodeSize > 0); | |
| 264 | - } | |
| 265 | - | |
| 266 | - /** | |
| 267 | - * ドキュメントチェック. | |
| 268 | - * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。 | |
| 269 | - * 既読の場合、ルートエレメントの取得を行う。 | |
| 270 | - */ | |
| 271 | - public void checkdoc() { | |
| 272 | - if(document == null) { | |
| 273 | - DOMImplementation domImpl = builder.getDOMImplementation(); | |
| 274 | - document = domImpl.createDocument("","searchdata",null); | |
| 275 | - } | |
| 276 | - root = document.getDocumentElement(); | |
| 277 | - } | |
| 278 | - | |
| 279 | - /** | |
| 280 | - * XML読込み. | |
| 281 | - * @param file | |
| 282 | - */ | |
| 283 | - public void read(File file) { | |
| 284 | - try { | |
| 285 | - document = builder.parse(file); | |
| 286 | - root = document.getDocumentElement(); | |
| 287 | - | |
| 288 | - } catch (SAXException | IOException ex) { | |
| 289 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 290 | - } | |
| 291 | - } | |
| 292 | - | |
| 293 | - /** | |
| 294 | - * XML書込み. | |
| 295 | - * @param file | |
| 296 | - */ | |
| 297 | - public void write(File file) { | |
| 298 | - try { | |
| 299 | - TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 300 | - Transformer transformer = transFactory.newTransformer(); | |
| 301 | - | |
| 302 | - DOMSource source = new DOMSource(document); | |
| 303 | - FileOutputStream os = new FileOutputStream(file); | |
| 304 | - StreamResult result = new StreamResult(os); | |
| 305 | - transformer.transform(source, result); | |
| 306 | - | |
| 307 | - } catch (TransformerConfigurationException ex) { | |
| 308 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 309 | - } catch (FileNotFoundException | TransformerException ex) { | |
| 310 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 311 | - } | |
| 312 | - } | |
| 313 | - | |
| 314 | -} |
| @@ -44,17 +44,17 @@ | ||
| 44 | 44 | /* ---------------------------------------------------------------------- * |
| 45 | 45 | * コンストラクタ |
| 46 | 46 | * ---------------------------------------------------------------------- */ |
| 47 | - ScrapingXml() { | |
| 47 | + public ScrapingXml() { | |
| 48 | 48 | } |
| 49 | 49 | |
| 50 | 50 | /* ---------------------------------------------------------------------- * |
| 51 | 51 | * Setter |
| 52 | 52 | * ---------------------------------------------------------------------- */ |
| 53 | - void setTestUrl(String testUrl) { | |
| 53 | + public void setTestUrl(String testUrl) { | |
| 54 | 54 | this.testUrl = testUrl; |
| 55 | 55 | } |
| 56 | 56 | |
| 57 | - void setSdata() { | |
| 57 | + public void setSdata() { | |
| 58 | 58 | this.sdata = new SearchData[SearchData.size()]; |
| 59 | 59 | for(int i = 0; i < SearchData.size(); i++) { |
| 60 | 60 | this.sdata[i] = SearchData.get(i); |
| @@ -64,11 +64,11 @@ | ||
| 64 | 64 | /* ---------------------------------------------------------------------- * |
| 65 | 65 | * Getter |
| 66 | 66 | * ---------------------------------------------------------------------- */ |
| 67 | - String getTestUrl() { | |
| 67 | + public String getTestUrl() { | |
| 68 | 68 | return testUrl; |
| 69 | 69 | } |
| 70 | 70 | |
| 71 | - void getSdata() { | |
| 71 | + public void getSdata() { | |
| 72 | 72 | SearchData.clear(); |
| 73 | 73 | for(SearchData sdata1 : sdata) { |
| 74 | 74 | SearchData.add(sdata1); |
| @@ -78,7 +78,7 @@ | ||
| 78 | 78 | /* ---------------------------------------------------------------------- * |
| 79 | 79 | * メソッド |
| 80 | 80 | * ---------------------------------------------------------------------- */ |
| 81 | - void save(File file) { | |
| 81 | + public void save(File file) { | |
| 82 | 82 | |
| 83 | 83 | elementset(); |
| 84 | 84 |
| @@ -1,108 +1,71 @@ | ||
| 1 | 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> |
| 2 | -<searchdata> | |
| 3 | - <url>http://weather.yahoo.co.jp/weather/</url> | |
| 4 | - <searchlist> | |
| 5 | - <item>天気01</item> | |
| 6 | - <htmltag>li</htmltag> | |
| 7 | - <htmlid/> | |
| 8 | - <htmlclass>point pt1400</htmlclass> | |
| 9 | - <around/> | |
| 10 | - <regexp/> | |
| 11 | - </searchlist> | |
| 12 | - <searchlist> | |
| 13 | - <item>天気02</item> | |
| 14 | - <htmltag>li</htmltag> | |
| 15 | - <htmlid/> | |
| 16 | - <htmlclass>point pt1900</htmlclass> | |
| 17 | - <around/> | |
| 18 | - <regexp/> | |
| 19 | - </searchlist> | |
| 20 | - <searchlist> | |
| 21 | - <item>天気03</item> | |
| 22 | - <htmltag>li</htmltag> | |
| 23 | - <htmlid/> | |
| 24 | - <htmlclass>point pt3410</htmlclass> | |
| 25 | - <around/> | |
| 26 | - <regexp/> | |
| 27 | - </searchlist> | |
| 28 | - <searchlist> | |
| 29 | - <item>天気04</item> | |
| 30 | - <htmltag>li</htmltag> | |
| 31 | - <htmlid/> | |
| 32 | - <htmlclass>point pt4410</htmlclass> | |
| 33 | - <around/> | |
| 34 | - <regexp/> | |
| 35 | - </searchlist> | |
| 36 | - <searchlist> | |
| 37 | - <item>天気05</item> | |
| 38 | - <htmltag>li</htmltag> | |
| 39 | - <htmlid/> | |
| 40 | - <htmlclass>point pt5110</htmlclass> | |
| 41 | - <around/> | |
| 42 | - <regexp/> | |
| 43 | - </searchlist> | |
| 44 | - <searchlist> | |
| 45 | - <item>天気06</item> | |
| 46 | - <htmltag>li</htmltag> | |
| 47 | - <htmlid/> | |
| 48 | - <htmlclass>point pt5410</htmlclass> | |
| 49 | - <around/> | |
| 50 | - <regexp/> | |
| 51 | - </searchlist> | |
| 52 | - <searchlist> | |
| 53 | - <item>天気07</item> | |
| 54 | - <htmltag>li</htmltag> | |
| 55 | - <htmlid/> | |
| 56 | - <htmlclass>point pt5610</htmlclass> | |
| 57 | - <around/> | |
| 58 | - <regexp/> | |
| 59 | - </searchlist> | |
| 60 | - <searchlist> | |
| 61 | - <item>天気08</item> | |
| 62 | - <htmltag>li</htmltag> | |
| 63 | - <htmlid/> | |
| 64 | - <htmlclass>point pt6200</htmlclass> | |
| 65 | - <around/> | |
| 66 | - <regexp/> | |
| 67 | - </searchlist> | |
| 68 | - <searchlist> | |
| 69 | - <item>天気09</item> | |
| 70 | - <htmltag>li</htmltag> | |
| 71 | - <htmlid/> | |
| 72 | - <htmlclass>point pt6710</htmlclass> | |
| 73 | - <around/> | |
| 74 | - <regexp/> | |
| 75 | - </searchlist> | |
| 76 | - <searchlist> | |
| 77 | - <item>天気10</item> | |
| 78 | - <htmltag>li</htmltag> | |
| 79 | - <htmlid/> | |
| 80 | - <htmlclass>point pt7410</htmlclass> | |
| 81 | - <around/> | |
| 82 | - <regexp/> | |
| 83 | - </searchlist> | |
| 84 | - <searchlist> | |
| 85 | - <item>天気11</item> | |
| 86 | - <htmltag>li</htmltag> | |
| 87 | - <htmlid/> | |
| 88 | - <htmlclass>point pt8210</htmlclass> | |
| 89 | - <around/> | |
| 90 | - <regexp/> | |
| 91 | - </searchlist> | |
| 92 | - <searchlist> | |
| 93 | - <item>天気12</item> | |
| 94 | - <htmltag>li</htmltag> | |
| 95 | - <htmlid/> | |
| 96 | - <htmlclass>point pt8810</htmlclass> | |
| 97 | - <around/> | |
| 98 | - <regexp/> | |
| 99 | - </searchlist> | |
| 100 | - <searchlist> | |
| 101 | - <item>天気13</item> | |
| 102 | - <htmltag>li</htmltag> | |
| 103 | - <htmlid/> | |
| 104 | - <htmlclass>point pt9110</htmlclass> | |
| 105 | - <around/> | |
| 106 | - <regexp/> | |
| 107 | - </searchlist> | |
| 108 | -</searchdata> | |
| \ No newline at end of file | ||
| 2 | +<xmlcontainer> | |
| 3 | +<webscraping> | |
| 4 | +<url>http://weather.yahoo.co.jp/weather/</url> | |
| 5 | +<searchlist listNo="1"> | |
| 6 | +<item>天気01</item> | |
| 7 | +<htmltag>li</htmltag> | |
| 8 | +<htmlclass>point pt1400</htmlclass> | |
| 9 | +</searchlist> | |
| 10 | +<searchlist listNo="2"> | |
| 11 | +<item>天気02</item> | |
| 12 | +<htmltag>li</htmltag> | |
| 13 | +<htmlclass>point pt1900</htmlclass> | |
| 14 | +</searchlist> | |
| 15 | +<searchlist listNo="3"> | |
| 16 | +<item>天気03</item> | |
| 17 | +<htmltag>li</htmltag> | |
| 18 | +<htmlclass>point pt3410</htmlclass> | |
| 19 | +</searchlist> | |
| 20 | +<searchlist listNo="4"> | |
| 21 | +<item>天気04</item> | |
| 22 | +<htmltag>li</htmltag> | |
| 23 | +<htmlclass>point pt4410</htmlclass> | |
| 24 | +</searchlist> | |
| 25 | +<searchlist listNo="5"> | |
| 26 | +<item>天気05</item> | |
| 27 | +<htmltag>li</htmltag> | |
| 28 | +<htmlclass>point pt5110</htmlclass> | |
| 29 | +</searchlist> | |
| 30 | +<searchlist listNo="6"> | |
| 31 | +<item>天気06</item> | |
| 32 | +<htmltag>li</htmltag> | |
| 33 | +<htmlclass>point pt5410</htmlclass> | |
| 34 | +</searchlist> | |
| 35 | +<searchlist listNo="7"> | |
| 36 | +<item>天気07</item> | |
| 37 | +<htmltag>li</htmltag> | |
| 38 | +<htmlclass>point pt5610</htmlclass> | |
| 39 | +</searchlist> | |
| 40 | +<searchlist listNo="8"> | |
| 41 | +<item>天気08</item> | |
| 42 | +<htmltag>li</htmltag> | |
| 43 | +<htmlclass>point pt6200</htmlclass> | |
| 44 | +</searchlist> | |
| 45 | +<searchlist listNo="9"> | |
| 46 | +<item>天気09</item> | |
| 47 | +<htmltag>li</htmltag> | |
| 48 | +<htmlclass>point pt6710</htmlclass> | |
| 49 | +</searchlist> | |
| 50 | +<searchlist listNo="10"> | |
| 51 | +<item>天気10</item> | |
| 52 | +<htmltag>li</htmltag> | |
| 53 | +<htmlclass>point pt7410</htmlclass> | |
| 54 | +</searchlist> | |
| 55 | +<searchlist listNo="11"> | |
| 56 | +<item>天気11</item> | |
| 57 | +<htmltag>li</htmltag> | |
| 58 | +<htmlclass>point pt8210</htmlclass> | |
| 59 | +</searchlist> | |
| 60 | +<searchlist listNo="12"> | |
| 61 | +<item>天気12</item> | |
| 62 | +<htmltag>li</htmltag> | |
| 63 | +<htmlclass>point pt8810</htmlclass> | |
| 64 | +</searchlist> | |
| 65 | +<searchlist listNo="13"> | |
| 66 | +<item>天気13</item> | |
| 67 | +<htmltag>li</htmltag> | |
| 68 | +<htmlclass>point pt9110</htmlclass> | |
| 69 | +</searchlist> | |
| 70 | +</webscraping> | |
| 71 | +</xmlcontainer> |
| @@ -1 +1,217 @@ | ||
| 1 | -<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata><url>http://stocks.finance.yahoo.co.jp/stocks/detail/?code=9984.T</url><searchlist><item>銘柄コード</item><htmltag>dl</htmltag><htmlid/><htmlclass>stocksInfo clearFix</htmlclass><around/><regexp>(^\d{4})</regexp></searchlist><searchlist><item>カテゴリ</item><htmltag>div</htmltag><htmlid/><htmlclass>stockMainTabParts stockMainTabPartsCurrent</htmlclass><around/><regexp/></searchlist><searchlist><item>業種</item><htmltag>dd</htmltag><htmlid/><htmlclass>category yjSb</htmlclass><around/><regexp/></searchlist><searchlist><item>取得時間</item><htmltag>dd</htmltag><htmlid/><htmlclass>yjSb real</htmlclass><around/><regexp>^(.*)\t</regexp></searchlist><searchlist><item>銘柄名</item><htmltag>th</htmltag><htmlid/><htmlclass>symbol</htmlclass><around/><regexp/></searchlist><searchlist><item>株価</item><htmltag>td</htmltag><htmlid/><htmlclass>stoksPrice</htmlclass><around/><regexp/></searchlist><searchlist><item>前日比</item><htmltag>td</htmltag><htmlid/><htmlclass>change</htmlclass><around/><regexp>\t(.*)(.*%)</regexp></searchlist><searchlist><item>前日比%</item><htmltag>td</htmltag><htmlid/><htmlclass>change</htmlclass><around/><regexp>\t.*((.*)%)</regexp></searchlist><searchlist><item>前日終値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>0</around><regexp>^([,0-9]+)\t</regexp></searchlist><searchlist><item>始値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>1</around><regexp>^([,0-9]+|-{3})\t</regexp></searchlist><searchlist><item>高値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>2</around><regexp>^([,0-9]+|-{3})\t</regexp></searchlist><searchlist><item>安値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>3</around><regexp>^([,0-9]+|-{3})\t</regexp></searchlist><searchlist><item>出来高</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>4</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>売買代金</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>5</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>値幅制限</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi clearfix</htmlclass><around>6</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>時価総額</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>0</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>発行済株式数</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>1</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>配当利回り</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>2</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>1株配当</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>3</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>PER</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>4</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>PBR</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>5</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>EPS</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>6</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>BPS</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>7</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>最低購入代金</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>8</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>単元株数</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>9</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>年初来高値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>10</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>年初来安値</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>11</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用買残</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>12</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用買残前週比</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>13</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用売残</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>14</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>信用売残前週比</item><htmltag>div</htmltag><htmlid/><htmlclass>lineFi yjMS clearfix</htmlclass><around>15</around><regexp>^(.*?)\t</regexp></searchlist><searchlist><item>貸借倍率</item><htmltag>div</htmltag><htmlid/><htmlclass>yjMS clearfix</htmlclass><around/><regexp>^(.*?)\t</regexp></searchlist></searchdata> | |
| \ No newline at end of file | ||
| 1 | +<?xml version="1.0" encoding="UTF-8" standalone="no"?> | |
| 2 | +<xmlcontainer> | |
| 3 | +<webscraping> | |
| 4 | +<url>http://stocks.finance.yahoo.co.jp/stocks/detail/?code=9984.T</url> | |
| 5 | +<searchlist listNo="1"> | |
| 6 | +<item>銘柄コード</item> | |
| 7 | +<htmltag>dl</htmltag> | |
| 8 | +<htmlclass>stocksInfo clearFix</htmlclass> | |
| 9 | +<regexp>(^\d{4})</regexp> | |
| 10 | +</searchlist> | |
| 11 | +<searchlist listNo="2"> | |
| 12 | +<item>カテゴリ</item> | |
| 13 | +<htmltag>div</htmltag> | |
| 14 | +<htmlclass>stockMainTabParts stockMainTabPartsCurrent</htmlclass> | |
| 15 | +</searchlist> | |
| 16 | +<searchlist listNo="3"> | |
| 17 | +<item>業種</item> | |
| 18 | +<htmltag>dd</htmltag> | |
| 19 | +<htmlclass>category yjSb</htmlclass> | |
| 20 | +</searchlist> | |
| 21 | +<searchlist listNo="4"> | |
| 22 | +<item>取得時間</item> | |
| 23 | +<htmltag>dd</htmltag> | |
| 24 | +<htmlclass>yjSb real</htmlclass> | |
| 25 | +<regexp>^(.*)\t</regexp> | |
| 26 | +</searchlist> | |
| 27 | +<searchlist listNo="5"> | |
| 28 | +<item>銘柄名</item> | |
| 29 | +<htmltag>th</htmltag> | |
| 30 | +<htmlclass>symbol</htmlclass> | |
| 31 | +</searchlist> | |
| 32 | +<searchlist listNo="6"> | |
| 33 | +<item>株価</item> | |
| 34 | +<htmltag>td</htmltag> | |
| 35 | +<htmlclass>stoksPrice</htmlclass> | |
| 36 | +</searchlist> | |
| 37 | +<searchlist listNo="7"> | |
| 38 | +<item>前日比</item> | |
| 39 | +<htmltag>td</htmltag> | |
| 40 | +<htmlclass>change</htmlclass> | |
| 41 | +<regexp>\t(.*)(.*%)</regexp> | |
| 42 | +</searchlist> | |
| 43 | +<searchlist listNo="8"> | |
| 44 | +<item>前日比%</item> | |
| 45 | +<htmltag>td</htmltag> | |
| 46 | +<htmlclass>change</htmlclass> | |
| 47 | +<regexp>\t.*((.*)%)</regexp> | |
| 48 | +</searchlist> | |
| 49 | +<searchlist listNo="9"> | |
| 50 | +<item>前日終値</item> | |
| 51 | +<htmltag>div</htmltag> | |
| 52 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 53 | +<around>0</around> | |
| 54 | +<regexp>^([,0-9]+)\t</regexp> | |
| 55 | +</searchlist> | |
| 56 | +<searchlist listNo="10"> | |
| 57 | +<item>始値</item> | |
| 58 | +<htmltag>div</htmltag> | |
| 59 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 60 | +<around>1</around> | |
| 61 | +<regexp>^([,0-9]+|-{3})\t</regexp> | |
| 62 | +</searchlist> | |
| 63 | +<searchlist listNo="11"> | |
| 64 | +<item>高値</item> | |
| 65 | +<htmltag>div</htmltag> | |
| 66 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 67 | +<around>2</around> | |
| 68 | +<regexp>^([,0-9]+|-{3})\t</regexp> | |
| 69 | +</searchlist> | |
| 70 | +<searchlist listNo="12"> | |
| 71 | +<item>安値</item> | |
| 72 | +<htmltag>div</htmltag> | |
| 73 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 74 | +<around>3</around> | |
| 75 | +<regexp>^([,0-9]+|-{3})\t</regexp> | |
| 76 | +</searchlist> | |
| 77 | +<searchlist listNo="13"> | |
| 78 | +<item>出来高</item> | |
| 79 | +<htmltag>div</htmltag> | |
| 80 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 81 | +<around>4</around> | |
| 82 | +<regexp>^(.*?)\t</regexp> | |
| 83 | +</searchlist> | |
| 84 | +<searchlist listNo="14"> | |
| 85 | +<item>売買代金</item> | |
| 86 | +<htmltag>div</htmltag> | |
| 87 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 88 | +<around>5</around> | |
| 89 | +<regexp>^(.*?)\t</regexp> | |
| 90 | +</searchlist> | |
| 91 | +<searchlist listNo="15"> | |
| 92 | +<item>値幅制限</item> | |
| 93 | +<htmltag>div</htmltag> | |
| 94 | +<htmlclass>lineFi clearfix</htmlclass> | |
| 95 | +<around>6</around> | |
| 96 | +<regexp>^(.*?)\t</regexp> | |
| 97 | +</searchlist> | |
| 98 | +<searchlist listNo="16"> | |
| 99 | +<item>時価総額</item> | |
| 100 | +<htmltag>div</htmltag> | |
| 101 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 102 | +<around>0</around> | |
| 103 | +<regexp>^(.*?)\t</regexp> | |
| 104 | +</searchlist> | |
| 105 | +<searchlist listNo="17"> | |
| 106 | +<item>発行済株式数</item> | |
| 107 | +<htmltag>div</htmltag> | |
| 108 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 109 | +<around>1</around> | |
| 110 | +<regexp>^(.*?)\t</regexp> | |
| 111 | +</searchlist> | |
| 112 | +<searchlist listNo="18"> | |
| 113 | +<item>配当利回り</item> | |
| 114 | +<htmltag>div</htmltag> | |
| 115 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 116 | +<around>2</around> | |
| 117 | +<regexp>^(.*?)\t</regexp> | |
| 118 | +</searchlist> | |
| 119 | +<searchlist listNo="19"> | |
| 120 | +<item>1株配当</item> | |
| 121 | +<htmltag>div</htmltag> | |
| 122 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 123 | +<around>3</around> | |
| 124 | +<regexp>^(.*?)\t</regexp> | |
| 125 | +</searchlist> | |
| 126 | +<searchlist listNo="20"> | |
| 127 | +<item>PER</item> | |
| 128 | +<htmltag>div</htmltag> | |
| 129 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 130 | +<around>4</around> | |
| 131 | +<regexp>^(.*?)\t</regexp> | |
| 132 | +</searchlist> | |
| 133 | +<searchlist listNo="21"> | |
| 134 | +<item>PBR</item> | |
| 135 | +<htmltag>div</htmltag> | |
| 136 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 137 | +<around>5</around> | |
| 138 | +<regexp>^(.*?)\t</regexp> | |
| 139 | +</searchlist> | |
| 140 | +<searchlist listNo="22"> | |
| 141 | +<item>EPS</item> | |
| 142 | +<htmltag>div</htmltag> | |
| 143 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 144 | +<around>6</around> | |
| 145 | +<regexp>^(.*?)\t</regexp> | |
| 146 | +</searchlist> | |
| 147 | +<searchlist listNo="23"> | |
| 148 | +<item>BPS</item> | |
| 149 | +<htmltag>div</htmltag> | |
| 150 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 151 | +<around>7</around> | |
| 152 | +<regexp>^(.*?)\t</regexp> | |
| 153 | +</searchlist> | |
| 154 | +<searchlist listNo="24"> | |
| 155 | +<item>最低購入代金</item> | |
| 156 | +<htmltag>div</htmltag> | |
| 157 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 158 | +<around>8</around> | |
| 159 | +<regexp>^(.*?)\t</regexp> | |
| 160 | +</searchlist> | |
| 161 | +<searchlist listNo="25"> | |
| 162 | +<item>単元株数</item> | |
| 163 | +<htmltag>div</htmltag> | |
| 164 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 165 | +<around>9</around> | |
| 166 | +<regexp>^(.*?)\t</regexp> | |
| 167 | +</searchlist> | |
| 168 | +<searchlist listNo="26"> | |
| 169 | +<item>年初来高値</item> | |
| 170 | +<htmltag>div</htmltag> | |
| 171 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 172 | +<around>10</around> | |
| 173 | +<regexp>^(.*?)\t</regexp> | |
| 174 | +</searchlist> | |
| 175 | +<searchlist listNo="27"> | |
| 176 | +<item>年初来安値</item> | |
| 177 | +<htmltag>div</htmltag> | |
| 178 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 179 | +<around>11</around> | |
| 180 | +<regexp>^(.*?)\t</regexp> | |
| 181 | +</searchlist> | |
| 182 | +<searchlist listNo="28"> | |
| 183 | +<item>信用買残</item> | |
| 184 | +<htmltag>div</htmltag> | |
| 185 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 186 | +<around>12</around> | |
| 187 | +<regexp>^(.*?)\t</regexp> | |
| 188 | +</searchlist> | |
| 189 | +<searchlist listNo="29"> | |
| 190 | +<item>信用買残前週比</item> | |
| 191 | +<htmltag>div</htmltag> | |
| 192 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 193 | +<around>13</around> | |
| 194 | +<regexp>^(.*?)\t</regexp> | |
| 195 | +</searchlist> | |
| 196 | +<searchlist listNo="30"> | |
| 197 | +<item>信用売残</item> | |
| 198 | +<htmltag>div</htmltag> | |
| 199 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 200 | +<around>14</around> | |
| 201 | +<regexp>^(.*?)\t</regexp> | |
| 202 | +</searchlist> | |
| 203 | +<searchlist listNo="31"> | |
| 204 | +<item>信用売残前週比</item> | |
| 205 | +<htmltag>div</htmltag> | |
| 206 | +<htmlclass>lineFi yjMS clearfix</htmlclass> | |
| 207 | +<around>15</around> | |
| 208 | +<regexp>^(.*?)\t</regexp> | |
| 209 | +</searchlist> | |
| 210 | +<searchlist listNo="32"> | |
| 211 | +<item>貸借倍率</item> | |
| 212 | +<htmltag>div</htmltag> | |
| 213 | +<htmlclass>yjMS clearfix</htmlclass> | |
| 214 | +<regexp>^(.*?)\t</regexp> | |
| 215 | +</searchlist> | |
| 216 | +</webscraping> | |
| 217 | +</xmlcontainer> |
| @@ -0,0 +1,314 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package utility.test1; | |
| 24 | + | |
| 25 | +import webScraping.core.SearchData; | |
| 26 | +import java.io.File; | |
| 27 | +import java.io.FileNotFoundException; | |
| 28 | +import java.io.FileOutputStream; | |
| 29 | +import java.io.IOException; | |
| 30 | +import java.util.logging.Level; | |
| 31 | +import java.util.logging.Logger; | |
| 32 | +import javax.xml.parsers.DocumentBuilder; | |
| 33 | +import javax.xml.parsers.DocumentBuilderFactory; | |
| 34 | +import javax.xml.parsers.ParserConfigurationException; | |
| 35 | +import javax.xml.transform.Transformer; | |
| 36 | +import javax.xml.transform.TransformerConfigurationException; | |
| 37 | +import javax.xml.transform.TransformerException; | |
| 38 | +import javax.xml.transform.TransformerFactory; | |
| 39 | +import javax.xml.transform.dom.DOMSource; | |
| 40 | +import javax.xml.transform.stream.StreamResult; | |
| 41 | +import org.w3c.dom.DOMImplementation; | |
| 42 | +import org.w3c.dom.Document; | |
| 43 | +import org.w3c.dom.Element; | |
| 44 | +import org.w3c.dom.Node; | |
| 45 | +import org.w3c.dom.NodeList; | |
| 46 | +import org.xml.sax.SAXException; | |
| 47 | + | |
| 48 | +/** | |
| 49 | + * 検索データ読込・保存. | |
| 50 | + * @author kgto | |
| 51 | + */ | |
| 52 | +public class SearchDataRW { | |
| 53 | + /* ---------------------------------------------------------------------- * | |
| 54 | + * フィールド | |
| 55 | + * ---------------------------------------------------------------------- */ | |
| 56 | + private String UrlAdress; | |
| 57 | + | |
| 58 | + DocumentBuilder builder; | |
| 59 | + public Document document; | |
| 60 | + Element root; | |
| 61 | + | |
| 62 | + /* ---------------------------------------------------------------------- * | |
| 63 | + * コンストラクタ | |
| 64 | + * ---------------------------------------------------------------------- */ | |
| 65 | + public SearchDataRW() { | |
| 66 | + try { | |
| 67 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 68 | + builder = factory.newDocumentBuilder(); | |
| 69 | + | |
| 70 | + } catch (ParserConfigurationException ex) { | |
| 71 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 72 | + } | |
| 73 | + } | |
| 74 | + | |
| 75 | + /* ---------------------------------------------------------------------- * | |
| 76 | + * Setter | |
| 77 | + * ---------------------------------------------------------------------- */ | |
| 78 | + public void seturl(String UrlAdress) { | |
| 79 | + this.UrlAdress = UrlAdress; | |
| 80 | + } | |
| 81 | + | |
| 82 | + /* ---------------------------------------------------------------------- * | |
| 83 | + * Getter | |
| 84 | + * ---------------------------------------------------------------------- */ | |
| 85 | + public String geturl() { | |
| 86 | + return UrlAdress; | |
| 87 | + } | |
| 88 | + | |
| 89 | + /* ---------------------------------------------------------------------- * | |
| 90 | + * メソッド | |
| 91 | + * ---------------------------------------------------------------------- */ | |
| 92 | + /** | |
| 93 | + * 保存. | |
| 94 | + * @param file | |
| 95 | + */ | |
| 96 | + public void save(File file) { | |
| 97 | + saveUrl(UrlAdress); | |
| 98 | + saveSearchList(); | |
| 99 | + write(file); | |
| 100 | + } | |
| 101 | + | |
| 102 | + /** | |
| 103 | + * 読込. | |
| 104 | + * @param file | |
| 105 | + */ | |
| 106 | + public void load(File file) { | |
| 107 | + read(file); | |
| 108 | + loadUrl(); | |
| 109 | + loadSearchList(); | |
| 110 | + } | |
| 111 | + | |
| 112 | + /* ---------------------------------------------------------------------- */ | |
| 113 | + | |
| 114 | + void loadUrl() { | |
| 115 | + NodeList nodelist = root.getElementsByTagName("url"); | |
| 116 | + Node node = nodelist.item(0); | |
| 117 | + UrlAdress = node.getFirstChild().getNodeValue(); | |
| 118 | + } | |
| 119 | + | |
| 120 | + public void loadSearchList() { | |
| 121 | + SearchData.clear(); | |
| 122 | + | |
| 123 | + NodeList nodelist = root.getElementsByTagName("searchlist"); | |
| 124 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 125 | + Node childnode = nodelist.item(i); | |
| 126 | + | |
| 127 | + boolean sdatflg = false; | |
| 128 | + SearchData sdat = new SearchData(); | |
| 129 | + for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 130 | + if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 131 | + String tag = child.getNodeName(); | |
| 132 | + String rtn = ""; | |
| 133 | + if(child.getFirstChild() != null) { | |
| 134 | + rtn = child.getFirstChild().getNodeValue(); | |
| 135 | + } | |
| 136 | + switch (tag) { | |
| 137 | + case "item" : | |
| 138 | + sdat.setitem(rtn); | |
| 139 | + sdatflg = true; | |
| 140 | + break; | |
| 141 | + case "htmltag" : | |
| 142 | + sdat.setHtmltag(rtn); | |
| 143 | + sdatflg = true; | |
| 144 | + break; | |
| 145 | + case "htmlid" : | |
| 146 | + sdat.setHtmlid(rtn); | |
| 147 | + sdatflg = true; | |
| 148 | + break; | |
| 149 | + case "htmlclass" : | |
| 150 | + sdat.setHtmlclass(rtn); | |
| 151 | + sdatflg = true; | |
| 152 | + break; | |
| 153 | + case "around" : | |
| 154 | + sdat.setaround(rtn); | |
| 155 | + sdatflg = true; | |
| 156 | + break; | |
| 157 | + case "regexp" : | |
| 158 | + sdat.setregexp(rtn); | |
| 159 | + sdatflg = true; | |
| 160 | + break; | |
| 161 | + } | |
| 162 | + } | |
| 163 | + } | |
| 164 | + if(sdatflg) SearchData.add(sdat); | |
| 165 | + } | |
| 166 | + } | |
| 167 | + | |
| 168 | + public String loadMsg404() { | |
| 169 | + StringBuilder strbuf = new StringBuilder(); | |
| 170 | + NodeList nodelist = root.getElementsByTagName("msg404"); | |
| 171 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 172 | + Node childnode = nodelist.item(i); | |
| 173 | + String str = childnode.getFirstChild().getNodeValue(); | |
| 174 | + if(strbuf.length() > 0) { | |
| 175 | + strbuf.append("\n"); | |
| 176 | + } | |
| 177 | + strbuf.append(str); | |
| 178 | + } | |
| 179 | + return strbuf.toString(); | |
| 180 | + } | |
| 181 | + | |
| 182 | + public Element loadElement(String elementTagName) { | |
| 183 | + NodeList nodelist = root.getElementsByTagName(elementTagName); | |
| 184 | + Element element = (Element)nodelist.item(0); | |
| 185 | + | |
| 186 | + return element; | |
| 187 | + } | |
| 188 | + | |
| 189 | + /* ---------------------------------------------------------------------- */ | |
| 190 | + | |
| 191 | + void saveUrl(String urladdress) { | |
| 192 | + checkdoc(); | |
| 193 | + removeElement("url"); // 既にElementが存在してた場合、一度削除 | |
| 194 | + | |
| 195 | + Element url = document.createElement("url"); | |
| 196 | + url.appendChild(document.createTextNode(urladdress)); | |
| 197 | + root.appendChild(url); | |
| 198 | + } | |
| 199 | + | |
| 200 | + void saveSearchList() { | |
| 201 | + checkdoc(); | |
| 202 | + removeElement("searchlist"); // 既にElementが存在してた場合、一度削除 | |
| 203 | + | |
| 204 | + int count = 0; | |
| 205 | + for(int i = 0; i < SearchData.size(); i++) { | |
| 206 | + SearchData sdat = SearchData.get(i); | |
| 207 | + | |
| 208 | + Element cslist = document.createElement("searchlist"); | |
| 209 | + cslist.setAttribute("listNo", String.valueOf(++count)); | |
| 210 | + | |
| 211 | + addChild(cslist, "item", sdat.getitem()); | |
| 212 | + addChild(cslist, "htmltag", sdat.getHtmltag()); | |
| 213 | + addChild(cslist, "htmlid", sdat.getHtmlid()); | |
| 214 | + addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
| 215 | + addChild(cslist, "around", sdat.getaround()); | |
| 216 | + addChild(cslist, "regexp", sdat.getregexp()); | |
| 217 | + | |
| 218 | + root.appendChild(cslist); | |
| 219 | + } | |
| 220 | + } | |
| 221 | + | |
| 222 | + void saveMsg404(String msg) { | |
| 223 | + checkdoc(); | |
| 224 | + removeElement("msg404"); // 既にElementが存在してた場合、一度削除 | |
| 225 | + | |
| 226 | + String[] msgs = msg.split("\n"); | |
| 227 | + int count = 0; | |
| 228 | + for(String msgOne : msgs) { | |
| 229 | + Element msgElement = document.createElement("msg404"); | |
| 230 | + msgElement.setAttribute("No", String.valueOf(++count)); | |
| 231 | + msgElement.appendChild(document.createTextNode(msgOne)); | |
| 232 | + | |
| 233 | + root.appendChild(msgElement); | |
| 234 | + } | |
| 235 | + } | |
| 236 | + | |
| 237 | + public void saveElement(Element element) { | |
| 238 | + checkdoc(); | |
| 239 | + removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除 | |
| 240 | + | |
| 241 | + root.appendChild(element); | |
| 242 | + } | |
| 243 | + | |
| 244 | + /* ---------------------------------------------------------------------- */ | |
| 245 | + | |
| 246 | + private void addChild(Element cslist, String keyword, String data) { | |
| 247 | + if(!data.isEmpty()) { | |
| 248 | + Element element = document.createElement(keyword); | |
| 249 | + element.appendChild(document.createTextNode(data)); | |
| 250 | + cslist.appendChild(element); | |
| 251 | + } | |
| 252 | + } | |
| 253 | + | |
| 254 | + private void removeElement(String elementTagName) { | |
| 255 | + int nodeSize; | |
| 256 | + do { | |
| 257 | + NodeList nodelist = document.getElementsByTagName(elementTagName); | |
| 258 | + nodeSize = nodelist.getLength(); | |
| 259 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 260 | + Node node = nodelist.item(i); | |
| 261 | + root.removeChild(node); | |
| 262 | + } | |
| 263 | + } while(nodeSize > 0); | |
| 264 | + } | |
| 265 | + | |
| 266 | + /** | |
| 267 | + * ドキュメントチェック. | |
| 268 | + * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。 | |
| 269 | + * 既読の場合、ルートエレメントの取得を行う。 | |
| 270 | + */ | |
| 271 | + public void checkdoc() { | |
| 272 | + if(document == null) { | |
| 273 | + DOMImplementation domImpl = builder.getDOMImplementation(); | |
| 274 | + document = domImpl.createDocument("","searchdata",null); | |
| 275 | + } | |
| 276 | + root = document.getDocumentElement(); | |
| 277 | + } | |
| 278 | + | |
| 279 | + /** | |
| 280 | + * XML読込み. | |
| 281 | + * @param file | |
| 282 | + */ | |
| 283 | + public void read(File file) { | |
| 284 | + try { | |
| 285 | + document = builder.parse(file); | |
| 286 | + root = document.getDocumentElement(); | |
| 287 | + | |
| 288 | + } catch (SAXException | IOException ex) { | |
| 289 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 290 | + } | |
| 291 | + } | |
| 292 | + | |
| 293 | + /** | |
| 294 | + * XML書込み. | |
| 295 | + * @param file | |
| 296 | + */ | |
| 297 | + public void write(File file) { | |
| 298 | + try { | |
| 299 | + TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 300 | + Transformer transformer = transFactory.newTransformer(); | |
| 301 | + | |
| 302 | + DOMSource source = new DOMSource(document); | |
| 303 | + FileOutputStream os = new FileOutputStream(file); | |
| 304 | + StreamResult result = new StreamResult(os); | |
| 305 | + transformer.transform(source, result); | |
| 306 | + | |
| 307 | + } catch (TransformerConfigurationException ex) { | |
| 308 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 309 | + } catch (FileNotFoundException | TransformerException ex) { | |
| 310 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 311 | + } | |
| 312 | + } | |
| 313 | + | |
| 314 | +} |
| @@ -1,7 +1,6 @@ | ||
| 1 | 1 | |
| 2 | 2 | package utility.test1; |
| 3 | 3 | |
| 4 | -import webScraping.utility.SearchDataRW; | |
| 5 | 4 | import java.io.File; |
| 6 | 5 | import java.lang.reflect.InvocationTargetException; |
| 7 | 6 | import java.lang.reflect.Method; |
| @@ -1,7 +1,6 @@ | ||
| 1 | 1 | |
| 2 | 2 | package utility.test1; |
| 3 | 3 | |
| 4 | -import webScraping.utility.SearchDataRW; | |
| 5 | 4 | import java.io.File; |
| 6 | 5 | import webScraping.core.SearchData; |
| 7 | 6 |
| @@ -0,0 +1,42 @@ | ||
| 1 | + | |
| 2 | +package utility.test1; | |
| 3 | + | |
| 4 | +import java.io.File; | |
| 5 | +import webScraping.utility.ScrapingXml; | |
| 6 | + | |
| 7 | +/** | |
| 8 | + * XMLコンバータ | |
| 9 | + * 旧:SearchDataRW.java → 新:ScrapingXml.java | |
| 10 | + * @author kgto | |
| 11 | + */ | |
| 12 | +public class ConvertXml01 { | |
| 13 | + | |
| 14 | + private String UrlAdress; | |
| 15 | + File file = new File("test1.xml"); | |
| 16 | + | |
| 17 | + /** | |
| 18 | + * @param args the command line arguments | |
| 19 | + */ | |
| 20 | + public static void main(String[] args) { | |
| 21 | + ConvertXml01 conv = new ConvertXml01(); | |
| 22 | + | |
| 23 | + conv.readold(); | |
| 24 | + conv.writenew(); | |
| 25 | + | |
| 26 | + System.exit(0); | |
| 27 | + } | |
| 28 | + | |
| 29 | + void readold() { | |
| 30 | + SearchDataRW sdatrw = new SearchDataRW(); | |
| 31 | + sdatrw.load(file); | |
| 32 | + UrlAdress = sdatrw.geturl(); | |
| 33 | + } | |
| 34 | + | |
| 35 | + void writenew() { | |
| 36 | + ScrapingXml xmlwriter = new ScrapingXml(); | |
| 37 | + xmlwriter.setTestUrl(UrlAdress); | |
| 38 | + xmlwriter.setSdata(); | |
| 39 | + xmlwriter.save(file); | |
| 40 | + } | |
| 41 | + | |
| 42 | +} |
| @@ -1,16 +1,71 @@ | ||
| 1 | -<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata> | |
| 2 | - | |
| 3 | - | |
| 4 | - | |
| 5 | - | |
| 6 | - | |
| 7 | - | |
| 8 | - | |
| 9 | - | |
| 10 | - | |
| 11 | - | |
| 12 | - | |
| 13 | - | |
| 14 | - | |
| 15 | - | |
| 16 | -<url>http://weather.yahoo.co.jp/weather/</url><searchlist listNo="1"><item>天気01</item><htmltag>li</htmltag><htmlclass>point pt1400</htmlclass></searchlist><searchlist listNo="2"><item>天気02</item><htmltag>li</htmltag><htmlclass>point pt1900</htmlclass></searchlist><searchlist listNo="3"><item>天気03</item><htmltag>li</htmltag><htmlclass>point pt3410</htmlclass></searchlist><searchlist listNo="4"><item>天気04</item><htmltag>li</htmltag><htmlclass>point pt4410</htmlclass></searchlist><searchlist listNo="5"><item>天気05</item><htmltag>li</htmltag><htmlclass>point pt5110</htmlclass></searchlist><searchlist listNo="6"><item>天気06</item><htmltag>li</htmltag><htmlclass>point pt5410</htmlclass></searchlist><searchlist listNo="7"><item>天気07</item><htmltag>li</htmltag><htmlclass>point pt5610</htmlclass></searchlist><searchlist listNo="8"><item>天気08</item><htmltag>li</htmltag><htmlclass>point pt6200</htmlclass></searchlist><searchlist listNo="9"><item>天気09</item><htmltag>li</htmltag><htmlclass>point pt6710</htmlclass></searchlist><searchlist listNo="10"><item>天気10</item><htmltag>li</htmltag><htmlclass>point pt7410</htmlclass></searchlist><searchlist listNo="11"><item>天気11</item><htmltag>li</htmltag><htmlclass>point pt8210</htmlclass></searchlist><searchlist listNo="12"><item>天気12</item><htmltag>li</htmltag><htmlclass>point pt8810</htmlclass></searchlist><searchlist listNo="13"><item>天気13</item><htmltag>li</htmltag><htmlclass>point pt9110</htmlclass></searchlist></searchdata> | |
| \ No newline at end of file | ||
| 1 | +<?xml version="1.0" encoding="UTF-8" standalone="no"?> | |
| 2 | +<xmlcontainer> | |
| 3 | +<webscraping> | |
| 4 | +<url>http://weather.yahoo.co.jp/weather/</url> | |
| 5 | +<searchlist listNo="1"> | |
| 6 | +<item>天気01</item> | |
| 7 | +<htmltag>li</htmltag> | |
| 8 | +<htmlclass>point pt1400</htmlclass> | |
| 9 | +</searchlist> | |
| 10 | +<searchlist listNo="2"> | |
| 11 | +<item>天気02</item> | |
| 12 | +<htmltag>li</htmltag> | |
| 13 | +<htmlclass>point pt1900</htmlclass> | |
| 14 | +</searchlist> | |
| 15 | +<searchlist listNo="3"> | |
| 16 | +<item>天気03</item> | |
| 17 | +<htmltag>li</htmltag> | |
| 18 | +<htmlclass>point pt3410</htmlclass> | |
| 19 | +</searchlist> | |
| 20 | +<searchlist listNo="4"> | |
| 21 | +<item>天気04</item> | |
| 22 | +<htmltag>li</htmltag> | |
| 23 | +<htmlclass>point pt4410</htmlclass> | |
| 24 | +</searchlist> | |
| 25 | +<searchlist listNo="5"> | |
| 26 | +<item>天気05</item> | |
| 27 | +<htmltag>li</htmltag> | |
| 28 | +<htmlclass>point pt5110</htmlclass> | |
| 29 | +</searchlist> | |
| 30 | +<searchlist listNo="6"> | |
| 31 | +<item>天気06</item> | |
| 32 | +<htmltag>li</htmltag> | |
| 33 | +<htmlclass>point pt5410</htmlclass> | |
| 34 | +</searchlist> | |
| 35 | +<searchlist listNo="7"> | |
| 36 | +<item>天気07</item> | |
| 37 | +<htmltag>li</htmltag> | |
| 38 | +<htmlclass>point pt5610</htmlclass> | |
| 39 | +</searchlist> | |
| 40 | +<searchlist listNo="8"> | |
| 41 | +<item>天気08</item> | |
| 42 | +<htmltag>li</htmltag> | |
| 43 | +<htmlclass>point pt6200</htmlclass> | |
| 44 | +</searchlist> | |
| 45 | +<searchlist listNo="9"> | |
| 46 | +<item>天気09</item> | |
| 47 | +<htmltag>li</htmltag> | |
| 48 | +<htmlclass>point pt6710</htmlclass> | |
| 49 | +</searchlist> | |
| 50 | +<searchlist listNo="10"> | |
| 51 | +<item>天気10</item> | |
| 52 | +<htmltag>li</htmltag> | |
| 53 | +<htmlclass>point pt7410</htmlclass> | |
| 54 | +</searchlist> | |
| 55 | +<searchlist listNo="11"> | |
| 56 | +<item>天気11</item> | |
| 57 | +<htmltag>li</htmltag> | |
| 58 | +<htmlclass>point pt8210</htmlclass> | |
| 59 | +</searchlist> | |
| 60 | +<searchlist listNo="12"> | |
| 61 | +<item>天気12</item> | |
| 62 | +<htmltag>li</htmltag> | |
| 63 | +<htmlclass>point pt8810</htmlclass> | |
| 64 | +</searchlist> | |
| 65 | +<searchlist listNo="13"> | |
| 66 | +<item>天気13</item> | |
| 67 | +<htmltag>li</htmltag> | |
| 68 | +<htmlclass>point pt9110</htmlclass> | |
| 69 | +</searchlist> | |
| 70 | +</webscraping> | |
| 71 | +</xmlcontainer> |