作業部屋の使い方を試しています。
マージ branches/b3/WebScraping
| @@ -1,508 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package Form; | |
| 24 | - | |
| 25 | -import WebScraping.SearchData; | |
| 26 | -import java.io.BufferedReader; | |
| 27 | -import java.io.BufferedWriter; | |
| 28 | -import java.io.File; | |
| 29 | -import java.io.FileInputStream; | |
| 30 | -import java.io.FileNotFoundException; | |
| 31 | -import java.io.FileOutputStream; | |
| 32 | -import java.io.IOException; | |
| 33 | -import java.io.InputStreamReader; | |
| 34 | -import java.io.OutputStreamWriter; | |
| 35 | -import java.util.ArrayList; | |
| 36 | -import java.util.logging.Level; | |
| 37 | -import java.util.logging.Logger; | |
| 38 | -import javax.xml.parsers.DocumentBuilder; | |
| 39 | -import javax.xml.parsers.DocumentBuilderFactory; | |
| 40 | -import javax.xml.parsers.ParserConfigurationException; | |
| 41 | -import javax.xml.transform.Transformer; | |
| 42 | -import javax.xml.transform.TransformerConfigurationException; | |
| 43 | -import javax.xml.transform.TransformerException; | |
| 44 | -import javax.xml.transform.TransformerFactory; | |
| 45 | -import javax.xml.transform.dom.DOMSource; | |
| 46 | -import javax.xml.transform.stream.StreamResult; | |
| 47 | -import org.w3c.dom.DOMImplementation; | |
| 48 | -import org.w3c.dom.Document; | |
| 49 | -import org.w3c.dom.Element; | |
| 50 | -import org.w3c.dom.Node; | |
| 51 | -import org.w3c.dom.NodeList; | |
| 52 | -import org.xml.sax.SAXException; | |
| 53 | - | |
| 54 | -/** | |
| 55 | - * | |
| 56 | - * @author kgto | |
| 57 | - */ | |
| 58 | -public class SearchDataRW { | |
| 59 | - | |
| 60 | - DocumentBuilder builder; | |
| 61 | - public Document document; | |
| 62 | - Element root; | |
| 63 | - | |
| 64 | - private final String splitchar = "\t"; | |
| 65 | - | |
| 66 | - private String UrlAdress; | |
| 67 | - private ArrayList slist; | |
| 68 | - | |
| 69 | - public SearchDataRW() { | |
| 70 | - try { | |
| 71 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 72 | - builder = factory.newDocumentBuilder(); | |
| 73 | - | |
| 74 | - } catch (ParserConfigurationException ex) { | |
| 75 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 76 | - } | |
| 77 | - } | |
| 78 | - | |
| 79 | - public void seturl(String UrlAdress) { | |
| 80 | - this.UrlAdress = UrlAdress; | |
| 81 | - } | |
| 82 | - | |
| 83 | - public void setslist(ArrayList slist) { | |
| 84 | - this.slist = slist; | |
| 85 | - } | |
| 86 | - | |
| 87 | - public String geturl() { | |
| 88 | - return UrlAdress; | |
| 89 | - } | |
| 90 | - | |
| 91 | - public ArrayList getslist() { | |
| 92 | - return slist; | |
| 93 | - } | |
| 94 | - | |
| 95 | - /** | |
| 96 | - * 保存. | |
| 97 | - * @param file | |
| 98 | - */ | |
| 99 | - public void save(File file) { | |
| 100 | - //saveCsv(file); | |
| 101 | - //saveXml(file); | |
| 102 | - | |
| 103 | - saveUrl(UrlAdress); | |
| 104 | - saveSearchList(slist); | |
| 105 | - write(file); | |
| 106 | - } | |
| 107 | - | |
| 108 | - /** | |
| 109 | - * 読込. | |
| 110 | - * @param file | |
| 111 | - */ | |
| 112 | - public void load(File file) { | |
| 113 | - //loadCsv(file); | |
| 114 | - //loadXml(file); | |
| 115 | - | |
| 116 | - read(file); | |
| 117 | - UrlAdress = loadUrl(); | |
| 118 | - slist = loadSearchList(); | |
| 119 | - } | |
| 120 | - | |
| 121 | - /* ---------------------------------------------------------------------- */ | |
| 122 | - /** | |
| 123 | - * 保存(CSV形式). | |
| 124 | - * @param file | |
| 125 | - */ | |
| 126 | - public void saveCsv(File file) { | |
| 127 | - | |
| 128 | - try { | |
| 129 | - //空のファイルを作成 | |
| 130 | - file.createNewFile(); | |
| 131 | - | |
| 132 | - FileOutputStream fileoutputstream = new FileOutputStream(file); | |
| 133 | - OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8"); | |
| 134 | - BufferedWriter bufferedwriter = new BufferedWriter(outputstreamwriter); | |
| 135 | - | |
| 136 | - // URL | |
| 137 | - bufferedwriter.write(UrlAdress); | |
| 138 | - bufferedwriter.write("\n"); | |
| 139 | - // 検索情報 | |
| 140 | - for (Object slist1 : slist) { | |
| 141 | - SearchData sdat = (SearchData) slist1; | |
| 142 | - StringBuilder str = new StringBuilder(); | |
| 143 | - str.append(sdat.getitem()).append(splitchar); | |
| 144 | - str.append(sdat.getHtmltag()).append(splitchar); | |
| 145 | - str.append(sdat.getHtmlid()).append(splitchar); | |
| 146 | - str.append(sdat.getHtmlclass()).append(splitchar); | |
| 147 | - str.append(sdat.getaround()).append(splitchar); | |
| 148 | - str.append(sdat.getregexp()).append("\n"); | |
| 149 | - | |
| 150 | - bufferedwriter.write(str.toString()); | |
| 151 | - } | |
| 152 | - bufferedwriter.close(); | |
| 153 | - | |
| 154 | - } catch(IOException ex) { | |
| 155 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 156 | - } | |
| 157 | - } | |
| 158 | - | |
| 159 | - /** | |
| 160 | - * 読込(CSV形式). | |
| 161 | - * @param file | |
| 162 | - */ | |
| 163 | - public void loadCsv(File file) { | |
| 164 | - slist = new ArrayList(); | |
| 165 | - | |
| 166 | - try { | |
| 167 | - FileInputStream fileinputstream = new FileInputStream(file); | |
| 168 | - InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8"); | |
| 169 | - BufferedReader bufferedreader = new BufferedReader(inputstreamreader); | |
| 170 | - | |
| 171 | - String rec; | |
| 172 | - | |
| 173 | - // URL | |
| 174 | - UrlAdress = bufferedreader.readLine(); | |
| 175 | - // 検索情報 | |
| 176 | - while((rec = bufferedreader.readLine()) != null) { | |
| 177 | - String[] recary = rec.split(splitchar, -1); | |
| 178 | - SearchData sdat = new SearchData(); | |
| 179 | - sdat.setitem(recary[0]); | |
| 180 | - sdat.setHtmltag(recary[1]); | |
| 181 | - sdat.setHtmlid(recary[2]); | |
| 182 | - sdat.setHtmlclass(recary[3]); | |
| 183 | - sdat.setaround(recary[4]); | |
| 184 | - sdat.setregexp(recary[5]); | |
| 185 | - | |
| 186 | - slist.add(sdat); | |
| 187 | - } | |
| 188 | - bufferedreader.close(); | |
| 189 | - | |
| 190 | - } catch(IOException ex) { | |
| 191 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 192 | - } | |
| 193 | - | |
| 194 | - } | |
| 195 | - | |
| 196 | - /* ---------------------------------------------------------------------- */ | |
| 197 | - /** | |
| 198 | - * 保存(XML形式). | |
| 199 | - * @param file | |
| 200 | - */ | |
| 201 | - public void saveXml(File file) { | |
| 202 | - try { | |
| 203 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 204 | - DocumentBuilder builder = factory.newDocumentBuilder(); | |
| 205 | - DOMImplementation domImpl = builder.getDOMImplementation(); | |
| 206 | - | |
| 207 | - Document document = domImpl.createDocument("","searchdata",null); | |
| 208 | - Element root = document.getDocumentElement(); | |
| 209 | - | |
| 210 | - // URL | |
| 211 | - Element url = document.createElement("url"); | |
| 212 | - url.appendChild(document.createTextNode(UrlAdress)); | |
| 213 | - root.appendChild(url); | |
| 214 | - | |
| 215 | - // 検索情報 | |
| 216 | - for (Object slist1 : slist) { | |
| 217 | - SearchData sdat = (SearchData) slist1; | |
| 218 | - | |
| 219 | - Element cslist = document.createElement("searchlist"); | |
| 220 | - Element item = document.createElement("item"); | |
| 221 | - Element htmltag = document.createElement("htmltag"); | |
| 222 | - Element htmlid = document.createElement("htmlid"); | |
| 223 | - Element htmlclass = document.createElement("htmlclass"); | |
| 224 | - Element around = document.createElement("around"); | |
| 225 | - Element regexp = document.createElement("regexp"); | |
| 226 | - | |
| 227 | - item.appendChild(document.createTextNode(sdat.getitem())); | |
| 228 | - htmltag.appendChild(document.createTextNode(sdat.getHtmltag())); | |
| 229 | - htmlid.appendChild(document.createTextNode(sdat.getHtmlid())); | |
| 230 | - htmlclass.appendChild(document.createTextNode(sdat.getHtmlclass())); | |
| 231 | - around.appendChild(document.createTextNode(sdat.getaround())); | |
| 232 | - regexp.appendChild(document.createTextNode(sdat.getregexp())); | |
| 233 | - | |
| 234 | - cslist.appendChild(item); | |
| 235 | - cslist.appendChild(htmltag); | |
| 236 | - cslist.appendChild(htmlid); | |
| 237 | - cslist.appendChild(htmlclass); | |
| 238 | - cslist.appendChild(around); | |
| 239 | - cslist.appendChild(regexp); | |
| 240 | - | |
| 241 | - root.appendChild(cslist); | |
| 242 | - } | |
| 243 | - // 出力 | |
| 244 | - TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 245 | - Transformer transformer = transFactory.newTransformer(); | |
| 246 | - | |
| 247 | - DOMSource source = new DOMSource(document); | |
| 248 | - FileOutputStream os = new FileOutputStream(file); | |
| 249 | - StreamResult result = new StreamResult(os); | |
| 250 | - transformer.transform(source, result); | |
| 251 | - | |
| 252 | - } catch (ParserConfigurationException | FileNotFoundException ex) { | |
| 253 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 254 | - } catch (TransformerConfigurationException ex) { | |
| 255 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 256 | - } catch (TransformerException ex) { | |
| 257 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 258 | - } | |
| 259 | - } | |
| 260 | - | |
| 261 | - /** | |
| 262 | - * 読込(XML形式). | |
| 263 | - * @param file | |
| 264 | - */ | |
| 265 | - public void loadXml(File file) { | |
| 266 | - slist = new ArrayList(); | |
| 267 | - | |
| 268 | - try { | |
| 269 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 270 | - DocumentBuilder builder = factory.newDocumentBuilder(); | |
| 271 | - Document doc = builder.parse(file); | |
| 272 | - | |
| 273 | - // ルート要素の取得 | |
| 274 | - Element root = doc.getDocumentElement(); | |
| 275 | - | |
| 276 | - // URL | |
| 277 | - NodeList url = root.getElementsByTagName("url"); | |
| 278 | - Node urlnode = url.item(0); | |
| 279 | - UrlAdress = urlnode.getFirstChild().getNodeValue(); | |
| 280 | - | |
| 281 | - // 検索情報 | |
| 282 | - NodeList cslist = root.getElementsByTagName("searchlist"); | |
| 283 | - for(int i = 0; i < cslist.getLength(); i++) { | |
| 284 | - SearchData sdat = new SearchData(); | |
| 285 | - | |
| 286 | - Node slistnode = cslist.item(i); | |
| 287 | - Node child; | |
| 288 | - for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 289 | - if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 290 | - | |
| 291 | - String tag = child.getNodeName(); | |
| 292 | - String rtn = ""; | |
| 293 | - if(child.getFirstChild() != null) { | |
| 294 | - rtn = child.getFirstChild().getNodeValue(); | |
| 295 | - } | |
| 296 | - | |
| 297 | - switch (tag) { | |
| 298 | - case "item" : | |
| 299 | - sdat.setitem(rtn); | |
| 300 | - break; | |
| 301 | - case "htmltag" : | |
| 302 | - sdat.setHtmltag(rtn); | |
| 303 | - break; | |
| 304 | - case "htmlid" : | |
| 305 | - sdat.setHtmlid(rtn); | |
| 306 | - break; | |
| 307 | - case "htmlclass" : | |
| 308 | - sdat.setHtmlclass(rtn); | |
| 309 | - break; | |
| 310 | - case "around" : | |
| 311 | - sdat.setaround(rtn); | |
| 312 | - break; | |
| 313 | - case "regexp" : | |
| 314 | - sdat.setregexp(rtn); | |
| 315 | - break; | |
| 316 | - } | |
| 317 | - } | |
| 318 | - } | |
| 319 | - slist.add(sdat); | |
| 320 | - } | |
| 321 | - | |
| 322 | - } catch (ParserConfigurationException | SAXException | IOException ex) { | |
| 323 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 324 | - } | |
| 325 | - } | |
| 326 | - | |
| 327 | - /* ---------------------------------------------------------------------- */ | |
| 328 | - | |
| 329 | - public String loadUrl() { | |
| 330 | - String urladdress; | |
| 331 | - | |
| 332 | - NodeList nodelist = root.getElementsByTagName("url"); | |
| 333 | - Node node = nodelist.item(0); | |
| 334 | - urladdress = node.getFirstChild().getNodeValue(); | |
| 335 | - | |
| 336 | - return urladdress; | |
| 337 | - } | |
| 338 | - | |
| 339 | - public ArrayList<SearchData> loadSearchList() { | |
| 340 | - ArrayList<SearchData> slist = new ArrayList<>(); | |
| 341 | - | |
| 342 | - NodeList nodelist = root.getElementsByTagName("searchlist"); | |
| 343 | - for(int i = 0; i < nodelist.getLength(); i++) { | |
| 344 | - Node childnode = nodelist.item(i); | |
| 345 | - | |
| 346 | - boolean sdatflg = false; | |
| 347 | - SearchData sdat = new SearchData(); | |
| 348 | - | |
| 349 | - //NodeList childnodelist = childnode.getChildNodes(); | |
| 350 | - //for(int j = 0; j < childnodelist.getLength(); j++) { | |
| 351 | - // Node child = childnodelist.item(j); | |
| 352 | - | |
| 353 | - for (Node child = childnode.getFirstChild(); | |
| 354 | - child != null; child = child.getNextSibling()) { | |
| 355 | - | |
| 356 | - if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 357 | - | |
| 358 | - String tag = child.getNodeName(); | |
| 359 | - String rtn = ""; | |
| 360 | - if(child.getFirstChild() != null) { | |
| 361 | - rtn = child.getFirstChild().getNodeValue(); | |
| 362 | - } | |
| 363 | - | |
| 364 | - switch (tag) { | |
| 365 | - case "item" : | |
| 366 | - sdat.setitem(rtn); | |
| 367 | - sdatflg = true; | |
| 368 | - break; | |
| 369 | - case "htmltag" : | |
| 370 | - sdat.setHtmltag(rtn); | |
| 371 | - sdatflg = true; | |
| 372 | - break; | |
| 373 | - case "htmlid" : | |
| 374 | - sdat.setHtmlid(rtn); | |
| 375 | - sdatflg = true; | |
| 376 | - break; | |
| 377 | - case "htmlclass" : | |
| 378 | - sdat.setHtmlclass(rtn); | |
| 379 | - sdatflg = true; | |
| 380 | - break; | |
| 381 | - case "around" : | |
| 382 | - sdat.setaround(rtn); | |
| 383 | - sdatflg = true; | |
| 384 | - break; | |
| 385 | - case "regexp" : | |
| 386 | - sdat.setregexp(rtn); | |
| 387 | - sdatflg = true; | |
| 388 | - break; | |
| 389 | - } | |
| 390 | - } | |
| 391 | - } | |
| 392 | - if(sdatflg) slist.add(sdat); | |
| 393 | - } | |
| 394 | - return slist; | |
| 395 | - } | |
| 396 | - | |
| 397 | - public Element loadElement(String elementTagName) { | |
| 398 | - NodeList nodelist = root.getElementsByTagName(elementTagName); | |
| 399 | - Element element = (Element)nodelist.item(0); | |
| 400 | - | |
| 401 | - return element; | |
| 402 | - } | |
| 403 | - | |
| 404 | - public void saveUrl(String urladdress) { | |
| 405 | - checkdoc(); | |
| 406 | - removeElement("url"); // 既にElementが存在してた場合、一度削除 | |
| 407 | - | |
| 408 | - Element url = document.createElement("url"); | |
| 409 | - url.appendChild(document.createTextNode(urladdress)); | |
| 410 | - root.appendChild(url); | |
| 411 | - } | |
| 412 | - | |
| 413 | - public void saveSearchList(ArrayList slist) { | |
| 414 | - checkdoc(); | |
| 415 | - removeElement("searchlist"); // 既にElementが存在してた場合、一度削除 | |
| 416 | - | |
| 417 | - for (Object slist1 : slist) { | |
| 418 | - SearchData sdat = (SearchData) slist1; | |
| 419 | - | |
| 420 | - Element cslist = document.createElement("searchlist"); | |
| 421 | - | |
| 422 | - addChild(cslist, "item", sdat.getitem()); | |
| 423 | - addChild(cslist, "htmltag", sdat.getHtmltag()); | |
| 424 | - addChild(cslist, "htmlid", sdat.getHtmlid()); | |
| 425 | - addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
| 426 | - addChild(cslist, "around", sdat.getaround()); | |
| 427 | - addChild(cslist, "regexp", sdat.getregexp()); | |
| 428 | - | |
| 429 | - root.appendChild(cslist); | |
| 430 | - } | |
| 431 | - } | |
| 432 | - | |
| 433 | - public void saveElement(Element element) { | |
| 434 | - checkdoc(); | |
| 435 | - removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除 | |
| 436 | - | |
| 437 | - root.appendChild(element); | |
| 438 | - } | |
| 439 | - | |
| 440 | - private void addChild(Element cslist, String keyword, String data) { | |
| 441 | - if(!data.isEmpty()) { | |
| 442 | - Element element = document.createElement(keyword); | |
| 443 | - element.appendChild(document.createTextNode(data)); | |
| 444 | - cslist.appendChild(element); | |
| 445 | - } | |
| 446 | - } | |
| 447 | - | |
| 448 | - private void removeElement(String elementTagName) { | |
| 449 | - int nodeSize; | |
| 450 | - do { | |
| 451 | - NodeList nodelist = document.getElementsByTagName(elementTagName); | |
| 452 | - nodeSize = nodelist.getLength(); | |
| 453 | - for(int i = 0; i < nodelist.getLength(); i++) { | |
| 454 | - Node node = nodelist.item(i); | |
| 455 | - root.removeChild(node); | |
| 456 | - } | |
| 457 | - } while(nodeSize > 0); | |
| 458 | - } | |
| 459 | - | |
| 460 | - /** | |
| 461 | - * ドキュメントチェック. | |
| 462 | - * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。 | |
| 463 | - * 既読の場合、ルートエレメントの取得を行う。 | |
| 464 | - */ | |
| 465 | - public void checkdoc() { | |
| 466 | - if(document == null) { | |
| 467 | - DOMImplementation domImpl = builder.getDOMImplementation(); | |
| 468 | - document = domImpl.createDocument("","searchdata",null); | |
| 469 | - } | |
| 470 | - root = document.getDocumentElement(); | |
| 471 | - } | |
| 472 | - | |
| 473 | - /** | |
| 474 | - * XML読込み. | |
| 475 | - * @param file | |
| 476 | - */ | |
| 477 | - public void read(File file) { | |
| 478 | - try { | |
| 479 | - document = builder.parse(file); | |
| 480 | - root = document.getDocumentElement(); | |
| 481 | - | |
| 482 | - } catch (SAXException | IOException ex) { | |
| 483 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 484 | - } | |
| 485 | - } | |
| 486 | - | |
| 487 | - /** | |
| 488 | - * XML書込み. | |
| 489 | - * @param file | |
| 490 | - */ | |
| 491 | - public void write(File file) { | |
| 492 | - try { | |
| 493 | - TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 494 | - Transformer transformer = transFactory.newTransformer(); | |
| 495 | - | |
| 496 | - DOMSource source = new DOMSource(document); | |
| 497 | - FileOutputStream os = new FileOutputStream(file); | |
| 498 | - StreamResult result = new StreamResult(os); | |
| 499 | - transformer.transform(source, result); | |
| 500 | - | |
| 501 | - } catch (TransformerConfigurationException ex) { | |
| 502 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 503 | - } catch (FileNotFoundException | TransformerException ex) { | |
| 504 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 505 | - } | |
| 506 | - } | |
| 507 | - | |
| 508 | -} |
| @@ -1,454 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | -package Form; | |
| 23 | - | |
| 24 | -import WebScraping.HtmlParser; | |
| 25 | -import WebScraping.SearchData; | |
| 26 | -import java.awt.Desktop; | |
| 27 | -import java.io.File; | |
| 28 | -import java.io.IOException; | |
| 29 | -import java.net.URI; | |
| 30 | -import java.net.URISyntaxException; | |
| 31 | -import java.util.*; | |
| 32 | -import java.util.logging.Level; | |
| 33 | -import java.util.logging.Logger; | |
| 34 | -import javax.swing.JFileChooser; | |
| 35 | -import javax.swing.filechooser.FileFilter; | |
| 36 | -import javax.swing.filechooser.FileNameExtensionFilter; | |
| 37 | -import org.jdesktop.observablecollections.ObservableCollections; | |
| 38 | - | |
| 39 | -/** | |
| 40 | - * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する. | |
| 41 | - * @author kgto | |
| 42 | - */ | |
| 43 | -public class HtmlSearch extends javax.swing.JFrame { | |
| 44 | - | |
// Reader/writer used to persist the URL and the search-condition list.
private final SearchDataRW sio = new SearchDataRW();

// Backing list, and the observable wrapper around it that is exposed through
// getSerachDataList() and bound to jTable1 via "${serachDataList}".
private ArrayList slist = new ArrayList();
private List serachDataList = ObservableCollections.observableList(slist);
| 49 | - | |
| 50 | - /** | |
| 51 | - * Creates new form Frame1 | |
| 52 | - */ | |
| 53 | - public HtmlSearch() { | |
| 54 | - initComponents(); | |
| 55 | - | |
| 56 | - // カレントディレクトリ取得 | |
| 57 | - String dir = System.getProperty("user.dir"); | |
| 58 | - File file = new java.io.File(dir + "\\data"); | |
| 59 | - jFileChooser1.setCurrentDirectory(file); | |
| 60 | - | |
| 61 | - FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml"); | |
| 62 | - FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt"); | |
| 63 | - jFileChooser1.addChoosableFileFilter(filter1); | |
| 64 | - jFileChooser1.addChoosableFileFilter(filter2); | |
| 65 | - jFileChooser1.setFileFilter(filter1); | |
| 66 | - } | |
| 67 | - | |
| 68 | - public List getSerachDataList() { | |
| 69 | - return this.serachDataList; | |
| 70 | - } | |
| 71 | - | |
| 72 | - public void setSerachDataList(List serachDataList) { | |
| 73 | - this.serachDataList = serachDataList; | |
| 74 | - } | |
| 75 | - | |
/**
 * This method is called from within the constructor to initialize the form.
 * WARNING: Do NOT modify this code. The content of this method is always
 * regenerated by the Form Editor.
 *
 * <p>It creates all Swing components, binds {@code jTable1} to the
 * {@code serachDataList} property with six String columns (item, htmltag,
 * htmlid, htmlclass, around, regexp), registers the button and menu
 * listeners, lays the components out with {@code GroupLayout} and finally
 * packs the frame.
 */
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
private void initComponents() {
    bindingGroup = new org.jdesktop.beansbinding.BindingGroup();

    jFileChooser1 = new javax.swing.JFileChooser();
    jLabel1 = new javax.swing.JLabel();
    jTxtUrl = new javax.swing.JTextField();
    jBtnSearch = new javax.swing.JButton();
    jPanel1 = new javax.swing.JPanel();
    jScrollPane1 = new javax.swing.JScrollPane();
    jTable1 = new javax.swing.JTable();
    jBtnRowIns = new javax.swing.JButton();
    jBtnRowDel = new javax.swing.JButton();
    jBtnRowCpy = new javax.swing.JButton();
    jPanel2 = new javax.swing.JPanel();
    jScrollPane2 = new javax.swing.JScrollPane();
    jTxtRtn = new javax.swing.JTextArea();
    jMenuBar1 = new javax.swing.JMenuBar();
    jMenu1 = new javax.swing.JMenu();
    jMenuLoad = new javax.swing.JMenuItem();
    jMenuSave = new javax.swing.JMenuItem();
    jMenu3 = new javax.swing.JMenu();
    jMenuItem1 = new javax.swing.JMenuItem();
    jMenu2 = new javax.swing.JMenu();

    jFileChooser1.setCurrentDirectory(null);
    jFileChooser1.setDialogTitle("");

    setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
    setTitle("タグ検索");

    jLabel1.setText(" URL:");

    jBtnSearch.setText("検索");
    jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnSearchActionPerformed(evt);
        }
    });

    jPanel1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));

    jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
    jTable1.getTableHeader().setReorderingAllowed(false);

    // Two-way (READ_WRITE) binding between the serachDataList property of this frame and the table rows.
    org.jdesktop.beansbinding.ELProperty eLProperty = org.jdesktop.beansbinding.ELProperty.create("${serachDataList}");
    org.jdesktop.swingbinding.JTableBinding jTableBinding = org.jdesktop.swingbinding.SwingBindings.createJTableBinding(org.jdesktop.beansbinding.AutoBinding.UpdateStrategy.READ_WRITE, this, eLProperty, jTable1);
    org.jdesktop.swingbinding.JTableBinding.ColumnBinding columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${item}"));
    columnBinding.setColumnName("項目名");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmltag}"));
    columnBinding.setColumnName("タグ");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmlid}"));
    columnBinding.setColumnName("ID");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmlclass}"));
    columnBinding.setColumnName("クラス");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${around}"));
    columnBinding.setColumnName("位置");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${regexp}"));
    columnBinding.setColumnName("抽出条件");
    columnBinding.setColumnClass(String.class);
    bindingGroup.addBinding(jTableBinding);
    jTableBinding.bind();
    jScrollPane1.setViewportView(jTable1);

    jBtnRowIns.setText("行挿入");
    jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowInsActionPerformed(evt);
        }
    });

    jBtnRowDel.setText("行削除");
    jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowDelActionPerformed(evt);
        }
    });

    jBtnRowCpy.setText("行コピー");
    jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowCpyActionPerformed(evt);
        }
    });

    // Search-condition panel: table on top, row copy/delete/insert buttons below it.
    javax.swing.GroupLayout jPanel1Layout = new javax.swing.GroupLayout(jPanel1);
    jPanel1.setLayout(jPanel1Layout);
    jPanel1Layout.setHorizontalGroup(
        jPanel1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanel1Layout.createSequentialGroup()
            .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
            .addComponent(jBtnRowCpy)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnRowDel)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnRowIns))
        .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
    );
    jPanel1Layout.setVerticalGroup(
        jPanel1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanel1Layout.createSequentialGroup()
            .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 140, javax.swing.GroupLayout.PREFERRED_SIZE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addGroup(jPanel1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                .addComponent(jBtnRowDel)
                .addComponent(jBtnRowIns)
                .addComponent(jBtnRowCpy)))
    );

    jPanel2.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));

    jTxtRtn.setColumns(20);
    jTxtRtn.setRows(5);
    jScrollPane2.setViewportView(jTxtRtn);

    javax.swing.GroupLayout jPanel2Layout = new javax.swing.GroupLayout(jPanel2);
    jPanel2.setLayout(jPanel2Layout);
    jPanel2Layout.setHorizontalGroup(
        jPanel2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPane2, javax.swing.GroupLayout.DEFAULT_SIZE, 532, Short.MAX_VALUE)
    );
    jPanel2Layout.setVerticalGroup(
        jPanel2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPane2, javax.swing.GroupLayout.DEFAULT_SIZE, 156, Short.MAX_VALUE)
    );

    jMenu1.setText("ファイル");

    jMenuLoad.setText("LOAD");
    jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuLoadActionPerformed(evt);
        }
    });
    jMenu1.add(jMenuLoad);

    jMenuSave.setText("SAVE");
    jMenuSave.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuSaveActionPerformed(evt);
        }
    });
    jMenu1.add(jMenuSave);

    jMenuBar1.add(jMenu1);

    jMenu3.setText("ツール");

    jMenuItem1.setText("ブラウザで表示");
    jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuItem1ActionPerformed(evt);
        }
    });
    jMenu3.add(jMenuItem1);

    jMenuBar1.add(jMenu3);

    // This menu has no items; it reacts to a mouse click on the menu title itself.
    jMenu2.setText("検索");
    jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
        public void mouseClicked(java.awt.event.MouseEvent evt) {
            jMenu2MouseClicked(evt);
        }
    });
    jMenuBar1.add(jMenu2);

    setJMenuBar(jMenuBar1);

    // Frame layout: URL row, then the search-condition panel, then the result panel.
    javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
    getContentPane().setLayout(layout);
    layout.setHorizontalGroup(
        layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(layout.createSequentialGroup()
            .addComponent(jLabel1)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jTxtUrl)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnSearch))
        .addComponent(jPanel2, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
        .addComponent(jPanel1, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
    );
    layout.setVerticalGroup(
        layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(layout.createSequentialGroup()
            .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                .addComponent(jLabel1)
                .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
                .addComponent(jBtnSearch))
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jPanel1, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jPanel2, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
    );

    bindingGroup.bind();

    pack();
}// </editor-fold>//GEN-END:initComponents
| 285 | - | |
| 286 | - private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed | |
| 287 | - int SelectedRow = jTable1.getSelectedRow(); | |
| 288 | - SearchData sdat = new SearchData(); | |
| 289 | - | |
| 290 | - if(SelectedRow >= 0) { | |
| 291 | - this.serachDataList.add(SelectedRow, sdat); | |
| 292 | - } else { | |
| 293 | - this.serachDataList.add(sdat); | |
| 294 | - } | |
| 295 | - }//GEN-LAST:event_jBtnRowInsActionPerformed | |
| 296 | - | |
| 297 | - private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed | |
| 298 | - int SelectedRow = jTable1.getSelectedRow(); | |
| 299 | - if(!(SelectedRow < 0)) { | |
| 300 | - this.serachDataList.remove(SelectedRow); | |
| 301 | - } | |
| 302 | - }//GEN-LAST:event_jBtnRowDelActionPerformed | |
| 303 | - | |
| 304 | - private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed | |
| 305 | - jFileChooser1.setDialogTitle("読込"); | |
| 306 | - int selected = jFileChooser1.showOpenDialog(this); | |
| 307 | - if (selected == JFileChooser.APPROVE_OPTION) { | |
| 308 | - File file = jFileChooser1.getSelectedFile(); | |
| 309 | - serachDataList.clear(); | |
| 310 | - sio.load(file); | |
| 311 | - jTxtUrl.setText(sio.geturl()); | |
| 312 | - serachDataList.addAll(sio.getslist()); | |
| 313 | - } | |
| 314 | - }//GEN-LAST:event_jMenuLoadActionPerformed | |
| 315 | - | |
| 316 | - private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed | |
| 317 | - jFileChooser1.setDialogTitle("保存"); | |
| 318 | - int selected = jFileChooser1.showSaveDialog(this); | |
| 319 | - if (selected == JFileChooser.APPROVE_OPTION) { | |
| 320 | - File file = jFileChooser1.getSelectedFile(); | |
| 321 | - sio.seturl(jTxtUrl.getText()); | |
| 322 | - sio.setslist(slist); | |
| 323 | - sio.save(file); | |
| 324 | - } | |
| 325 | - }//GEN-LAST:event_jMenuSaveActionPerformed | |
| 326 | - | |
| 327 | - private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked | |
| 328 | - jTxtRtn.setText(null); | |
| 329 | - HtmlParser par = new HtmlParser(jTxtUrl.getText()); | |
| 330 | - | |
| 331 | - String strdata = par.getStringPageData(); | |
| 332 | - String strsearch = "一致する銘柄は見つかりませんでした"; | |
| 333 | - if(check404(strdata, strsearch)) { | |
| 334 | - jTxtRtn.append(strsearch); | |
| 335 | - return; | |
| 336 | - } | |
| 337 | - | |
| 338 | - for (Object slist1 : slist) { | |
| 339 | - SearchData sdata = (SearchData)slist1; | |
| 340 | - String ans = sdata.getitem(); | |
| 341 | - String rtn = par.search(sdata); | |
| 342 | - jTxtRtn.append(ans + "\t" + rtn + "\r\n"); | |
| 343 | - } | |
| 344 | - jTxtRtn.setCaretPosition(0); | |
| 345 | - }//GEN-LAST:event_jMenu2MouseClicked | |
| 346 | - | |
| 347 | - private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed | |
| 348 | - int SelectedRow = jTable1.getSelectedRow(); | |
| 349 | - if(SelectedRow >= 0) { | |
| 350 | - SearchData SelectData = (SearchData)slist.get(SelectedRow); | |
| 351 | - SearchData Cpydata = new SearchData(SelectData); | |
| 352 | - this.serachDataList.add(SelectedRow, Cpydata); | |
| 353 | - } | |
| 354 | - }//GEN-LAST:event_jBtnRowCpyActionPerformed | |
| 355 | - | |
| 356 | - private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed | |
| 357 | - jTxtRtn.setText(null); | |
| 358 | - HtmlParser par = new HtmlParser(jTxtUrl.getText()); | |
| 359 | - | |
| 360 | - String strdata = par.getStringPageData(); | |
| 361 | - String strsearch = "一致する銘柄は見つかりませんでした"; | |
| 362 | - if(check404(strdata, strsearch)) { | |
| 363 | - jTxtRtn.append(strsearch); | |
| 364 | - return; | |
| 365 | - } | |
| 366 | - | |
| 367 | - for (Object slist1 : slist) { | |
| 368 | - SearchData sdata = (SearchData)slist1; | |
| 369 | - String ans = sdata.getitem(); | |
| 370 | - String rtn = par.search(sdata); | |
| 371 | - jTxtRtn.append(ans + "\t" + rtn + "\r\n"); | |
| 372 | - } | |
| 373 | - jTxtRtn.setCaretPosition(0); | |
| 374 | - }//GEN-LAST:event_jBtnSearchActionPerformed | |
| 375 | - | |
| 376 | - private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed | |
| 377 | - Desktop desktop = Desktop.getDesktop(); | |
| 378 | - String uriString = jTxtUrl.getText(); | |
| 379 | - try { | |
| 380 | - URI uri = new URI(uriString); | |
| 381 | - desktop.browse(uri); | |
| 382 | - | |
| 383 | - } catch (URISyntaxException | IOException ex) { | |
| 384 | - Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex); | |
| 385 | - } | |
| 386 | - }//GEN-LAST:event_jMenuItem1ActionPerformed | |
| 387 | - | |
| 388 | - boolean check404(String strdata, String strsearch) { | |
| 389 | - if(strdata.contains(strsearch)) { | |
| 390 | - return true; | |
| 391 | - } | |
| 392 | - return false; | |
| 393 | - } | |
| 394 | - | |
| 395 | - /** | |
| 396 | - * @param args the command line arguments | |
| 397 | - */ | |
| 398 | - public static void main(String args[]) { | |
| 399 | - /* Set the Nimbus look and feel */ | |
| 400 | - //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) "> | |
| 401 | - /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel. | |
| 402 | - * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html | |
| 403 | - */ | |
| 404 | - try { | |
| 405 | - for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) { | |
| 406 | - if ("Nimbus".equals(info.getName())) { | |
| 407 | - javax.swing.UIManager.setLookAndFeel(info.getClassName()); | |
| 408 | - break; | |
| 409 | - } | |
| 410 | - } | |
| 411 | - } catch (ClassNotFoundException ex) { | |
| 412 | - java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
| 413 | - } catch (InstantiationException ex) { | |
| 414 | - java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
| 415 | - } catch (IllegalAccessException ex) { | |
| 416 | - java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
| 417 | - } catch (javax.swing.UnsupportedLookAndFeelException ex) { | |
| 418 | - java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
| 419 | - } | |
| 420 | - //</editor-fold> | |
| 421 | - | |
| 422 | - /* Create and display the form */ | |
| 423 | - java.awt.EventQueue.invokeLater(new Runnable() { | |
| 424 | - @Override | |
| 425 | - public void run() { | |
| 426 | - new HtmlSearch().setVisible(true); | |
| 427 | - } | |
| 428 | - }); | |
| 429 | - } | |
| 430 | - | |
| 431 | - // Variables declaration - do not modify//GEN-BEGIN:variables | |
| 432 | - private javax.swing.JButton jBtnRowCpy; | |
| 433 | - private javax.swing.JButton jBtnRowDel; | |
| 434 | - private javax.swing.JButton jBtnRowIns; | |
| 435 | - private javax.swing.JButton jBtnSearch; | |
| 436 | - private javax.swing.JFileChooser jFileChooser1; | |
| 437 | - private javax.swing.JLabel jLabel1; | |
| 438 | - private javax.swing.JMenu jMenu1; | |
| 439 | - private javax.swing.JMenu jMenu2; | |
| 440 | - private javax.swing.JMenu jMenu3; | |
| 441 | - private javax.swing.JMenuBar jMenuBar1; | |
| 442 | - private javax.swing.JMenuItem jMenuItem1; | |
| 443 | - private javax.swing.JMenuItem jMenuLoad; | |
| 444 | - private javax.swing.JMenuItem jMenuSave; | |
| 445 | - private javax.swing.JPanel jPanel1; | |
| 446 | - private javax.swing.JPanel jPanel2; | |
| 447 | - private javax.swing.JScrollPane jScrollPane1; | |
| 448 | - private javax.swing.JScrollPane jScrollPane2; | |
| 449 | - private javax.swing.JTable jTable1; | |
| 450 | - private javax.swing.JTextArea jTxtRtn; | |
| 451 | - private javax.swing.JTextField jTxtUrl; | |
| 452 | - private org.jdesktop.beansbinding.BindingGroup bindingGroup; | |
| 453 | - // End of variables declaration//GEN-END:variables | |
| 454 | -} |
| @@ -1,163 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.util.ArrayList; | |
| 26 | -import java.util.Enumeration; | |
| 27 | -import javax.swing.text.MutableAttributeSet; | |
| 28 | -import javax.swing.text.html.HTML; | |
| 29 | - | |
| 30 | -/** | |
| 31 | - * HTMLタグの属性情報を保持する. | |
| 32 | - * @author kgto | |
| 33 | - */ | |
| 34 | -public class AttributeData { | |
| 35 | - | |
| 36 | - public AttributeData() { | |
| 37 | - AttrList = new ArrayList(); | |
| 38 | - size = 0; | |
| 39 | - } | |
| 40 | - | |
| 41 | - /** | |
| 42 | - * 属性情報追加. | |
| 43 | - * @param tag | |
| 44 | - * @param attr | |
| 45 | - */ | |
| 46 | - public void add(HTML.Tag tag, MutableAttributeSet attr) { | |
| 47 | - | |
| 48 | - int tagcount = tagcnt(tag); | |
| 49 | - ++tagcount; | |
| 50 | - | |
| 51 | - Enumeration e = attr.getAttributeNames(); | |
| 52 | - while(e.hasMoreElements()) { | |
| 53 | - Object obj = e.nextElement(); | |
| 54 | - | |
| 55 | - AttrData a = new AttrData(); | |
| 56 | - a.tag = tag; | |
| 57 | - a.count = tagcount; | |
| 58 | - a.attrname = obj.toString(); | |
| 59 | - a.attrvalue = attr.getAttribute(obj).toString(); | |
| 60 | - | |
| 61 | - AttrList.add(a); | |
| 62 | - size = AttrList.size(); | |
| 63 | - } | |
| 64 | - | |
| 65 | - } | |
| 66 | - | |
| 67 | - /** | |
| 68 | - * 属性情報検索. | |
| 69 | - * @param tag | |
| 70 | - * @param attrname | |
| 71 | - * @param attrvalue | |
| 72 | - * @return | |
| 73 | - */ | |
| 74 | - public boolean search(HTML.Tag tag, String attrname, String attrvalue) { | |
| 75 | - boolean ret = false; | |
| 76 | - for (Object AttrList1 : AttrList) { | |
| 77 | - AttrData a = (AttrData)AttrList1; | |
| 78 | - if(a.tag == tag) { | |
| 79 | - if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) { | |
| 80 | - ret = true; | |
| 81 | - } | |
| 82 | - } | |
| 83 | - } | |
| 84 | - return ret; | |
| 85 | - } | |
| 86 | - | |
| 87 | - public boolean searchId(HTML.Tag tag, String attrvalue) { | |
| 88 | - return search(tag, "id", attrvalue); | |
| 89 | - } | |
| 90 | - | |
| 91 | - public boolean searchClass(HTML.Tag tag, String attrvalue) { | |
| 92 | - return search(tag, "class", attrvalue); | |
| 93 | - } | |
| 94 | - | |
| 95 | - /** | |
| 96 | - * 属性の値を取得する. | |
| 97 | - * @param tag | |
| 98 | - * @param attrname | |
| 99 | - * @return | |
| 100 | - */ | |
| 101 | - public ArrayList getvale(HTML.Tag tag, String attrname) { | |
| 102 | - ArrayList ret = new ArrayList(); | |
| 103 | - for (Object AttrList1 : AttrList) { | |
| 104 | - AttrData a = (AttrData)AttrList1; | |
| 105 | - if(a.tag == tag) { | |
| 106 | - if(a.attrname.equals(attrname)) { | |
| 107 | - ret.add(a.attrvalue); | |
| 108 | - } | |
| 109 | - } | |
| 110 | - } | |
| 111 | - return ret; | |
| 112 | - } | |
| 113 | - | |
| 114 | - /** | |
| 115 | - * 引数で渡されたTAGの最新カウント数を返す. | |
| 116 | - * @param tag | |
| 117 | - * @return | |
| 118 | - */ | |
| 119 | - private int tagcnt(HTML.Tag tag) { | |
| 120 | - int wkcnt = 0; | |
| 121 | - for (Object AttrList1 : AttrList) { | |
| 122 | - AttrData a = (AttrData)AttrList1; | |
| 123 | - if(a.tag == tag) { | |
| 124 | - if(wkcnt < a.count) { | |
| 125 | - wkcnt = a.count; | |
| 126 | - } | |
| 127 | - } | |
| 128 | - } | |
| 129 | - return wkcnt; | |
| 130 | - } | |
| 131 | - | |
| 132 | - // AttrList の内容を返すメソッド | |
| 133 | - public HTML.Tag gettag(int i) { | |
| 134 | - AttrData a = (AttrData)AttrList.get(i); | |
| 135 | - return a.tag; | |
| 136 | - } | |
| 137 | - | |
| 138 | - public int getcount(int i) { | |
| 139 | - AttrData a = (AttrData)AttrList.get(i); | |
| 140 | - return a.count; | |
| 141 | - } | |
| 142 | - | |
| 143 | - public String getattrname(int i) { | |
| 144 | - AttrData a = (AttrData)AttrList.get(i); | |
| 145 | - return a.attrname; | |
| 146 | - } | |
| 147 | - | |
| 148 | - public String getattrvalue(int i) { | |
| 149 | - AttrData a = (AttrData)AttrList.get(i); | |
| 150 | - return a.attrvalue; | |
| 151 | - } | |
| 152 | - | |
| 153 | - // フィールド変数 | |
| 154 | - public class AttrData { | |
| 155 | - public HTML.Tag tag; | |
| 156 | - public int count; | |
| 157 | - public String attrname; | |
| 158 | - public String attrvalue; | |
| 159 | - } | |
| 160 | - public ArrayList AttrList; | |
| 161 | - public int size; // AttrListのサイズ | |
| 162 | - | |
| 163 | -} |
| @@ -1,264 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.io.File; | |
| 26 | -import java.io.FileInputStream; | |
| 27 | -import java.io.FileNotFoundException; | |
| 28 | -import java.io.IOException; | |
| 29 | -import java.util.logging.FileHandler; | |
| 30 | -import java.util.logging.Formatter; | |
| 31 | -import java.util.logging.Handler; | |
| 32 | -import java.util.logging.Level; | |
| 33 | -import java.util.logging.LogManager; | |
| 34 | -import java.util.logging.LogRecord; | |
| 35 | -import java.util.logging.Logger; | |
| 36 | -import javax.swing.text.MutableAttributeSet; | |
| 37 | -import javax.swing.text.html.HTML; | |
| 38 | - | |
| 39 | -/** | |
| 40 | - * デバック情報. | |
| 41 | - * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。 | |
| 42 | - * @author kgto | |
| 43 | - */ | |
| 44 | -public class DebugProcess { | |
| 45 | - // 設定ファイル名 | |
| 46 | - protected static final String configurationFilename = "Debug.prop"; | |
| 47 | - // ロガー名 | |
| 48 | - protected static final Logger logger = Logger.getLogger("WebScraping"); | |
| 49 | - // ログ出力デフォルトレベル | |
| 50 | - protected static final Level loggerlevel = Level.FINEST; | |
| 51 | - | |
| 52 | - | |
| 53 | - /** | |
| 54 | - * ログ出力設定. | |
| 55 | - * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、 | |
| 56 | - * ファイルハンドラの設定と出力書式の設定を行う。 | |
| 57 | - */ | |
| 58 | - public static void debuglog_set() { | |
| 59 | - try { | |
| 60 | - initLogConfiguration(); | |
| 61 | - | |
| 62 | - if(Level.ALL.equals(logger.getLevel())) { | |
| 63 | - //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2)); | |
| 64 | - logger.addHandler(new FileHandler("WebScraping%g.log", true)); | |
| 65 | - } | |
| 66 | - setFomatter(); | |
| 67 | - | |
| 68 | - } catch (IOException | SecurityException ex) { | |
| 69 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 70 | - } | |
| 71 | - } | |
| 72 | - | |
| 73 | - /** | |
| 74 | - * ログ出力設定解除. | |
| 75 | - */ | |
| 76 | - public static void debuglog_unset() { | |
| 77 | - } | |
| 78 | - | |
| 79 | - | |
| 80 | - /** | |
| 81 | - * デバック出力(HTML解析-タグ&属性). | |
| 82 | - * HTMLのタグと属性の解析状態を出力する。 | |
| 83 | - * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br> | |
| 84 | - * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br> | |
| 85 | - * @param tag タグ | |
| 86 | - * @param attr 属性 | |
| 87 | - * @param methodname このメソッドを呼び出した親メソッド名 | |
| 88 | - * @param count HTMLタグの階層レベル | |
| 89 | - */ | |
| 90 | - public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr, | |
| 91 | - String methodname, int count) { | |
| 92 | - | |
| 93 | - // ログ出力レベルチェック | |
| 94 | - if(logger.getLevel() == null) { | |
| 95 | - return; | |
| 96 | - } | |
| 97 | - if(logger.getLevel().intValue() > loggerlevel.intValue()) { | |
| 98 | - return; | |
| 99 | - } | |
| 100 | - | |
| 101 | - // 編集処理 | |
| 102 | - char kbn = ' '; | |
| 103 | - if("handleStartTag".equals(methodname)) { | |
| 104 | - kbn = 'F'; | |
| 105 | - } | |
| 106 | - if("handleEndTag".equals(methodname)) { | |
| 107 | - kbn = 'E'; | |
| 108 | - } | |
| 109 | - if("handleSimpleTag".equals(methodname)) { | |
| 110 | - kbn = 'S'; | |
| 111 | - } | |
| 112 | - | |
| 113 | - StringBuilder strBuf = new StringBuilder(80); | |
| 114 | - strBuf.append(count).append(" : "); | |
| 115 | - strBuf.append(kbn).append(" : "); | |
| 116 | - strBuf.append(tag.toString()); | |
| 117 | - // 属性情報 | |
| 118 | - if(attr != null) { | |
| 119 | - if(attr.getAttributeCount() != 0) { | |
| 120 | - AttributeData handleAttrData = new AttributeData(); | |
| 121 | - handleAttrData.add(tag, attr); | |
| 122 | - for(int i = 0; i < handleAttrData.size; i++) { | |
| 123 | - strBuf.append(" ["); | |
| 124 | - strBuf.append(handleAttrData.getattrname(i)); | |
| 125 | - strBuf.append("]"); | |
| 126 | - strBuf.append(handleAttrData.getcount(i)); | |
| 127 | - strBuf.append(" = "); | |
| 128 | - strBuf.append(handleAttrData.getattrvalue(i)); | |
| 129 | - } | |
| 130 | - } | |
| 131 | - } | |
| 132 | - | |
| 133 | - logger.log(loggerlevel, strBuf.toString()); | |
| 134 | - } | |
| 135 | - | |
| 136 | - /** | |
| 137 | - * デバック出力(メッセージ). | |
| 138 | - * 引数に渡された任意のメッセージを出力する。 | |
| 139 | - * @param str メッセージ | |
| 140 | - * @param methodname このメソッドを呼び出した親メソッド名 | |
| 141 | - */ | |
| 142 | - public static void htmlinfo(String str, String methodname) { | |
| 143 | - logger.log(loggerlevel, str); | |
| 144 | - } | |
| 145 | - | |
| 146 | - public static void htmlinfo(String str) { | |
| 147 | - logger.log(loggerlevel, str); | |
| 148 | - } | |
| 149 | - | |
| 150 | - /** | |
| 151 | - * デバック出力(HTML解析-本文). | |
| 152 | - * 本文の内容を出力する。 | |
| 153 | - * @param data 本文(HTML内の文字列) | |
| 154 | - * @param methodname このメソッドを呼び出した親メソッド名 | |
| 155 | - */ | |
| 156 | - public static void htmlinfo(char[] data, String methodname) { | |
| 157 | - String dat = new String(data); | |
| 158 | - logger.log(loggerlevel, dat); | |
| 159 | - } | |
| 160 | - | |
| 161 | - public static void htmlinfo(char[] data) { | |
| 162 | - String dat = new String(data); | |
| 163 | - logger.log(loggerlevel, dat); | |
| 164 | - } | |
| 165 | - | |
| 166 | - /** | |
| 167 | - * デバック出力(検索キー). | |
| 168 | - * 検索キー(SearchData)の内容を出力する。 | |
| 169 | - * @param skey | |
| 170 | - */ | |
| 171 | - public static void searchDatainfo(SearchData skey) { | |
| 172 | - | |
| 173 | - StringBuilder strBuf = new StringBuilder(30); | |
| 174 | - strBuf.append("SearchData KEY tag["); | |
| 175 | - strBuf.append(skey.getHtmltag()); | |
| 176 | - strBuf.append("] ID["); | |
| 177 | - strBuf.append(skey.getHtmlid()); | |
| 178 | - strBuf.append("] CLASS["); | |
| 179 | - strBuf.append(skey.getHtmlclass()); | |
| 180 | - strBuf.append("]\n"); | |
| 181 | - | |
| 182 | - logger.log(loggerlevel, strBuf.toString()); | |
| 183 | - } | |
| 184 | - | |
| 185 | - /** | |
| 186 | - * ログ出力設定ファイルチェック. | |
| 187 | - * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。 | |
| 188 | - */ | |
| 189 | - private static void initLogConfiguration() { | |
| 190 | - | |
| 191 | - File file = new File(configurationFilename); | |
| 192 | - try { | |
| 193 | - if(file.exists()) { | |
| 194 | - FileInputStream inputStream = new FileInputStream(file); | |
| 195 | - // 設定ファイルの読み込み | |
| 196 | - LogManager.getLogManager().readConfiguration(inputStream); | |
| 197 | - } | |
| 198 | - | |
| 199 | - } catch (FileNotFoundException ex) { | |
| 200 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 201 | - } catch (IOException ex) { | |
| 202 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 203 | - } | |
| 204 | - } | |
| 205 | - | |
| 206 | - /** | |
| 207 | - * ログ出力フォーマッター設定. | |
| 208 | - * ファイルへログ出力時の書式を設定する。 | |
| 209 | - */ | |
| 210 | - private static void setFomatter() { | |
| 211 | - Handler[] handlers = logger.getHandlers(); | |
| 212 | - for(int i = 0 ; i < handlers.length ; i++) { | |
| 213 | - if(handlers[i] instanceof java.util.logging.FileHandler) { | |
| 214 | - handlers[i].setFormatter(new HtmlFormatter()); | |
| 215 | - } | |
| 216 | - } | |
| 217 | - } | |
| 218 | - | |
| 219 | -} | |
| 220 | - | |
/**
 * Formatter used for the debug log file.
 * @author kgto
 */
class HtmlFormatter extends Formatter {
    /**
     * Builds the text written for one log record.
     * Layout:<br>
     * yyyy-MM-dd HH:mm:ss LEVEL&lt;method name&gt;message<br>
     * followed, when the record carries a throwable, by the throwable's
     * description and one tab-indented line per stack frame.
     */
    @Override
    public synchronized String format(final LogRecord aRecord) {

        final StringBuilder out = new StringBuilder(100);
        final String sourceMethod = aRecord.getSourceMethodName();

        // Header line: timestamp, level, source method, formatted message.
        out.append(String.format("%tF %<tT", aRecord.getMillis()))
           .append(' ')
           .append(aRecord.getLevel())
           .append('<')
           .append(sourceMethod == null ? "N/A" : sourceMethod)
           .append('>')
           .append(formatMessage(aRecord))
           .append('\n');

        final Throwable thrown = aRecord.getThrown();
        if (thrown == null) {
            return out.toString();
        }

        // Exception case: description plus stack trace.
        out.append(thrown.toString()).append('\n');
        for (StackTraceElement frame : thrown.getStackTrace()) {
            out.append('\t').append(frame.toString()).append('\n');
        }
        return out.toString();
    }
}
| @@ -1,255 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.io.*; | |
| 26 | -import java.net.*; | |
| 27 | -import java.util.ArrayList; | |
| 28 | -import java.util.logging.Level; | |
| 29 | -import java.util.logging.Logger; | |
| 30 | -import java.util.regex.Matcher; | |
| 31 | -import java.util.regex.Pattern; | |
| 32 | -import javax.swing.text.html.parser.ParserDelegator; | |
| 33 | - | |
| 34 | -/** | |
| 35 | - * | |
| 36 | - * @author kgto | |
| 37 | - */ | |
| 38 | -public class HtmlParser { | |
| 39 | - | |
| 40 | - URL url; | |
| 41 | - String pageData; | |
| 42 | - ArrayList sData; | |
| 43 | - | |
| 44 | - // 作業ワーク | |
| 45 | - String htmltag; | |
| 46 | - String htmlid; | |
| 47 | - String htmlclass; | |
| 48 | - | |
/**
 * Creates a parser for the given URL and fetches the page immediately.
 * @param UrlAdress the page to fetch
 */
public HtmlParser(URL UrlAdress) {
    DebugProcess.debuglog_set();
    url = UrlAdress;
    getPageData();
}

/**
 * Creates a parser for the given URL text and fetches the page immediately.
 * A malformed URL is logged; in that case no page is fetched.
 * @param UrlAdress the page address as text
 */
public HtmlParser(String UrlAdress) {
    DebugProcess.debuglog_set();
    try {
        this.url = new URL(UrlAdress);
        this.getPageData();
    } catch (MalformedURLException malformed) {
        Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, malformed);
    }
}

/**
 * Creates a parser with no URL; nothing is fetched.
 */
public HtmlParser() {
    DebugProcess.debuglog_set();
    this.url = null;
}
| 70 | - | |
| 71 | - public String getStringPageData() { | |
| 72 | - return pageData; | |
| 73 | - } | |
| 74 | - | |
| 75 | - public void seturl(URL UrlAdress) { | |
| 76 | - this.url = UrlAdress; | |
| 77 | - getPageData(); | |
| 78 | - } | |
| 79 | - | |
| 80 | - public void seturl(String UrlAdress) { | |
| 81 | - try { | |
| 82 | - url = new URL(UrlAdress); | |
| 83 | - getPageData(); | |
| 84 | - | |
| 85 | - } catch (MalformedURLException ex) { | |
| 86 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 87 | - } | |
| 88 | - } | |
| 89 | - | |
| 90 | - /** | |
| 91 | - * HTMLページ内検索. | |
| 92 | - * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
| 93 | - * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
| 94 | - * 行った結果を返す。<br> | |
| 95 | - * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
| 96 | - * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
| 97 | - * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
| 98 | - * @param skey 検索キーデータ(SearchData) | |
| 99 | - * @return String 検索キーに一致するデータの文字列 | |
| 100 | - */ | |
| 101 | - public String search(SearchData skey) { | |
| 102 | - | |
| 103 | - // htmlページ内を検索 | |
| 104 | - if(isHtmlkeyEq(skey) == false) { | |
| 105 | - searchPageData(skey); | |
| 106 | - } | |
| 107 | - /* | |
| 108 | - around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
| 109 | - 入力無し:取得した全ての情報を返す。 | |
| 110 | - */ | |
| 111 | - String regexp = skey.getregexp(); | |
| 112 | - if(skey.getaround().length() > 0) { | |
| 113 | - int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換 | |
| 114 | - if(wkAround < sData.size()) { | |
| 115 | - String str = (String)sData.get(wkAround); | |
| 116 | - String rtn = RegularExpression(str, regexp); | |
| 117 | - return rtn; | |
| 118 | - } | |
| 119 | - } else { | |
| 120 | - StringBuilder strbuf = new StringBuilder(); | |
| 121 | - for (Object sData1 : sData) { | |
| 122 | - String str = (String)sData1; | |
| 123 | - String rtn = RegularExpression(str, regexp); | |
| 124 | - if(strbuf.length() > 0) { | |
| 125 | - strbuf.append("\t"); | |
| 126 | - } | |
| 127 | - strbuf.append(rtn); | |
| 128 | - } | |
| 129 | - return strbuf.toString(); | |
| 130 | - } | |
| 131 | - return null; | |
| 132 | - } | |
| 133 | - | |
| 134 | - /** | |
| 135 | - * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
| 136 | - * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
| 137 | - * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
| 138 | - */ | |
| 139 | - boolean isHtmlkeyEq(SearchData skey) { | |
| 140 | - | |
| 141 | - String stag = skey.getHtmltag(); | |
| 142 | - String sid = skey.getHtmlid(); | |
| 143 | - String sclass = skey.getHtmlclass(); | |
| 144 | - | |
| 145 | - boolean rtn = true; | |
| 146 | - | |
| 147 | - // htmltag | |
| 148 | - if(htmltag == null) { | |
| 149 | - rtn = false; | |
| 150 | - } else { | |
| 151 | - if(htmltag.equals(stag) == false) { | |
| 152 | - rtn = false; | |
| 153 | - } | |
| 154 | - } | |
| 155 | - | |
| 156 | - // htmlid | |
| 157 | - if(htmlid == null) { | |
| 158 | - rtn = false; | |
| 159 | - } else { | |
| 160 | - if(htmlid.equals(sid) == false) { | |
| 161 | - rtn = false; | |
| 162 | - } | |
| 163 | - } | |
| 164 | - | |
| 165 | - // htmlclass | |
| 166 | - if(htmlclass == null) { | |
| 167 | - rtn = false; | |
| 168 | - } else { | |
| 169 | - if(htmlclass.equals(sclass) == false) { | |
| 170 | - rtn = false; | |
| 171 | - } | |
| 172 | - } | |
| 173 | - | |
| 174 | - if(!rtn) { | |
| 175 | - htmltag = stag; | |
| 176 | - htmlid = sid; | |
| 177 | - htmlclass = sclass; | |
| 178 | - } | |
| 179 | - | |
| 180 | - return rtn; | |
| 181 | - } | |
| 182 | - | |
| 183 | - /** | |
| 184 | - * 正規表現検索. | |
| 185 | - * @param strdata | |
| 186 | - * @param regexp | |
| 187 | - * @return | |
| 188 | - */ | |
| 189 | - String RegularExpression(String strdata, String regexp) { | |
| 190 | - String expdata = null; | |
| 191 | - | |
| 192 | - //regexpのチェック | |
| 193 | - if(regexp.isEmpty()) { | |
| 194 | - expdata = strdata; | |
| 195 | - return expdata; | |
| 196 | - } | |
| 197 | - | |
| 198 | - //正規表現検索 | |
| 199 | - Pattern ptn = Pattern.compile(regexp); | |
| 200 | - Matcher matchdata = ptn.matcher(strdata); | |
| 201 | - if (matchdata.find()) { | |
| 202 | - if(matchdata.groupCount() >= 1) { | |
| 203 | - expdata = matchdata.group(1); | |
| 204 | - } | |
| 205 | - } | |
| 206 | - return expdata; | |
| 207 | - } | |
| 208 | - | |
| 209 | - /** | |
| 210 | - * インターネット接続. | |
| 211 | - */ | |
| 212 | - private void getPageData() { | |
| 213 | - try { | |
| 214 | - //URL url = new URL(UrlAdress); | |
| 215 | - HttpURLConnection con = (HttpURLConnection)url.openConnection(); | |
| 216 | - con.setRequestMethod("GET"); | |
| 217 | - BufferedReader reader = new BufferedReader( | |
| 218 | - new InputStreamReader(con.getInputStream(), "utf-8")); | |
| 219 | - String wkline; | |
| 220 | - StringBuilder sb = new StringBuilder(); | |
| 221 | - while((wkline = reader.readLine()) != null) { | |
| 222 | - sb.append(wkline).append("\n"); | |
| 223 | - } | |
| 224 | - pageData = sb.toString(); | |
| 225 | - | |
| 226 | - con.disconnect(); | |
| 227 | - } | |
| 228 | - catch(IOException ex) { | |
| 229 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 230 | - } | |
| 231 | - } | |
| 232 | - | |
| 233 | - /** | |
| 234 | - * HTMLパーサ. | |
| 235 | - * @param skey | |
| 236 | - */ | |
| 237 | - private void searchPageData(SearchData skey) { | |
| 238 | - | |
| 239 | - DebugProcess.searchDatainfo(skey); | |
| 240 | - | |
| 241 | - Reader reader; | |
| 242 | - try { | |
| 243 | - reader = new BufferedReader(new StringReader(pageData)); | |
| 244 | - HtmlParserCallback cb = new HtmlParserCallback(skey); | |
| 245 | - ParserDelegator pd = new ParserDelegator(); | |
| 246 | - pd.parse(reader, cb, true); | |
| 247 | - reader.close(); | |
| 248 | - | |
| 249 | - sData = cb.getrtnData(); | |
| 250 | - | |
| 251 | - } catch (IOException ex) { | |
| 252 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 253 | - } | |
| 254 | - } | |
| 255 | -} |
| @@ -1,113 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -/** | |
| 26 | - * 検索データ. | |
| 27 | - * @author kgto | |
| 28 | - */ | |
| 29 | -public class SearchData { | |
| 30 | - | |
| 31 | - private String item; | |
| 32 | - private String htmltag; | |
| 33 | - private String htmlid; | |
| 34 | - private String htmlclass; | |
| 35 | - private String around; | |
| 36 | - private String regexp; | |
| 37 | - | |
| 38 | - public SearchData() { | |
| 39 | - initialize(); | |
| 40 | - } | |
| 41 | - | |
| 42 | - public SearchData(SearchData dat) { | |
| 43 | - this.item = dat.getitem(); | |
| 44 | - this.htmltag = dat.getHtmltag(); | |
| 45 | - this.htmlid = dat.getHtmlid(); | |
| 46 | - this.htmlclass = dat.getHtmlclass(); | |
| 47 | - this.around = dat.getaround(); | |
| 48 | - this.regexp = dat.getregexp(); | |
| 49 | - } | |
| 50 | - | |
| 51 | - /** | |
| 52 | - * データ初期化. | |
| 53 | - */ | |
| 54 | - public final void initialize() { | |
| 55 | - this.item = ""; | |
| 56 | - this.htmltag = ""; | |
| 57 | - this.htmlid = ""; | |
| 58 | - this.htmlclass = ""; | |
| 59 | - this.around = ""; | |
| 60 | - this.regexp = ""; | |
| 61 | - } | |
| 62 | - | |
| 63 | - // Setter | |
| 64 | - public void setitem(String item) { | |
| 65 | - this.item = item; | |
| 66 | - } | |
| 67 | - | |
| 68 | - public void setHtmltag(String htmltag) { | |
| 69 | - this.htmltag = htmltag; | |
| 70 | - } | |
| 71 | - | |
| 72 | - public void setHtmlid(String htmlid) { | |
| 73 | - this.htmlid = htmlid; | |
| 74 | - } | |
| 75 | - | |
| 76 | - public void setHtmlclass(String htmlclass) { | |
| 77 | - this.htmlclass = htmlclass; | |
| 78 | - } | |
| 79 | - | |
| 80 | - public void setaround(String around) { | |
| 81 | - this.around = around; | |
| 82 | - } | |
| 83 | - | |
| 84 | - public void setregexp(String regexp) { | |
| 85 | - this.regexp = regexp; | |
| 86 | - } | |
| 87 | - | |
| 88 | - // Getter | |
| 89 | - public String getitem() { | |
| 90 | - return item; | |
| 91 | - } | |
| 92 | - | |
| 93 | - public String getHtmltag() { | |
| 94 | - return htmltag; | |
| 95 | - } | |
| 96 | - | |
| 97 | - public String getHtmlid() { | |
| 98 | - return htmlid; | |
| 99 | - } | |
| 100 | - | |
| 101 | - public String getHtmlclass() { | |
| 102 | - return htmlclass; | |
| 103 | - } | |
| 104 | - | |
| 105 | - public String getaround() { | |
| 106 | - return around; | |
| 107 | - } | |
| 108 | - | |
| 109 | - public String getregexp() { | |
| 110 | - return regexp; | |
| 111 | - } | |
| 112 | - | |
| 113 | -} |
| @@ -1,211 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.util.ArrayList; | |
| 26 | -import java.util.HashMap; | |
| 27 | -import javax.swing.text.MutableAttributeSet; | |
| 28 | -import javax.swing.text.html.HTML; | |
| 29 | -import javax.swing.text.html.HTMLEditorKit; | |
| 30 | - | |
| 31 | -/** | |
| 32 | - * HTMLパーサ部品. | |
| 33 | - * @author kgto | |
| 34 | - */ | |
| 35 | -class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
| 36 | - | |
| 37 | - // Tag毎の階層 | |
| 38 | - HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
| 39 | - | |
| 40 | - // serach key 情報 | |
| 41 | - String keytag; | |
| 42 | - String keyid; | |
| 43 | - String keyclass; | |
| 44 | - | |
| 45 | - // serach key と一致時の情報退避 | |
| 46 | - int bufCount = 0; | |
| 47 | - HTML.Tag bufTag = null; | |
| 48 | - // serach key と一致時の情報格納ワーク | |
| 49 | - StringBuilder bufText; | |
| 50 | - | |
| 51 | - // serach key と一致時のデータ一覧 | |
| 52 | - ArrayList sData; | |
| 53 | - | |
| 54 | - // 属性データ | |
| 55 | - AttributeData attrdata; | |
| 56 | - | |
| 57 | - protected HtmlParserCallback(SearchData skey) { | |
| 58 | - | |
| 59 | - // キー情報展開 | |
| 60 | - keytag = skey.getHtmltag(); | |
| 61 | - keyid = skey.getHtmlid(); | |
| 62 | - keyclass = skey.getHtmlclass(); | |
| 63 | - | |
| 64 | - sData = new ArrayList(); | |
| 65 | - } | |
| 66 | - | |
| 67 | - ArrayList getrtnData() { | |
| 68 | - return this.sData; | |
| 69 | - } | |
| 70 | - | |
| 71 | - @Override | |
| 72 | - public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 73 | - // Tag毎の階層を保持 | |
| 74 | - int count = 1; | |
| 75 | - if(tagMap.containsKey(tag)) { | |
| 76 | - count = tagMap.get(tag); | |
| 77 | - count++; | |
| 78 | - } | |
| 79 | - tagMap.put(tag, count); | |
| 80 | - | |
| 81 | - // 属性解析 | |
| 82 | - AttributeData handleStartattrdata = new AttributeData(); | |
| 83 | - handleStartattrdata.add(tag, attr); | |
| 84 | - | |
| 85 | - DebugProcess.htmlinfo(tag, attr, "handleStartTag", count); | |
| 86 | - | |
| 87 | - if(bufCount == 0) { | |
| 88 | - if(tag.toString().equals(keytag)) { | |
| 89 | - //if(serachAttribute(attr)) { | |
| 90 | - if(serachAttribute(tag, handleStartattrdata)) { | |
| 91 | - bufCount = count; | |
| 92 | - bufTag = tag; | |
| 93 | - attrdata = new AttributeData(); | |
| 94 | - bufText = new StringBuilder(); | |
| 95 | - } | |
| 96 | - } | |
| 97 | - } | |
| 98 | - if(bufCount > 0) { | |
| 99 | - attrdata.add(tag, attr); | |
| 100 | - } | |
| 101 | - } | |
| 102 | - | |
| 103 | - @Override | |
| 104 | - public void handleEndTag(HTML.Tag tag, int pos){ | |
| 105 | - // Tag毎の階層を取得 | |
| 106 | - int count = 0; | |
| 107 | - if(tagMap.containsKey(tag)) { | |
| 108 | - count = tagMap.get(tag); | |
| 109 | - } | |
| 110 | - | |
| 111 | - DebugProcess.htmlinfo(tag, null, "handleEndTag", count); | |
| 112 | - | |
| 113 | - if(tag.equals(bufTag) && count <= bufCount) { | |
| 114 | - | |
| 115 | - // 溜め込んだ一致情報をリストへ格納 | |
| 116 | - sData.add(bufText.toString()); | |
| 117 | - | |
| 118 | - // 退避したserach keyとの一致情報クリア | |
| 119 | - bufCount = 0; | |
| 120 | - bufTag = null; | |
| 121 | - bufText = null; | |
| 122 | - } | |
| 123 | - | |
| 124 | - // Tag毎の階層減算 | |
| 125 | - tagMap.put(tag, --count); | |
| 126 | - } | |
| 127 | - | |
| 128 | - @Override | |
| 129 | - public void handleText(char[] data, int pos){ | |
| 130 | - | |
| 131 | - DebugProcess.htmlinfo(data, "handleText"); | |
| 132 | - | |
| 133 | - String splitchar = "\t"; | |
| 134 | - //制御文字の削除 | |
| 135 | - // 0xa0 | |
| 136 | - StringBuilder buf = new StringBuilder(); | |
| 137 | - for(int i = 0; i < data.length; i++) { | |
| 138 | - if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
| 139 | - buf.append(data[i]); | |
| 140 | - } | |
| 141 | - } | |
| 142 | - if(bufCount > 0) { | |
| 143 | - if(bufText.length() > 0) { | |
| 144 | - bufText.append(splitchar); | |
| 145 | - } | |
| 146 | - bufText.append(buf.toString()); | |
| 147 | - } | |
| 148 | - } | |
| 149 | - | |
| 150 | - @Override | |
| 151 | - public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 152 | - if(bufCount > 0) { | |
| 153 | - attrdata.add(tag, attr); | |
| 154 | - } | |
| 155 | - DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0); | |
| 156 | - } | |
| 157 | - | |
| 158 | - /** | |
| 159 | - * ページ内のID/CLASS値と検索キーを比較する. | |
| 160 | - * @param attr ページのMutableAttributeSet | |
| 161 | - * @return boolean 検索キーと一致の時、true | |
| 162 | - */ | |
| 163 | - boolean serachAttribute(MutableAttributeSet attr) { | |
| 164 | - String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
| 165 | - String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
| 166 | - | |
| 167 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 168 | - if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
| 169 | - return true; | |
| 170 | - } | |
| 171 | - } | |
| 172 | - | |
| 173 | - if(keyid.isEmpty() == false) { | |
| 174 | - if(keyid.equals(currentID)) { | |
| 175 | - return true; | |
| 176 | - } | |
| 177 | - } | |
| 178 | - | |
| 179 | - if(keyclass.isEmpty() == false) { | |
| 180 | - if(keyclass.equals(currentClass)) { | |
| 181 | - return true; | |
| 182 | - } | |
| 183 | - } | |
| 184 | - | |
| 185 | - return false; | |
| 186 | - } | |
| 187 | - | |
| 188 | - /** | |
| 189 | - * ページ内のID/CLASS値と検索キーを比較する. | |
| 190 | - * @param tag | |
| 191 | - * @param attrdata | |
| 192 | - * @return boolean 検索キーと一致の時、true | |
| 193 | - */ | |
| 194 | - boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
| 195 | - // ID と CLASS の両方にキー入力有りの場合 | |
| 196 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 197 | - if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
| 198 | - return true; | |
| 199 | - } | |
| 200 | - } | |
| 201 | - // ID のキーチェック | |
| 202 | - if(keyid.isEmpty() == false) { | |
| 203 | - return attrdata.searchId(tag, keyid); | |
| 204 | - } | |
| 205 | - // CLASS のキーチェック | |
| 206 | - if(keyclass.isEmpty() == false) { | |
| 207 | - return attrdata.searchClass(tag, keyclass); | |
| 208 | - } | |
| 209 | - return false; | |
| 210 | - } | |
| 211 | -} |
| @@ -0,0 +1,547 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.utility; | |
| 24 | + | |
| 25 | +import webScraping.core.SearchData; | |
| 26 | +import java.io.BufferedReader; | |
| 27 | +import java.io.BufferedWriter; | |
| 28 | +import java.io.File; | |
| 29 | +import java.io.FileInputStream; | |
| 30 | +import java.io.FileNotFoundException; | |
| 31 | +import java.io.FileOutputStream; | |
| 32 | +import java.io.IOException; | |
| 33 | +import java.io.InputStreamReader; | |
| 34 | +import java.io.OutputStreamWriter; | |
| 35 | +import java.util.ArrayList; | |
| 36 | +import java.util.logging.Level; | |
| 37 | +import java.util.logging.Logger; | |
| 38 | +import javax.xml.parsers.DocumentBuilder; | |
| 39 | +import javax.xml.parsers.DocumentBuilderFactory; | |
| 40 | +import javax.xml.parsers.ParserConfigurationException; | |
| 41 | +import javax.xml.transform.Transformer; | |
| 42 | +import javax.xml.transform.TransformerConfigurationException; | |
| 43 | +import javax.xml.transform.TransformerException; | |
| 44 | +import javax.xml.transform.TransformerFactory; | |
| 45 | +import javax.xml.transform.dom.DOMSource; | |
| 46 | +import javax.xml.transform.stream.StreamResult; | |
| 47 | +import org.w3c.dom.DOMImplementation; | |
| 48 | +import org.w3c.dom.Document; | |
| 49 | +import org.w3c.dom.Element; | |
| 50 | +import org.w3c.dom.Node; | |
| 51 | +import org.w3c.dom.NodeList; | |
| 52 | +import org.xml.sax.SAXException; | |
| 53 | + | |
| 54 | +/** | |
| 55 | + * | |
| 56 | + * @author kgto | |
| 57 | + */ | |
| 58 | +public class SearchDataRW { | |
| 59 | + | |
| 60 | + DocumentBuilder builder; | |
| 61 | + public Document document; | |
| 62 | + Element root; | |
| 63 | + | |
| 64 | + private final String splitchar = "\t"; | |
| 65 | + | |
| 66 | + private String UrlAdress; | |
| 67 | + private ArrayList<SearchData> slist = new ArrayList<>(); | |
| 68 | + | |
| 69 | + public SearchDataRW() { | |
| 70 | + try { | |
| 71 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 72 | + builder = factory.newDocumentBuilder(); | |
| 73 | + | |
| 74 | + } catch (ParserConfigurationException ex) { | |
| 75 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 76 | + } | |
| 77 | + } | |
| 78 | + | |
| 79 | + public void seturl(String UrlAdress) { | |
| 80 | + this.UrlAdress = UrlAdress; | |
| 81 | + } | |
| 82 | + | |
| 83 | + public void setslist(ArrayList slist) { | |
| 84 | + this.slist = slist; | |
| 85 | + } | |
| 86 | + | |
| 87 | + public String geturl() { | |
| 88 | + return UrlAdress; | |
| 89 | + } | |
| 90 | + | |
| 91 | + public ArrayList getslist() { | |
| 92 | + return slist; | |
| 93 | + } | |
| 94 | + | |
| 95 | + /** | |
| 96 | + * 保存. | |
| 97 | + * @param file | |
| 98 | + */ | |
| 99 | + public void save(File file) { | |
| 100 | + //saveCsv(file); | |
| 101 | + //saveXml(file); | |
| 102 | + | |
| 103 | + saveUrl(UrlAdress); | |
| 104 | + saveSearchList(slist); | |
| 105 | + write(file); | |
| 106 | + } | |
| 107 | + | |
| 108 | + /** | |
| 109 | + * 読込. | |
| 110 | + * @param file | |
| 111 | + */ | |
| 112 | + public void load(File file) { | |
| 113 | + //loadCsv(file); | |
| 114 | + //loadXml(file); | |
| 115 | + | |
| 116 | + read(file); | |
| 117 | + loadUrl(); | |
| 118 | + loadSearchList(); | |
| 119 | + } | |
| 120 | + | |
| 121 | + /* ---------------------------------------------------------------------- */ | |
| 122 | + /** | |
| 123 | + * 保存(CSV形式). | |
| 124 | + * @param file | |
| 125 | + */ | |
| 126 | + public void saveCsv(File file) { | |
| 127 | + BufferedWriter bufferedwriter = null; | |
| 128 | + try { | |
| 129 | + //空のファイルを作成 | |
| 130 | + file.createNewFile(); | |
| 131 | + FileOutputStream fileoutputstream = new FileOutputStream(file); | |
| 132 | + OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8"); | |
| 133 | + bufferedwriter = new BufferedWriter(outputstreamwriter); | |
| 134 | + | |
| 135 | + // URL | |
| 136 | + bufferedwriter.write(UrlAdress); | |
| 137 | + bufferedwriter.write("\n"); | |
| 138 | + // 検索情報 | |
| 139 | + for(Object slist1 : slist) { | |
| 140 | + SearchData sdat = (SearchData)slist1; | |
| 141 | + // | |
| 142 | + StringBuilder str = new StringBuilder(); | |
| 143 | + str.append(sdat.getitem()).append(splitchar); | |
| 144 | + str.append(sdat.getHtmltag()).append(splitchar); | |
| 145 | + str.append(sdat.getHtmlid()).append(splitchar); | |
| 146 | + str.append(sdat.getHtmlclass()).append(splitchar); | |
| 147 | + str.append(sdat.getaround()).append(splitchar); | |
| 148 | + str.append(sdat.getregexp()).append("\n"); | |
| 149 | + // 書込み | |
| 150 | + bufferedwriter.write(str.toString()); | |
| 151 | + } | |
| 152 | + | |
| 153 | + } catch (IOException ex) { | |
| 154 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 155 | + } finally { | |
| 156 | + try { | |
| 157 | + if(bufferedwriter != null) { | |
| 158 | + bufferedwriter.close(); | |
| 159 | + } | |
| 160 | + | |
| 161 | + } catch (IOException ex) { | |
| 162 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 163 | + } | |
| 164 | + } | |
| 165 | + } | |
| 166 | + | |
| 167 | + /** | |
| 168 | + * 読込(CSV形式). | |
| 169 | + * @param file | |
| 170 | + */ | |
| 171 | + public void loadCsv(File file) { | |
| 172 | + slist = new ArrayList(); | |
| 173 | + | |
| 174 | + BufferedReader bufferedreader = null; | |
| 175 | + try { | |
| 176 | + FileInputStream fileinputstream = new FileInputStream(file); | |
| 177 | + InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8"); | |
| 178 | + bufferedreader = new BufferedReader(inputstreamreader); | |
| 179 | + | |
| 180 | + // URL | |
| 181 | + UrlAdress = bufferedreader.readLine(); | |
| 182 | + // 検索情報 | |
| 183 | + String rec; | |
| 184 | + while((rec = bufferedreader.readLine()) != null) { | |
| 185 | + String[] recary = rec.split(splitchar, -1); | |
| 186 | + SearchData sdat = new SearchData(); | |
| 187 | + sdat.setitem(recary[0]); | |
| 188 | + sdat.setHtmltag(recary[1]); | |
| 189 | + sdat.setHtmlid(recary[2]); | |
| 190 | + sdat.setHtmlclass(recary[3]); | |
| 191 | + sdat.setaround(recary[4]); | |
| 192 | + sdat.setregexp(recary[5]); | |
| 193 | + | |
| 194 | + slist.add(sdat); | |
| 195 | + } | |
| 196 | + | |
| 197 | + } catch(IOException ex) { | |
| 198 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 199 | + | |
| 200 | + } finally { | |
| 201 | + try { | |
| 202 | + if(bufferedreader != null) { | |
| 203 | + bufferedreader.close(); | |
| 204 | + } | |
| 205 | + | |
| 206 | + } catch (IOException ex) { | |
| 207 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 208 | + } | |
| 209 | + } | |
| 210 | + } | |
| 211 | + | |
| 212 | + /* ---------------------------------------------------------------------- */ | |
| 213 | + /** | |
| 214 | + * 保存(XML形式). | |
| 215 | + * @param file | |
| 216 | + */ | |
| 217 | + public void saveXml(File file) { | |
| 218 | + try { | |
| 219 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 220 | + DocumentBuilder wkBuilder = factory.newDocumentBuilder(); | |
| 221 | + DOMImplementation domImpl = wkBuilder.getDOMImplementation(); | |
| 222 | + | |
| 223 | + Document doc = domImpl.createDocument("","searchdata",null); | |
| 224 | + Element wkRoot = doc.getDocumentElement(); | |
| 225 | + | |
| 226 | + // URL | |
| 227 | + Element url = doc.createElement("url"); | |
| 228 | + url.appendChild(doc.createTextNode(UrlAdress)); | |
| 229 | + wkRoot.appendChild(url); | |
| 230 | + | |
| 231 | + // 検索情報 | |
| 232 | + for (Object slist1 : slist) { | |
| 233 | + SearchData sdat = (SearchData) slist1; | |
| 234 | + | |
| 235 | + Element cslist = doc.createElement("searchlist"); | |
| 236 | + Element item = doc.createElement("item"); | |
| 237 | + Element htmltag = doc.createElement("htmltag"); | |
| 238 | + Element htmlid = doc.createElement("htmlid"); | |
| 239 | + Element htmlclass = doc.createElement("htmlclass"); | |
| 240 | + Element around = doc.createElement("around"); | |
| 241 | + Element regexp = doc.createElement("regexp"); | |
| 242 | + | |
| 243 | + item.appendChild(doc.createTextNode(sdat.getitem())); | |
| 244 | + htmltag.appendChild(doc.createTextNode(sdat.getHtmltag())); | |
| 245 | + htmlid.appendChild(doc.createTextNode(sdat.getHtmlid())); | |
| 246 | + htmlclass.appendChild(doc.createTextNode(sdat.getHtmlclass())); | |
| 247 | + around.appendChild(doc.createTextNode(sdat.getaround())); | |
| 248 | + regexp.appendChild(doc.createTextNode(sdat.getregexp())); | |
| 249 | + | |
| 250 | + cslist.appendChild(item); | |
| 251 | + cslist.appendChild(htmltag); | |
| 252 | + cslist.appendChild(htmlid); | |
| 253 | + cslist.appendChild(htmlclass); | |
| 254 | + cslist.appendChild(around); | |
| 255 | + cslist.appendChild(regexp); | |
| 256 | + | |
| 257 | + wkRoot.appendChild(cslist); | |
| 258 | + } | |
| 259 | + // 出力 | |
| 260 | + TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 261 | + Transformer transformer = transFactory.newTransformer(); | |
| 262 | + | |
| 263 | + DOMSource source = new DOMSource(doc); | |
| 264 | + FileOutputStream os = new FileOutputStream(file); | |
| 265 | + StreamResult result = new StreamResult(os); | |
| 266 | + transformer.transform(source, result); | |
| 267 | + | |
| 268 | + } catch (ParserConfigurationException | FileNotFoundException ex) { | |
| 269 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 270 | + } catch (TransformerConfigurationException ex) { | |
| 271 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 272 | + } catch (TransformerException ex) { | |
| 273 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 274 | + } | |
| 275 | + } | |
| 276 | + | |
| 277 | + /** | |
| 278 | + * 読込(XML形式). | |
| 279 | + * @param file | |
| 280 | + */ | |
| 281 | + public void loadXml(File file) { | |
| 282 | + slist = new ArrayList(); | |
| 283 | + | |
| 284 | + try { | |
| 285 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 286 | + DocumentBuilder wkBuilder = factory.newDocumentBuilder(); | |
| 287 | + Document doc = wkBuilder.parse(file); | |
| 288 | + | |
| 289 | + // ルート要素の取得 | |
| 290 | + Element wkRoot = doc.getDocumentElement(); | |
| 291 | + | |
| 292 | + // URL | |
| 293 | + NodeList url = wkRoot.getElementsByTagName("url"); | |
| 294 | + Node urlnode = url.item(0); | |
| 295 | + UrlAdress = urlnode.getFirstChild().getNodeValue(); | |
| 296 | + | |
| 297 | + // 検索情報 | |
| 298 | + NodeList cslist = wkRoot.getElementsByTagName("searchlist"); | |
| 299 | + for(int i = 0; i < cslist.getLength(); i++) { | |
| 300 | + SearchData sdat = new SearchData(); | |
| 301 | + | |
| 302 | + Node slistnode = cslist.item(i); | |
| 303 | + Node child; | |
| 304 | + for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 305 | + if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 306 | + | |
| 307 | + String tag = child.getNodeName(); | |
| 308 | + String rtn = ""; | |
| 309 | + if(child.getFirstChild() != null) { | |
| 310 | + rtn = child.getFirstChild().getNodeValue(); | |
| 311 | + } | |
| 312 | + | |
| 313 | + switch (tag) { | |
| 314 | + case "item" : | |
| 315 | + sdat.setitem(rtn); | |
| 316 | + break; | |
| 317 | + case "htmltag" : | |
| 318 | + sdat.setHtmltag(rtn); | |
| 319 | + break; | |
| 320 | + case "htmlid" : | |
| 321 | + sdat.setHtmlid(rtn); | |
| 322 | + break; | |
| 323 | + case "htmlclass" : | |
| 324 | + sdat.setHtmlclass(rtn); | |
| 325 | + break; | |
| 326 | + case "around" : | |
| 327 | + sdat.setaround(rtn); | |
| 328 | + break; | |
| 329 | + case "regexp" : | |
| 330 | + sdat.setregexp(rtn); | |
| 331 | + break; | |
| 332 | + } | |
| 333 | + } | |
| 334 | + } | |
| 335 | + slist.add(sdat); | |
| 336 | + } | |
| 337 | + | |
| 338 | + } catch (ParserConfigurationException | SAXException | IOException ex) { | |
| 339 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 340 | + } | |
| 341 | + } | |
| 342 | + | |
| 343 | + /* ---------------------------------------------------------------------- */ | |
| 344 | + | |
| 345 | + void loadUrl() { | |
| 346 | + NodeList nodelist = root.getElementsByTagName("url"); | |
| 347 | + Node node = nodelist.item(0); | |
| 348 | + UrlAdress = node.getFirstChild().getNodeValue(); | |
| 349 | + } | |
| 350 | + | |
| 351 | + public void loadSearchList() { | |
| 352 | + slist.clear(); | |
| 353 | + SearchData.clear(); | |
| 354 | + | |
| 355 | + NodeList nodelist = root.getElementsByTagName("searchlist"); | |
| 356 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 357 | + Node childnode = nodelist.item(i); | |
| 358 | + | |
| 359 | + boolean sdatflg = false; | |
| 360 | + SearchData sdat = new SearchData(); | |
| 361 | + for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 362 | + if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 363 | + String tag = child.getNodeName(); | |
| 364 | + String rtn = ""; | |
| 365 | + if(child.getFirstChild() != null) { | |
| 366 | + rtn = child.getFirstChild().getNodeValue(); | |
| 367 | + } | |
| 368 | + switch (tag) { | |
| 369 | + case "item" : | |
| 370 | + sdat.setitem(rtn); | |
| 371 | + sdatflg = true; | |
| 372 | + break; | |
| 373 | + case "htmltag" : | |
| 374 | + sdat.setHtmltag(rtn); | |
| 375 | + sdatflg = true; | |
| 376 | + break; | |
| 377 | + case "htmlid" : | |
| 378 | + sdat.setHtmlid(rtn); | |
| 379 | + sdatflg = true; | |
| 380 | + break; | |
| 381 | + case "htmlclass" : | |
| 382 | + sdat.setHtmlclass(rtn); | |
| 383 | + sdatflg = true; | |
| 384 | + break; | |
| 385 | + case "around" : | |
| 386 | + sdat.setaround(rtn); | |
| 387 | + sdatflg = true; | |
| 388 | + break; | |
| 389 | + case "regexp" : | |
| 390 | + sdat.setregexp(rtn); | |
| 391 | + sdatflg = true; | |
| 392 | + break; | |
| 393 | + } | |
| 394 | + } | |
| 395 | + } | |
| 396 | + if(sdatflg) slist.add(sdat); | |
| 397 | + if(sdatflg) SearchData.add(sdat); | |
| 398 | + } | |
| 399 | + } | |
| 400 | + | |
| 401 | + public String loadMsg404() { | |
| 402 | + StringBuilder strbuf = new StringBuilder(); | |
| 403 | + NodeList nodelist = root.getElementsByTagName("msg404"); | |
| 404 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 405 | + Node childnode = nodelist.item(i); | |
| 406 | + String str = childnode.getFirstChild().getNodeValue(); | |
| 407 | + if(strbuf.length() > 0) { | |
| 408 | + strbuf.append("\n"); | |
| 409 | + } | |
| 410 | + strbuf.append(str); | |
| 411 | + } | |
| 412 | + return strbuf.toString(); | |
| 413 | + } | |
| 414 | + | |
| 415 | + public Element loadElement(String elementTagName) { | |
| 416 | + NodeList nodelist = root.getElementsByTagName(elementTagName); | |
| 417 | + Element element = (Element)nodelist.item(0); | |
| 418 | + | |
| 419 | + return element; | |
| 420 | + } | |
| 421 | + | |
| 422 | + /* ---------------------------------------------------------------------- */ | |
| 423 | + | |
| 424 | + void saveUrl(String urladdress) { | |
| 425 | + checkdoc(); | |
| 426 | + removeElement("url"); // 既にElementが存在してた場合、一度削除 | |
| 427 | + | |
| 428 | + Element url = document.createElement("url"); | |
| 429 | + url.appendChild(document.createTextNode(urladdress)); | |
| 430 | + root.appendChild(url); | |
| 431 | + } | |
| 432 | + | |
| 433 | + void saveSearchList(ArrayList slist) { | |
| 434 | + checkdoc(); | |
| 435 | + removeElement("searchlist"); // 既にElementが存在してた場合、一度削除 | |
| 436 | + | |
| 437 | + int count = 0; | |
| 438 | + for (Object slist1 : slist) { | |
| 439 | + SearchData sdat = (SearchData) slist1; | |
| 440 | + | |
| 441 | + Element cslist = document.createElement("searchlist"); | |
| 442 | + cslist.setAttribute("listNo", String.valueOf(++count)); | |
| 443 | + | |
| 444 | + addChild(cslist, "item", sdat.getitem()); | |
| 445 | + addChild(cslist, "htmltag", sdat.getHtmltag()); | |
| 446 | + addChild(cslist, "htmlid", sdat.getHtmlid()); | |
| 447 | + addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
| 448 | + addChild(cslist, "around", sdat.getaround()); | |
| 449 | + addChild(cslist, "regexp", sdat.getregexp()); | |
| 450 | + | |
| 451 | + root.appendChild(cslist); | |
| 452 | + } | |
| 453 | + } | |
| 454 | + | |
| 455 | + void saveMsg404(String msg) { | |
| 456 | + checkdoc(); | |
| 457 | + removeElement("msg404"); // 既にElementが存在してた場合、一度削除 | |
| 458 | + | |
| 459 | + String[] msgs = msg.split("\n"); | |
| 460 | + int count = 0; | |
| 461 | + for(String msgOne : msgs) { | |
| 462 | + Element msgElement = document.createElement("msg404"); | |
| 463 | + msgElement.setAttribute("No", String.valueOf(++count)); | |
| 464 | + msgElement.appendChild(document.createTextNode(msgOne)); | |
| 465 | + | |
| 466 | + root.appendChild(msgElement); | |
| 467 | + } | |
| 468 | + } | |
| 469 | + | |
| 470 | + public void saveElement(Element element) { | |
| 471 | + checkdoc(); | |
| 472 | + removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除 | |
| 473 | + | |
| 474 | + root.appendChild(element); | |
| 475 | + } | |
| 476 | + | |
| 477 | + /* ---------------------------------------------------------------------- */ | |
| 478 | + | |
| 479 | + private void addChild(Element cslist, String keyword, String data) { | |
| 480 | + if(!data.isEmpty()) { | |
| 481 | + Element element = document.createElement(keyword); | |
| 482 | + element.appendChild(document.createTextNode(data)); | |
| 483 | + cslist.appendChild(element); | |
| 484 | + } | |
| 485 | + } | |
| 486 | + | |
| 487 | + private void removeElement(String elementTagName) { | |
| 488 | + int nodeSize; | |
| 489 | + do { | |
| 490 | + NodeList nodelist = document.getElementsByTagName(elementTagName); | |
| 491 | + nodeSize = nodelist.getLength(); | |
| 492 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 493 | + Node node = nodelist.item(i); | |
| 494 | + root.removeChild(node); | |
| 495 | + } | |
| 496 | + } while(nodeSize > 0); | |
| 497 | + } | |
| 498 | + | |
| 499 | + /** | |
| 500 | + * ドキュメントチェック. | |
| 501 | + * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。 | |
| 502 | + * 既読の場合、ルートエレメントの取得を行う。 | |
| 503 | + */ | |
| 504 | + public void checkdoc() { | |
| 505 | + if(document == null) { | |
| 506 | + DOMImplementation domImpl = builder.getDOMImplementation(); | |
| 507 | + document = domImpl.createDocument("","searchdata",null); | |
| 508 | + } | |
| 509 | + root = document.getDocumentElement(); | |
| 510 | + } | |
| 511 | + | |
| 512 | + /** | |
| 513 | + * XML読込み. | |
| 514 | + * @param file | |
| 515 | + */ | |
| 516 | + public void read(File file) { | |
| 517 | + try { | |
| 518 | + document = builder.parse(file); | |
| 519 | + root = document.getDocumentElement(); | |
| 520 | + | |
| 521 | + } catch (SAXException | IOException ex) { | |
| 522 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 523 | + } | |
| 524 | + } | |
| 525 | + | |
| 526 | + /** | |
| 527 | + * XML書込み. | |
| 528 | + * @param file | |
| 529 | + */ | |
| 530 | + public void write(File file) { | |
| 531 | + try { | |
| 532 | + TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 533 | + Transformer transformer = transFactory.newTransformer(); | |
| 534 | + | |
| 535 | + DOMSource source = new DOMSource(document); | |
| 536 | + FileOutputStream os = new FileOutputStream(file); | |
| 537 | + StreamResult result = new StreamResult(os); | |
| 538 | + transformer.transform(source, result); | |
| 539 | + | |
| 540 | + } catch (TransformerConfigurationException ex) { | |
| 541 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 542 | + } catch (FileNotFoundException | TransformerException ex) { | |
| 543 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 544 | + } | |
| 545 | + } | |
| 546 | + | |
| 547 | +} |
| @@ -0,0 +1,566 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | +package webScraping.utility; | |
| 23 | + | |
| 24 | +import webScraping.core.HtmlParser; | |
| 25 | +import webScraping.core.SearchData; | |
| 26 | +import java.awt.Desktop; | |
| 27 | +import java.io.File; | |
| 28 | +import java.io.IOException; | |
| 29 | +import java.net.URI; | |
| 30 | +import java.net.URISyntaxException; | |
| 31 | +import java.util.logging.Level; | |
| 32 | +import java.util.logging.Logger; | |
| 33 | +import javax.swing.JFileChooser; | |
| 34 | +import javax.swing.filechooser.FileFilter; | |
| 35 | +import javax.swing.filechooser.FileNameExtensionFilter; | |
| 36 | +import javax.swing.table.DefaultTableModel; | |
| 37 | + | |
| 38 | +/** | |
| 39 | + * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する. | |
| 40 | + * @author kgto | |
| 41 | + */ | |
| 42 | +public class HtmlSearch extends javax.swing.JFrame { | |
| 43 | + private final SearchDataRW sio = new SearchDataRW(); | |
| 44 | + | |
| 45 | + SearchDataTableModel sdatatblmodel; | |
| 46 | + | |
| 47 | + /** | |
| 48 | + * Creates new form Frame1 | |
| 49 | + */ | |
| 50 | + public HtmlSearch() { | |
| 51 | + sdatatblmodel = new SearchDataTableModel(); | |
| 52 | + | |
| 53 | + initComponents(); | |
| 54 | + | |
| 55 | + // カレントディレクトリ取得 | |
| 56 | + String dir = System.getProperty("user.dir"); | |
| 57 | + File file = new java.io.File(dir + "\\data"); | |
| 58 | + jFileChooser1.setCurrentDirectory(file); | |
| 59 | + | |
| 60 | + FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml"); | |
| 61 | + FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt"); | |
| 62 | + jFileChooser1.addChoosableFileFilter(filter1); | |
| 63 | + jFileChooser1.addChoosableFileFilter(filter2); | |
| 64 | + jFileChooser1.setFileFilter(filter1); | |
| 65 | + | |
| 66 | + } | |
| 67 | + | |
| 68 | + /** | |
| 69 | + * This method is called from within the constructor to initialize the form. | |
| 70 | + * WARNING: Do NOT modify this code. The content of this method is always | |
| 71 | + * regenerated by the Form Editor. | |
| 72 | + */ | |
| 73 | + @SuppressWarnings("unchecked") | |
| 74 | + // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents | |
| 75 | + private void initComponents() { | |
| 76 | + | |
| 77 | + jFileChooser1 = new javax.swing.JFileChooser(); | |
| 78 | + jRadioButton1 = new javax.swing.JRadioButton(); | |
| 79 | + jLabel1 = new javax.swing.JLabel(); | |
| 80 | + jTxtUrl = new javax.swing.JTextField(); | |
| 81 | + jBtnSearch = new javax.swing.JButton(); | |
| 82 | + jTabbedPane1 = new javax.swing.JTabbedPane(); | |
| 83 | + jPanelTab1 = new javax.swing.JPanel(); | |
| 84 | + jScrollPane1 = new javax.swing.JScrollPane(); | |
| 85 | + jTable1 = new javax.swing.JTable(); | |
| 86 | + jBtnRowIns = new javax.swing.JButton(); | |
| 87 | + jBtnRowDel = new javax.swing.JButton(); | |
| 88 | + jBtnRowCpy = new javax.swing.JButton(); | |
| 89 | + jPanelTab2 = new javax.swing.JPanel(); | |
| 90 | + jScrollPaneLabel = new javax.swing.JScrollPane(); | |
| 91 | + jTxtLabel = new javax.swing.JTextArea(); | |
| 92 | + jScrollPane404msg = new javax.swing.JScrollPane(); | |
| 93 | + jTxt404msg = new javax.swing.JTextArea(); | |
| 94 | + jPanelRtn = new javax.swing.JPanel(); | |
| 95 | + jScrollPaneRtn = new javax.swing.JScrollPane(); | |
| 96 | + jTxtRtn = new javax.swing.JTextArea(); | |
| 97 | + jMenuBar1 = new javax.swing.JMenuBar(); | |
| 98 | + jMenu1 = new javax.swing.JMenu(); | |
| 99 | + jMenuLoad = new javax.swing.JMenuItem(); | |
| 100 | + jMenuSave = new javax.swing.JMenuItem(); | |
| 101 | + jMenu3 = new javax.swing.JMenu(); | |
| 102 | + jMenuItem1 = new javax.swing.JMenuItem(); | |
| 103 | + jMenu2 = new javax.swing.JMenu(); | |
| 104 | + | |
| 105 | + jFileChooser1.setCurrentDirectory(null); | |
| 106 | + jFileChooser1.setDialogTitle(""); | |
| 107 | + | |
| 108 | + jRadioButton1.setText("jRadioButton1"); | |
| 109 | + | |
| 110 | + setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); | |
| 111 | + setTitle("タグ検索"); | |
| 112 | + | |
| 113 | + jLabel1.setText(" URL:"); | |
| 114 | + | |
| 115 | + jBtnSearch.setText("検索"); | |
| 116 | + jBtnSearch.addActionListener(new java.awt.event.ActionListener() { | |
| 117 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 118 | + jBtnSearchActionPerformed(evt); | |
| 119 | + } | |
| 120 | + }); | |
| 121 | + | |
| 122 | + jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報")); | |
| 123 | + | |
| 124 | + jTable1.setModel(sdatatblmodel); | |
| 125 | + jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION); | |
| 126 | + jTable1.getTableHeader().setReorderingAllowed(false); | |
| 127 | + jScrollPane1.setViewportView(jTable1); | |
| 128 | + | |
| 129 | + jBtnRowIns.setText("行挿入"); | |
| 130 | + jBtnRowIns.addActionListener(new java.awt.event.ActionListener() { | |
| 131 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 132 | + jBtnRowInsActionPerformed(evt); | |
| 133 | + } | |
| 134 | + }); | |
| 135 | + | |
| 136 | + jBtnRowDel.setText("行削除"); | |
| 137 | + jBtnRowDel.addActionListener(new java.awt.event.ActionListener() { | |
| 138 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 139 | + jBtnRowDelActionPerformed(evt); | |
| 140 | + } | |
| 141 | + }); | |
| 142 | + | |
| 143 | + jBtnRowCpy.setText("行コピー"); | |
| 144 | + jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() { | |
| 145 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 146 | + jBtnRowCpyActionPerformed(evt); | |
| 147 | + } | |
| 148 | + }); | |
| 149 | + | |
| 150 | + javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1); | |
| 151 | + jPanelTab1.setLayout(jPanelTab1Layout); | |
| 152 | + jPanelTab1Layout.setHorizontalGroup( | |
| 153 | + jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 154 | + .addGroup(jPanelTab1Layout.createSequentialGroup() | |
| 155 | + .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) | |
| 156 | + .addComponent(jBtnRowCpy) | |
| 157 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 158 | + .addComponent(jBtnRowDel) | |
| 159 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 160 | + .addComponent(jBtnRowIns)) | |
| 161 | + .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE) | |
| 162 | + ); | |
| 163 | + jPanelTab1Layout.setVerticalGroup( | |
| 164 | + jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 165 | + .addGroup(jPanelTab1Layout.createSequentialGroup() | |
| 166 | + .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE) | |
| 167 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 168 | + .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) | |
| 169 | + .addComponent(jBtnRowDel) | |
| 170 | + .addComponent(jBtnRowIns) | |
| 171 | + .addComponent(jBtnRowCpy))) | |
| 172 | + ); | |
| 173 | + | |
| 174 | + jTabbedPane1.addTab("キー設定", jPanelTab1); | |
| 175 | + | |
| 176 | + jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ")); | |
| 177 | + | |
| 178 | + jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER); | |
| 179 | + jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER); | |
| 180 | + | |
| 181 | + jTxtLabel.setEditable(false); | |
| 182 | + jTxtLabel.setBackground(java.awt.Color.lightGray); | |
| 183 | + jTxtLabel.setColumns(20); | |
| 184 | + jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N | |
| 185 | + jTxtLabel.setLineWrap(true); | |
| 186 | + jTxtLabel.setRows(2); | |
| 187 | + jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。"); | |
| 188 | + jTxtLabel.setAutoscrolls(false); | |
| 189 | + jTxtLabel.setBorder(null); | |
| 190 | + jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR)); | |
| 191 | + jTxtLabel.setFocusable(false); | |
| 192 | + jTxtLabel.setHighlighter(null); | |
| 193 | + jTxtLabel.setKeymap(null); | |
| 194 | + jTxtLabel.setOpaque(false); | |
| 195 | + jTxtLabel.setRequestFocusEnabled(false); | |
| 196 | + jTxtLabel.setVerifyInputWhenFocusTarget(false); | |
| 197 | + jScrollPaneLabel.setViewportView(jTxtLabel); | |
| 198 | + | |
| 199 | + jTxt404msg.setColumns(20); | |
| 200 | + jTxt404msg.setRows(3); | |
| 201 | + jTxt404msg.setText("一致する銘柄は見つかりませんでした\n"); | |
| 202 | + jScrollPane404msg.setViewportView(jTxt404msg); | |
| 203 | + | |
| 204 | + javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2); | |
| 205 | + jPanelTab2.setLayout(jPanelTab2Layout); | |
| 206 | + jPanelTab2Layout.setHorizontalGroup( | |
| 207 | + jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 208 | + .addComponent(jScrollPane404msg) | |
| 209 | + .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup() | |
| 210 | + .addContainerGap() | |
| 211 | + .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE) | |
| 212 | + .addContainerGap()) | |
| 213 | + ); | |
| 214 | + jPanelTab2Layout.setVerticalGroup( | |
| 215 | + jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 216 | + .addGroup(jPanelTab2Layout.createSequentialGroup() | |
| 217 | + .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE) | |
| 218 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) | |
| 219 | + .addComponent(jScrollPane404msg)) | |
| 220 | + ); | |
| 221 | + | |
| 222 | + jTabbedPane1.addTab("結果無し判定", jPanelTab2); | |
| 223 | + | |
| 224 | + jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果")); | |
| 225 | + | |
| 226 | + jTxtRtn.setColumns(20); | |
| 227 | + jTxtRtn.setRows(5); | |
| 228 | + jScrollPaneRtn.setViewportView(jTxtRtn); | |
| 229 | + | |
| 230 | + javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn); | |
| 231 | + jPanelRtn.setLayout(jPanelRtnLayout); | |
| 232 | + jPanelRtnLayout.setHorizontalGroup( | |
| 233 | + jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 234 | + .addComponent(jScrollPaneRtn) | |
| 235 | + ); | |
| 236 | + jPanelRtnLayout.setVerticalGroup( | |
| 237 | + jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 238 | + .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE) | |
| 239 | + ); | |
| 240 | + | |
| 241 | + jMenu1.setText("ファイル"); | |
| 242 | + | |
| 243 | + jMenuLoad.setText("LOAD"); | |
| 244 | + jMenuLoad.addActionListener(new java.awt.event.ActionListener() { | |
| 245 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 246 | + jMenuLoadActionPerformed(evt); | |
| 247 | + } | |
| 248 | + }); | |
| 249 | + jMenu1.add(jMenuLoad); | |
| 250 | + | |
| 251 | + jMenuSave.setText("SAVE"); | |
| 252 | + jMenuSave.addActionListener(new java.awt.event.ActionListener() { | |
| 253 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 254 | + jMenuSaveActionPerformed(evt); | |
| 255 | + } | |
| 256 | + }); | |
| 257 | + jMenu1.add(jMenuSave); | |
| 258 | + | |
| 259 | + jMenuBar1.add(jMenu1); | |
| 260 | + | |
| 261 | + jMenu3.setText("ツール"); | |
| 262 | + | |
| 263 | + jMenuItem1.setText("ブラウザで表示"); | |
| 264 | + jMenuItem1.addActionListener(new java.awt.event.ActionListener() { | |
| 265 | + public void actionPerformed(java.awt.event.ActionEvent evt) { | |
| 266 | + jMenuItem1ActionPerformed(evt); | |
| 267 | + } | |
| 268 | + }); | |
| 269 | + jMenu3.add(jMenuItem1); | |
| 270 | + | |
| 271 | + jMenuBar1.add(jMenu3); | |
| 272 | + | |
| 273 | + jMenu2.setText("検索"); | |
| 274 | + jMenu2.addMouseListener(new java.awt.event.MouseAdapter() { | |
| 275 | + public void mouseClicked(java.awt.event.MouseEvent evt) { | |
| 276 | + jMenu2MouseClicked(evt); | |
| 277 | + } | |
| 278 | + }); | |
| 279 | + jMenuBar1.add(jMenu2); | |
| 280 | + | |
| 281 | + setJMenuBar(jMenuBar1); | |
| 282 | + | |
| 283 | + javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane()); | |
| 284 | + getContentPane().setLayout(layout); | |
| 285 | + layout.setHorizontalGroup( | |
| 286 | + layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 287 | + .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) | |
| 288 | + .addGroup(layout.createSequentialGroup() | |
| 289 | + .addComponent(jLabel1) | |
| 290 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 291 | + .addComponent(jTxtUrl) | |
| 292 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 293 | + .addComponent(jBtnSearch)) | |
| 294 | + .addComponent(jTabbedPane1) | |
| 295 | + ); | |
| 296 | + layout.setVerticalGroup( | |
| 297 | + layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
| 298 | + .addGroup(layout.createSequentialGroup() | |
| 299 | + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) | |
| 300 | + .addComponent(jLabel1) | |
| 301 | + .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE) | |
| 302 | + .addComponent(jBtnSearch)) | |
| 303 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 304 | + .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE) | |
| 305 | + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
| 306 | + .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) | |
| 307 | + .addContainerGap()) | |
| 308 | + ); | |
| 309 | + | |
| 310 | + pack(); | |
| 311 | + }// </editor-fold>//GEN-END:initComponents | |
| 312 | + | |
| 313 | + private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed | |
| 314 | + int SelectedRow = jTable1.getSelectedRow(); | |
| 315 | + SearchData sdata = new SearchData(); | |
| 316 | + if(SelectedRow >= 0) { | |
| 317 | + sdatatblmodel.insertRow(SelectedRow, sdata); | |
| 318 | + } else { | |
| 319 | + sdatatblmodel.addRow(sdata); | |
| 320 | + } | |
| 321 | + }//GEN-LAST:event_jBtnRowInsActionPerformed | |
| 322 | + | |
| 323 | + private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed | |
| 324 | + int SelectedRow = jTable1.getSelectedRow(); | |
| 325 | + if(!(SelectedRow < 0)) { | |
| 326 | + sdatatblmodel.removeRow(SelectedRow); | |
| 327 | + } | |
| 328 | + }//GEN-LAST:event_jBtnRowDelActionPerformed | |
| 329 | + | |
| 330 | + private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed | |
| 331 | + jFileChooser1.setDialogTitle("読込"); | |
| 332 | + int selected = jFileChooser1.showOpenDialog(this); | |
| 333 | + if (selected == JFileChooser.APPROVE_OPTION) { | |
| 334 | + File file = jFileChooser1.getSelectedFile(); | |
| 335 | + sio.load(file); | |
| 336 | + jTxtUrl.setText(sio.geturl()); | |
| 337 | + sdatatblmodel.setRowCount(0); | |
| 338 | + for(int i = 0; i < SearchData.size(); i++) { | |
| 339 | + SearchData sdata = SearchData.get(i); | |
| 340 | + sdatatblmodel.addRow(sdata); | |
| 341 | + } | |
| 342 | + } | |
| 343 | + }//GEN-LAST:event_jMenuLoadActionPerformed | |
| 344 | + | |
| 345 | + private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed | |
| 346 | + jFileChooser1.setDialogTitle("保存"); | |
| 347 | + int selected = jFileChooser1.showSaveDialog(this); | |
| 348 | + if (selected == JFileChooser.APPROVE_OPTION) { | |
| 349 | + File file = jFileChooser1.getSelectedFile(); | |
| 350 | + sio.seturl(jTxtUrl.getText()); | |
| 351 | + | |
| 352 | + SearchData.clear(); | |
| 353 | + for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
| 354 | + SearchData sdata = sdatatblmodel.getSearchData(row); | |
| 355 | + SearchData.add(sdata); | |
| 356 | + } | |
| 357 | + sio.save(file); | |
| 358 | + } | |
| 359 | + }//GEN-LAST:event_jMenuSaveActionPerformed | |
| 360 | + | |
| 361 | + private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed | |
| 362 | + int SelectedRow = jTable1.getSelectedRow(); | |
| 363 | + if(SelectedRow >= 0) { | |
| 364 | + SearchData sdata = sdatatblmodel.getSearchData(SelectedRow); | |
| 365 | + sdatatblmodel.insertRow(SelectedRow, sdata); | |
| 366 | + } | |
| 367 | + }//GEN-LAST:event_jBtnRowCpyActionPerformed | |
| 368 | + | |
| 369 | + private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed | |
| 370 | + Desktop desktop = Desktop.getDesktop(); | |
| 371 | + String uriString = jTxtUrl.getText(); | |
| 372 | + try { | |
| 373 | + URI uri = new URI(uriString); | |
| 374 | + desktop.browse(uri); | |
| 375 | + | |
| 376 | + } catch (URISyntaxException | IOException ex) { | |
| 377 | + Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex); | |
| 378 | + } | |
| 379 | + }//GEN-LAST:event_jMenuItem1ActionPerformed | |
| 380 | + | |
| 381 | + private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked | |
| 382 | + Search_execution(); | |
| 383 | + }//GEN-LAST:event_jMenu2MouseClicked | |
| 384 | + | |
| 385 | + private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed | |
| 386 | + Search_execution(); | |
| 387 | + }//GEN-LAST:event_jBtnSearchActionPerformed | |
| 388 | + | |
| 389 | + /** | |
| 390 | + * 検索実行. | |
| 391 | + */ | |
| 392 | + void Search_execution() { | |
| 393 | + jTxtRtn.setText(null); | |
| 394 | + HtmlParser par = new HtmlParser(jTxtUrl.getText()); | |
| 395 | + | |
| 396 | + // データ無し(404)判定 | |
| 397 | + String strdata = par.getStringPageData(); | |
| 398 | + if(strdata == null) { | |
| 399 | + jTxtRtn.append("読込みページがありません"); | |
| 400 | + return; | |
| 401 | + } | |
| 402 | + String text = jTxt404msg.getText(); | |
| 403 | + String[] strsearch = text.split("\n"); | |
| 404 | + for(String strsearch1 : strsearch) { | |
| 405 | + if(strdata.contains(strsearch1)) { | |
| 406 | + jTxtRtn.append(strsearch1); | |
| 407 | + return; | |
| 408 | + } | |
| 409 | + } | |
| 410 | + | |
| 411 | + // 検索結果 | |
| 412 | + for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
| 413 | + SearchData sdata = sdatatblmodel.getSearchData(row); | |
| 414 | + String ans = sdata.getitem(); | |
| 415 | + String rtn = par.search(sdata); | |
| 416 | + jTxtRtn.append(ans + "\t" + rtn + "\n"); | |
| 417 | + } | |
| 418 | + | |
| 419 | + jTxtRtn.setCaretPosition(0); | |
| 420 | + } | |
| 421 | + | |
| 422 | + /** | |
| 423 | + * @param args the command line arguments | |
| 424 | + */ | |
| 425 | + public static void main(String args[]) { | |
| 426 | + /* Set the Nimbus look and feel */ | |
| 427 | + //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) "> | |
| 428 | + /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel. | |
| 429 | + * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html | |
| 430 | + */ | |
| 431 | + try { | |
| 432 | + for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) { | |
| 433 | + if ("Nimbus".equals(info.getName())) { | |
| 434 | + javax.swing.UIManager.setLookAndFeel(info.getClassName()); | |
| 435 | + break; | |
| 436 | + } | |
| 437 | + } | |
| 438 | + } catch (ClassNotFoundException | |
| 439 | + | InstantiationException | |
| 440 | + | IllegalAccessException | |
| 441 | + | javax.swing.UnsupportedLookAndFeelException ex) { | |
| 442 | + java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
| 443 | + } | |
| 444 | + //</editor-fold> | |
| 445 | + | |
| 446 | + /* Create and display the form */ | |
| 447 | + java.awt.EventQueue.invokeLater(new Runnable() { | |
| 448 | + @Override | |
| 449 | + public void run() { | |
| 450 | + new HtmlSearch().setVisible(true); | |
| 451 | + } | |
| 452 | + }); | |
| 453 | + } | |
| 454 | + | |
| 455 | + // Variables declaration - do not modify//GEN-BEGIN:variables | |
| 456 | + private javax.swing.JButton jBtnRowCpy; | |
| 457 | + private javax.swing.JButton jBtnRowDel; | |
| 458 | + private javax.swing.JButton jBtnRowIns; | |
| 459 | + private javax.swing.JButton jBtnSearch; | |
| 460 | + private javax.swing.JFileChooser jFileChooser1; | |
| 461 | + private javax.swing.JLabel jLabel1; | |
| 462 | + private javax.swing.JMenu jMenu1; | |
| 463 | + private javax.swing.JMenu jMenu2; | |
| 464 | + private javax.swing.JMenu jMenu3; | |
| 465 | + private javax.swing.JMenuBar jMenuBar1; | |
| 466 | + private javax.swing.JMenuItem jMenuItem1; | |
| 467 | + private javax.swing.JMenuItem jMenuLoad; | |
| 468 | + private javax.swing.JMenuItem jMenuSave; | |
| 469 | + private javax.swing.JPanel jPanelRtn; | |
| 470 | + private javax.swing.JPanel jPanelTab1; | |
| 471 | + private javax.swing.JPanel jPanelTab2; | |
| 472 | + private javax.swing.JRadioButton jRadioButton1; | |
| 473 | + private javax.swing.JScrollPane jScrollPane1; | |
| 474 | + private javax.swing.JScrollPane jScrollPane404msg; | |
| 475 | + private javax.swing.JScrollPane jScrollPaneLabel; | |
| 476 | + private javax.swing.JScrollPane jScrollPaneRtn; | |
| 477 | + private javax.swing.JTabbedPane jTabbedPane1; | |
| 478 | + private javax.swing.JTable jTable1; | |
| 479 | + private javax.swing.JTextArea jTxt404msg; | |
| 480 | + private javax.swing.JTextArea jTxtLabel; | |
| 481 | + private javax.swing.JTextArea jTxtRtn; | |
| 482 | + private javax.swing.JTextField jTxtUrl; | |
| 483 | + // End of variables declaration//GEN-END:variables | |
| 484 | +} | |
| 485 | + | |
| 486 | +class SearchDataTableModel extends DefaultTableModel { | |
| 487 | + /* ---------------------------------------------------------------------- * | |
| 488 | + * データ属性 | |
| 489 | + * ---------------------------------------------------------------------- */ | |
| 490 | + public String[] columnName = { | |
| 491 | + /* 0 */ "項目名", | |
| 492 | + /* 1 */ "タグ", | |
| 493 | + /* 2 */ "ID", | |
| 494 | + /* 3 */ "クラス", | |
| 495 | + /* 4 */ "位置", | |
| 496 | + /* 5 */ "抽出条件" | |
| 497 | + }; | |
| 498 | + | |
| 499 | + public Class[] columnClass = { | |
| 500 | + /* 0 */ String.class, | |
| 501 | + /* 1 */ String.class, | |
| 502 | + /* 2 */ String.class, | |
| 503 | + /* 3 */ String.class, | |
| 504 | + /* 4 */ String.class, | |
| 505 | + /* 5 */ String.class | |
| 506 | + }; | |
| 507 | + | |
| 508 | + int column_item = 0; | |
| 509 | + int column_htmltag = 1; | |
| 510 | + int column_htmlid = 2; | |
| 511 | + int column_htmlclass = 3; | |
| 512 | + int column_around = 4; | |
| 513 | + int column_regexp = 5; | |
| 514 | + | |
| 515 | + /* ---------------------------------------------------------------------- * | |
| 516 | + * 処理 | |
| 517 | + * ---------------------------------------------------------------------- */ | |
| 518 | + @Override | |
| 519 | + public String getColumnName(int modelIndex) { | |
| 520 | + return columnName[modelIndex]; | |
| 521 | + } | |
| 522 | + | |
| 523 | + @Override | |
| 524 | + public Class<?> getColumnClass(int modelIndex) { | |
| 525 | + return columnClass[modelIndex]; | |
| 526 | + } | |
| 527 | + | |
| 528 | + @Override | |
| 529 | + public int getColumnCount() { | |
| 530 | + return columnName.length; | |
| 531 | + } | |
| 532 | + | |
| 533 | + /* ---------------------------------------------------------------------- */ | |
| 534 | + | |
| 535 | + public SearchData getSearchData(int row) { | |
| 536 | + SearchData sdata = new SearchData(); | |
| 537 | + sdata.setitem(String.valueOf(getValueAt(row, column_item))); | |
| 538 | + sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag))); | |
| 539 | + sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid))); | |
| 540 | + sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass))); | |
| 541 | + sdata.setaround(String.valueOf(getValueAt(row, column_around))); | |
| 542 | + sdata.setregexp(String.valueOf(getValueAt(row, column_regexp))); | |
| 543 | + return sdata; | |
| 544 | + } | |
| 545 | + | |
| 546 | + public void addRow(SearchData sdata) { | |
| 547 | + addRow(getObjdata(sdata)); | |
| 548 | + } | |
| 549 | + | |
| 550 | + public void insertRow(int row, SearchData sdata) { | |
| 551 | + insertRow(row, getObjdata(sdata)); | |
| 552 | + } | |
| 553 | + | |
| 554 | + private Object[] getObjdata(SearchData sdata) { | |
| 555 | + Object[] obj = new Object[] { | |
| 556 | + sdata.getitem(), | |
| 557 | + sdata.getHtmltag(), | |
| 558 | + sdata.getHtmlid(), | |
| 559 | + sdata.getHtmlclass(), | |
| 560 | + sdata.getaround(), | |
| 561 | + sdata.getregexp() | |
| 562 | + }; | |
| 563 | + return obj; | |
| 564 | + } | |
| 565 | + | |
| 566 | +} | |
| \ No newline at end of file |
| @@ -0,0 +1,166 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.util.ArrayList; | |
| 26 | + | |
| 27 | +/** | |
| 28 | + * タグ検索データ. | |
| 29 | + * @author kgto | |
| 30 | + */ | |
public class SearchData {
    /* ---------------------------------------------------------------------- *
     * Fields
     *
     * Invariant: no field is ever null. The no-arg constructor starts every
     * field as "" and the setters map null to "", so consumers can call
     * length()/isEmpty() on the getter results without a null check.
     * ---------------------------------------------------------------------- */
    /** Item name (not interpreted by this class). */
    private String item;
    /** HTML tag name of the element to look for. */
    private String htmltag;
    /** Value of the {@code id} attribute to look for; "" when unused. */
    private String htmlid;
    /** Value of the {@code class} attribute to look for; "" when unused. */
    private String htmlclass;
    /** Position of the wanted match, kept as text; "" when unused. */
    private String around;
    /** Regular expression applied to the extracted text; "" when unused. */
    private String regexp;

    /* ---------------------------------------------------------------------- *
     * Static registry
     * ---------------------------------------------------------------------- */
    /** Process-wide list of search keys. Not synchronized. */
    private static final ArrayList<SearchData> slist = new ArrayList<>();

    /**
     * Builds a new search key from the given values and appends it to the
     * static list. A {@code null} argument is stored as "".
     *
     * @param item      item name
     * @param htmltag   HTML tag name
     * @param htmlid    id attribute value
     * @param htmlclass class attribute value
     * @param around    position of the wanted match, as text
     * @param regexp    regular expression
     */
    public static void addSearchData(
            String item, String htmltag, String htmlid,
            String htmlclass, String around, String regexp) {
        SearchData sdat = new SearchData();
        sdat.setitem(item);
        sdat.setHtmltag(htmltag);
        sdat.setHtmlid(htmlid);
        sdat.setHtmlclass(htmlclass);
        sdat.setaround(around);
        sdat.setregexp(regexp);

        slist.add(sdat);
    }

    /**
     * Appends an existing search key to the static list (no copy is made).
     *
     * @param sdat search key to append
     */
    public static void add(SearchData sdat) {
        slist.add(sdat);
    }

    /**
     * Returns the search key stored at the given position of the static list.
     *
     * @param i zero-based position
     * @return the stored search key
     * @throws IndexOutOfBoundsException if {@code i} is out of range
     */
    public static SearchData get(int i) {
        return slist.get(i);
    }

    /**
     * @return number of search keys in the static list
     */
    public static int size() {
        return slist.size();
    }

    /**
     * Removes the search key at the given position of the static list.
     *
     * @param index zero-based position
     * @return the removed search key
     * @throws IndexOutOfBoundsException if {@code index} is out of range
     */
    public static SearchData remove(int index) {
        return slist.remove(index);
    }

    /**
     * Empties the static list.
     */
    public static void clear() {
        slist.clear();
    }

    /* ---------------------------------------------------------------------- *
     * Constructors
     * ---------------------------------------------------------------------- */
    /**
     * Creates a search key whose fields are all "".
     */
    public SearchData() {
        initialize();
    }

    /**
     * Copy constructor. Values are read through the getters of {@code dat};
     * a {@code null} value is stored as "".
     *
     * @param dat search key to copy; must not be {@code null}
     */
    public SearchData(SearchData dat) {
        this.item = nullToEmpty(dat.getitem());
        this.htmltag = nullToEmpty(dat.getHtmltag());
        this.htmlid = nullToEmpty(dat.getHtmlid());
        this.htmlclass = nullToEmpty(dat.getHtmlclass());
        this.around = nullToEmpty(dat.getaround());
        this.regexp = nullToEmpty(dat.getregexp());
    }

    /* ---------------------------------------------------------------------- *
     * Setter (null is stored as "")
     * ---------------------------------------------------------------------- */
    public void setitem(String item) {
        this.item = nullToEmpty(item);
    }

    public void setHtmltag(String htmltag) {
        this.htmltag = nullToEmpty(htmltag);
    }

    public void setHtmlid(String htmlid) {
        this.htmlid = nullToEmpty(htmlid);
    }

    public void setHtmlclass(String htmlclass) {
        this.htmlclass = nullToEmpty(htmlclass);
    }

    public void setaround(String around) {
        this.around = nullToEmpty(around);
    }

    public void setregexp(String regexp) {
        this.regexp = nullToEmpty(regexp);
    }

    /* ---------------------------------------------------------------------- *
     * Getter (never return null)
     * ---------------------------------------------------------------------- */
    public String getitem() {
        return item;
    }

    public String getHtmltag() {
        return htmltag;
    }

    public String getHtmlid() {
        return htmlid;
    }

    public String getHtmlclass() {
        return htmlclass;
    }

    public String getaround() {
        return around;
    }

    public String getregexp() {
        return regexp;
    }

    /* ---------------------------------------------------------------------- *
     * Methods
     * ---------------------------------------------------------------------- */
    /**
     * Resets every field to "".
     * Declared final because it is called from the constructor.
     */
    public final void initialize() {
        this.item = "";
        this.htmltag = "";
        this.htmlid = "";
        this.htmlclass = "";
        this.around = "";
        this.regexp = "";
    }

    /** Maps null to "" so the "fields are never null" invariant holds. */
    private static String nullToEmpty(String value) {
        return (value == null) ? "" : value;
    }

}
| @@ -0,0 +1,259 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.io.*; | |
| 26 | +import java.net.*; | |
| 27 | +import java.util.ArrayList; | |
| 28 | +import java.util.logging.Level; | |
| 29 | +import java.util.logging.Logger; | |
| 30 | +import java.util.regex.Matcher; | |
| 31 | +import java.util.regex.Pattern; | |
| 32 | +import javax.swing.text.html.parser.ParserDelegator; | |
| 33 | + | |
| 34 | +/** | |
| 35 | + * | |
| 36 | + * @author kgto | |
| 37 | + */ | |
| 38 | +public class HtmlParser { | |
| 39 | + | |
| 40 | + URL url; | |
| 41 | + String pageData; | |
| 42 | + ArrayList sData; | |
| 43 | + | |
| 44 | + // 作業ワーク | |
| 45 | + String htmltag; | |
| 46 | + String htmlid; | |
| 47 | + String htmlclass; | |
| 48 | + | |
| 49 | + public HtmlParser(URL UrlAdress) { | |
| 50 | + DebugProcess.debuglog_set(); | |
| 51 | + this.url = UrlAdress; | |
| 52 | + getPageData(); | |
| 53 | + } | |
| 54 | + | |
| 55 | + public HtmlParser(String UrlAdress) { | |
| 56 | + DebugProcess.debuglog_set(); | |
| 57 | + try { | |
| 58 | + url = new URL(UrlAdress); | |
| 59 | + getPageData(); | |
| 60 | + | |
| 61 | + } catch (MalformedURLException ex) { | |
| 62 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 63 | + } | |
| 64 | + } | |
| 65 | + | |
| 66 | + public HtmlParser() { | |
| 67 | + DebugProcess.debuglog_set(); | |
| 68 | + url = null; | |
| 69 | + } | |
| 70 | + | |
| 71 | + public String getStringPageData() { | |
| 72 | + return pageData; | |
| 73 | + } | |
| 74 | + | |
| 75 | + public void seturl(URL UrlAdress) { | |
| 76 | + this.url = UrlAdress; | |
| 77 | + getPageData(); | |
| 78 | + } | |
| 79 | + | |
| 80 | + public void seturl(String UrlAdress) { | |
| 81 | + try { | |
| 82 | + url = new URL(UrlAdress); | |
| 83 | + getPageData(); | |
| 84 | + | |
| 85 | + } catch (MalformedURLException ex) { | |
| 86 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 87 | + } | |
| 88 | + } | |
| 89 | + | |
| 90 | + /** | |
| 91 | + * HTMLページ内検索. | |
| 92 | + * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
| 93 | + * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
| 94 | + * 行った結果を返す。<br> | |
| 95 | + * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
| 96 | + * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
| 97 | + * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
| 98 | + * @param skey 検索キーデータ(SearchData) | |
| 99 | + * @return String 検索キーに一致するデータの文字列 | |
| 100 | + */ | |
| 101 | + public String search(SearchData skey) { | |
| 102 | + | |
| 103 | + // htmlページ内を検索 | |
| 104 | + if(isHtmlkeyEq(skey) == false) { | |
| 105 | + searchPageData(skey); | |
| 106 | + } | |
| 107 | + /* | |
| 108 | + around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
| 109 | + 入力無し:取得した全ての情報を返す。 | |
| 110 | + */ | |
| 111 | + String regexp = skey.getregexp(); | |
| 112 | + if(skey.getaround().length() > 0) { | |
| 113 | + int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換 | |
| 114 | + if(wkAround < sData.size()) { | |
| 115 | + String str = (String)sData.get(wkAround); | |
| 116 | + String rtn = RegularExpression(str, regexp); | |
| 117 | + return rtn; | |
| 118 | + } | |
| 119 | + } else { | |
| 120 | + StringBuilder strbuf = new StringBuilder(); | |
| 121 | + for (Object sData1 : sData) { | |
| 122 | + String str = (String)sData1; | |
| 123 | + String rtn = RegularExpression(str, regexp); | |
| 124 | + if(strbuf.length() > 0) { | |
| 125 | + strbuf.append("\t"); | |
| 126 | + } | |
| 127 | + strbuf.append(rtn); | |
| 128 | + } | |
| 129 | + return strbuf.toString(); | |
| 130 | + } | |
| 131 | + return null; | |
| 132 | + } | |
| 133 | + | |
| 134 | + /** | |
| 135 | + * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
| 136 | + * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
| 137 | + * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
| 138 | + */ | |
| 139 | + boolean isHtmlkeyEq(SearchData skey) { | |
| 140 | + | |
| 141 | + String stag = skey.getHtmltag(); | |
| 142 | + String sid = skey.getHtmlid(); | |
| 143 | + String sclass = skey.getHtmlclass(); | |
| 144 | + | |
| 145 | + boolean rtn = true; | |
| 146 | + | |
| 147 | + // htmltag | |
| 148 | + if(htmltag == null) { | |
| 149 | + rtn = false; | |
| 150 | + } else { | |
| 151 | + if(htmltag.equals(stag) == false) { | |
| 152 | + rtn = false; | |
| 153 | + } | |
| 154 | + } | |
| 155 | + | |
| 156 | + // htmlid | |
| 157 | + if(htmlid == null) { | |
| 158 | + rtn = false; | |
| 159 | + } else { | |
| 160 | + if(htmlid.equals(sid) == false) { | |
| 161 | + rtn = false; | |
| 162 | + } | |
| 163 | + } | |
| 164 | + | |
| 165 | + // htmlclass | |
| 166 | + if(htmlclass == null) { | |
| 167 | + rtn = false; | |
| 168 | + } else { | |
| 169 | + if(htmlclass.equals(sclass) == false) { | |
| 170 | + rtn = false; | |
| 171 | + } | |
| 172 | + } | |
| 173 | + | |
| 174 | + if(!rtn) { | |
| 175 | + htmltag = stag; | |
| 176 | + htmlid = sid; | |
| 177 | + htmlclass = sclass; | |
| 178 | + } | |
| 179 | + | |
| 180 | + return rtn; | |
| 181 | + } | |
| 182 | + | |
| 183 | + /** | |
| 184 | + * 正規表現検索. | |
| 185 | + * @param strdata | |
| 186 | + * @param regexp | |
| 187 | + * @return | |
| 188 | + */ | |
| 189 | + String RegularExpression(String strdata, String regexp) { | |
| 190 | + String expdata = null; | |
| 191 | + | |
| 192 | + //regexpのチェック | |
| 193 | + if(regexp.isEmpty()) { | |
| 194 | + expdata = strdata; | |
| 195 | + return expdata; | |
| 196 | + } | |
| 197 | + | |
| 198 | + //正規表現検索 | |
| 199 | + Pattern ptn = Pattern.compile(regexp); | |
| 200 | + Matcher matchdata = ptn.matcher(strdata); | |
| 201 | + if (matchdata.find()) { | |
| 202 | + if(matchdata.groupCount() >= 1) { | |
| 203 | + expdata = matchdata.group(1); | |
| 204 | + } | |
| 205 | + } | |
| 206 | + return expdata; | |
| 207 | + } | |
| 208 | + | |
| 209 | + /** | |
| 210 | + * インターネット接続. | |
| 211 | + */ | |
| 212 | + private void getPageData() { | |
| 213 | + HttpURLConnection con = null; | |
| 214 | + try { | |
| 215 | + con = (HttpURLConnection)url.openConnection(); | |
| 216 | + con.setRequestMethod("GET"); | |
| 217 | + BufferedReader reader = new BufferedReader( | |
| 218 | + new InputStreamReader(con.getInputStream(), "utf-8")); | |
| 219 | + String wkline; | |
| 220 | + StringBuilder sb = new StringBuilder(); | |
| 221 | + while((wkline = reader.readLine()) != null) { | |
| 222 | + sb.append(wkline).append("\n"); | |
| 223 | + } | |
| 224 | + pageData = sb.toString(); | |
| 225 | + | |
| 226 | + } catch(FileNotFoundException ex) { | |
| 227 | + pageData = null; | |
| 228 | + } catch (IOException ex) { | |
| 229 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 230 | + } finally { | |
| 231 | + if(con != null) { | |
| 232 | + con.disconnect(); | |
| 233 | + } | |
| 234 | + } | |
| 235 | + } | |
| 236 | + | |
| 237 | + /** | |
| 238 | + * HTMLパーサ. | |
| 239 | + * @param skey | |
| 240 | + */ | |
| 241 | + private void searchPageData(SearchData skey) { | |
| 242 | + | |
| 243 | + DebugProcess.searchDatainfo(skey); | |
| 244 | + | |
| 245 | + Reader reader; | |
| 246 | + try { | |
| 247 | + reader = new BufferedReader(new StringReader(pageData)); | |
| 248 | + HtmlParserCallback cb = new HtmlParserCallback(skey); | |
| 249 | + ParserDelegator pd = new ParserDelegator(); | |
| 250 | + pd.parse(reader, cb, true); | |
| 251 | + reader.close(); | |
| 252 | + | |
| 253 | + sData = cb.getrtnData(); | |
| 254 | + | |
| 255 | + } catch (IOException ex) { | |
| 256 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 257 | + } | |
| 258 | + } | |
| 259 | +} |
| @@ -0,0 +1,163 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.util.ArrayList; | |
| 26 | +import java.util.Enumeration; | |
| 27 | +import javax.swing.text.MutableAttributeSet; | |
| 28 | +import javax.swing.text.html.HTML; | |
| 29 | + | |
/**
 * Holds the attributes of HTML tags.
 * Every attribute passed to {@link #add} becomes one {@link AttrData} record
 * (tag, occurrence count of that tag, attribute name, attribute value) in
 * {@link #AttrList}.
 *
 * @author kgto
 */
public class AttributeData {

    /** One attribute of one tag occurrence. */
    public class AttrData {
        /** Tag the attribute belongs to. */
        public HTML.Tag tag;
        /** 1-based occurrence number of {@code tag} among the recorded tags. */
        public int count;
        /** Attribute name, as returned by {@code toString()} of the attribute key. */
        public String attrname;
        /** Attribute value, as returned by {@code toString()} of the stored value. */
        public String attrvalue;
    }

    /** All recorded attributes, in insertion order. */
    public ArrayList<AttrData> AttrList;
    /** Size of {@link #AttrList} as of the last {@link #add} that stored something. */
    public int size;

    /**
     * Creates an empty attribute store.
     */
    public AttributeData() {
        AttrList = new ArrayList<>();
        size = 0;
    }

    /**
     * Records every attribute of one tag occurrence.
     * A tag without attributes stores nothing and therefore does not advance
     * the occurrence count of that tag.
     *
     * @param tag  tag the attributes belong to
     * @param attr attributes of the tag
     */
    public void add(HTML.Tag tag, MutableAttributeSet attr) {

        int tagcount = tagcnt(tag) + 1;

        Enumeration<?> names = attr.getAttributeNames();
        while (names.hasMoreElements()) {
            Object key = names.nextElement();

            AttrData a = new AttrData();
            a.tag = tag;
            a.count = tagcount;
            a.attrname = key.toString();
            a.attrvalue = attr.getAttribute(key).toString();

            AttrList.add(a);
            size = AttrList.size();
        }

    }

    /**
     * Tells whether an attribute with the given name and value was recorded
     * for the given tag.
     *
     * @param tag       tag to look at
     * @param attrname  attribute name
     * @param attrvalue attribute value (compared with {@code equals})
     * @return true when at least one matching record exists
     */
    public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
        for (AttrData a : AttrList) {
            if (a.tag == tag
                    && a.attrname.equals(attrname)
                    && a.attrvalue.equals(attrvalue)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Shorthand for {@code search(tag, "id", attrvalue)}.
     *
     * @param tag       tag to look at
     * @param attrvalue expected id value
     * @return true when a matching record exists
     */
    public boolean searchId(HTML.Tag tag, String attrvalue) {
        return search(tag, "id", attrvalue);
    }

    /**
     * Shorthand for {@code search(tag, "class", attrvalue)}.
     *
     * @param tag       tag to look at
     * @param attrvalue expected class value
     * @return true when a matching record exists
     */
    public boolean searchClass(HTML.Tag tag, String attrvalue) {
        return search(tag, "class", attrvalue);
    }

    /**
     * Collects the values of one attribute of one tag.
     *
     * @param tag      tag to look at
     * @param attrname attribute name
     * @return values of all matching records, in insertion order; empty when
     *         there is none
     */
    public ArrayList<String> getvale(HTML.Tag tag, String attrname) {
        ArrayList<String> ret = new ArrayList<>();
        for (AttrData a : AttrList) {
            if (a.tag == tag && a.attrname.equals(attrname)) {
                ret.add(a.attrvalue);
            }
        }
        return ret;
    }

    /**
     * Returns the highest occurrence count recorded so far for the given tag.
     *
     * @param tag tag to look at
     * @return highest count, or 0 when the tag has no record yet
     */
    private int tagcnt(HTML.Tag tag) {
        int wkcnt = 0;
        for (AttrData a : AttrList) {
            if (a.tag == tag && wkcnt < a.count) {
                wkcnt = a.count;
            }
        }
        return wkcnt;
    }

    // Accessors for the i-th record of AttrList.
    // All of them throw IndexOutOfBoundsException for an invalid index.

    /**
     * @param i zero-based record index
     * @return tag of the record
     */
    public HTML.Tag gettag(int i) {
        return AttrList.get(i).tag;
    }

    /**
     * @param i zero-based record index
     * @return occurrence count of the record
     */
    public int getcount(int i) {
        return AttrList.get(i).count;
    }

    /**
     * @param i zero-based record index
     * @return attribute name of the record
     */
    public String getattrname(int i) {
        return AttrList.get(i).attrname;
    }

    /**
     * @param i zero-based record index
     * @return attribute value of the record
     */
    public String getattrvalue(int i) {
        return AttrList.get(i).attrvalue;
    }

}
| @@ -0,0 +1,211 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.util.ArrayList; | |
| 26 | +import java.util.HashMap; | |
| 27 | +import javax.swing.text.MutableAttributeSet; | |
| 28 | +import javax.swing.text.html.HTML; | |
| 29 | +import javax.swing.text.html.HTMLEditorKit; | |
| 30 | + | |
| 31 | +/** | |
| 32 | + * HTMLパーサ部品. | |
| 33 | + * @author kgto | |
| 34 | + */ | |
| 35 | +class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
| 36 | + | |
| 37 | + // Tag毎の階層 | |
| 38 | + HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
| 39 | + | |
| 40 | + // serach key 情報 | |
| 41 | + String keytag; | |
| 42 | + String keyid; | |
| 43 | + String keyclass; | |
| 44 | + | |
| 45 | + // serach key と一致時の情報退避 | |
| 46 | + int bufCount = 0; | |
| 47 | + HTML.Tag bufTag = null; | |
| 48 | + // serach key と一致時の情報格納ワーク | |
| 49 | + StringBuilder bufText; | |
| 50 | + | |
| 51 | + // serach key と一致時のデータ一覧 | |
| 52 | + ArrayList sData; | |
| 53 | + | |
| 54 | + // 属性データ | |
| 55 | + AttributeData attrdata; | |
| 56 | + | |
| 57 | + protected HtmlParserCallback(SearchData skey) { | |
| 58 | + | |
| 59 | + // キー情報展開 | |
| 60 | + keytag = skey.getHtmltag(); | |
| 61 | + keyid = skey.getHtmlid(); | |
| 62 | + keyclass = skey.getHtmlclass(); | |
| 63 | + | |
| 64 | + sData = new ArrayList(); | |
| 65 | + } | |
| 66 | + | |
| 67 | + ArrayList getrtnData() { | |
| 68 | + return this.sData; | |
| 69 | + } | |
| 70 | + | |
| 71 | + @Override | |
| 72 | + public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 73 | + // Tag毎の階層を保持 | |
| 74 | + int count = 1; | |
| 75 | + if(tagMap.containsKey(tag)) { | |
| 76 | + count = tagMap.get(tag); | |
| 77 | + count++; | |
| 78 | + } | |
| 79 | + tagMap.put(tag, count); | |
| 80 | + | |
| 81 | + // 属性解析 | |
| 82 | + AttributeData handleStartattrdata = new AttributeData(); | |
| 83 | + handleStartattrdata.add(tag, attr); | |
| 84 | + | |
| 85 | + DebugProcess.htmlinfo(tag, attr, "handleStartTag", count); | |
| 86 | + | |
| 87 | + if(bufCount == 0) { | |
| 88 | + if(tag.toString().equals(keytag)) { | |
| 89 | + //if(serachAttribute(attr)) { | |
| 90 | + if(serachAttribute(tag, handleStartattrdata)) { | |
| 91 | + bufCount = count; | |
| 92 | + bufTag = tag; | |
| 93 | + attrdata = new AttributeData(); | |
| 94 | + bufText = new StringBuilder(); | |
| 95 | + } | |
| 96 | + } | |
| 97 | + } | |
| 98 | + if(bufCount > 0) { | |
| 99 | + attrdata.add(tag, attr); | |
| 100 | + } | |
| 101 | + } | |
| 102 | + | |
| 103 | + @Override | |
| 104 | + public void handleEndTag(HTML.Tag tag, int pos){ | |
| 105 | + // Tag毎の階層を取得 | |
| 106 | + int count = 0; | |
| 107 | + if(tagMap.containsKey(tag)) { | |
| 108 | + count = tagMap.get(tag); | |
| 109 | + } | |
| 110 | + | |
| 111 | + DebugProcess.htmlinfo(tag, null, "handleEndTag", count); | |
| 112 | + | |
| 113 | + if(tag.equals(bufTag) && count <= bufCount) { | |
| 114 | + | |
| 115 | + // 溜め込んだ一致情報をリストへ格納 | |
| 116 | + sData.add(bufText.toString()); | |
| 117 | + | |
| 118 | + // 退避したserach keyとの一致情報クリア | |
| 119 | + bufCount = 0; | |
| 120 | + bufTag = null; | |
| 121 | + bufText = null; | |
| 122 | + } | |
| 123 | + | |
| 124 | + // Tag毎の階層減算 | |
| 125 | + tagMap.put(tag, --count); | |
| 126 | + } | |
| 127 | + | |
| 128 | + @Override | |
| 129 | + public void handleText(char[] data, int pos){ | |
| 130 | + | |
| 131 | + DebugProcess.htmlinfo(data, "handleText"); | |
| 132 | + | |
| 133 | + String splitchar = "\t"; | |
| 134 | + //制御文字の削除 | |
| 135 | + // 0xa0 | |
| 136 | + StringBuilder buf = new StringBuilder(); | |
| 137 | + for(int i = 0; i < data.length; i++) { | |
| 138 | + if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
| 139 | + buf.append(data[i]); | |
| 140 | + } | |
| 141 | + } | |
| 142 | + if(bufCount > 0) { | |
| 143 | + if(bufText.length() > 0) { | |
| 144 | + bufText.append(splitchar); | |
| 145 | + } | |
| 146 | + bufText.append(buf.toString()); | |
| 147 | + } | |
| 148 | + } | |
| 149 | + | |
| 150 | + @Override | |
| 151 | + public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 152 | + if(bufCount > 0) { | |
| 153 | + attrdata.add(tag, attr); | |
| 154 | + } | |
| 155 | + DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0); | |
| 156 | + } | |
| 157 | + | |
| 158 | + /** | |
| 159 | + * ページ内のID/CLASS値と検索キーを比較する. | |
| 160 | + * @param attr ページのMutableAttributeSet | |
| 161 | + * @return boolean 検索キーと一致の時、true | |
| 162 | + */ | |
| 163 | + boolean serachAttribute(MutableAttributeSet attr) { | |
| 164 | + String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
| 165 | + String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
| 166 | + | |
| 167 | + if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 168 | + if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
| 169 | + return true; | |
| 170 | + } | |
| 171 | + } | |
| 172 | + | |
| 173 | + if(keyid.isEmpty() == false) { | |
| 174 | + if(keyid.equals(currentID)) { | |
| 175 | + return true; | |
| 176 | + } | |
| 177 | + } | |
| 178 | + | |
| 179 | + if(keyclass.isEmpty() == false) { | |
| 180 | + if(keyclass.equals(currentClass)) { | |
| 181 | + return true; | |
| 182 | + } | |
| 183 | + } | |
| 184 | + | |
| 185 | + return false; | |
| 186 | + } | |
| 187 | + | |
| 188 | + /** | |
| 189 | + * ページ内のID/CLASS値と検索キーを比較する. | |
| 190 | + * @param tag | |
| 191 | + * @param attrdata | |
| 192 | + * @return boolean 検索キーと一致の時、true | |
| 193 | + */ | |
| 194 | + boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
| 195 | + // ID と CLASS の両方にキー入力有りの場合 | |
| 196 | + if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 197 | + if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
| 198 | + return true; | |
| 199 | + } | |
| 200 | + } | |
| 201 | + // ID のキーチェック | |
| 202 | + if(keyid.isEmpty() == false) { | |
| 203 | + return attrdata.searchId(tag, keyid); | |
| 204 | + } | |
| 205 | + // CLASS のキーチェック | |
| 206 | + if(keyclass.isEmpty() == false) { | |
| 207 | + return attrdata.searchClass(tag, keyclass); | |
| 208 | + } | |
| 209 | + return false; | |
| 210 | + } | |
| 211 | +} |
| @@ -0,0 +1,264 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.io.File; | |
| 26 | +import java.io.FileInputStream; | |
| 27 | +import java.io.FileNotFoundException; | |
| 28 | +import java.io.IOException; | |
| 29 | +import java.util.logging.FileHandler; | |
| 30 | +import java.util.logging.Formatter; | |
| 31 | +import java.util.logging.Handler; | |
| 32 | +import java.util.logging.Level; | |
| 33 | +import java.util.logging.LogManager; | |
| 34 | +import java.util.logging.LogRecord; | |
| 35 | +import java.util.logging.Logger; | |
| 36 | +import javax.swing.text.MutableAttributeSet; | |
| 37 | +import javax.swing.text.html.HTML; | |
| 38 | + | |
| 39 | +/** | |
| 40 | + * デバック情報. | |
| 41 | + * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。 | |
| 42 | + * @author kgto | |
| 43 | + */ | |
| 44 | +public class DebugProcess { | |
| 45 | + // 設定ファイル名 | |
| 46 | + protected static final String configurationFilename = "Debug.prop"; | |
| 47 | + // ロガー名 | |
| 48 | + protected static final Logger logger = Logger.getLogger("WebScraping"); | |
| 49 | + // ログ出力デフォルトレベル | |
| 50 | + protected static final Level loggerlevel = Level.FINEST; | |
| 51 | + | |
| 52 | + | |
| 53 | + /** | |
| 54 | + * ログ出力設定. | |
| 55 | + * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、 | |
| 56 | + * ファイルハンドラの設定と出力書式の設定を行う。 | |
| 57 | + */ | |
| 58 | + public static void debuglog_set() { | |
| 59 | + try { | |
| 60 | + initLogConfiguration(); | |
| 61 | + | |
| 62 | + if(Level.ALL.equals(logger.getLevel())) { | |
| 63 | + //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2)); | |
| 64 | + logger.addHandler(new FileHandler("WebScraping%g.log", true)); | |
| 65 | + } | |
| 66 | + setFomatter(); | |
| 67 | + | |
| 68 | + } catch (IOException | SecurityException ex) { | |
| 69 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 70 | + } | |
| 71 | + } | |
| 72 | + | |
| 73 | + /** | |
| 74 | + * ログ出力設定解除. | |
| 75 | + */ | |
| 76 | + public static void debuglog_unset() { | |
| 77 | + } | |
| 78 | + | |
| 79 | + | |
| 80 | + /** | |
| 81 | + * デバック出力(HTML解析-タグ&属性). | |
| 82 | + * HTMLのタグと属性の解析状態を出力する。 | |
| 83 | + * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br> | |
| 84 | + * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br> | |
| 85 | + * @param tag タグ | |
| 86 | + * @param attr 属性 | |
| 87 | + * @param methodname このメソッドを呼び出した親メソッド名 | |
| 88 | + * @param count HTMLタグの階層レベル | |
| 89 | + */ | |
| 90 | + public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr, | |
| 91 | + String methodname, int count) { | |
| 92 | + | |
| 93 | + // ログ出力レベルチェック | |
| 94 | + if(logger.getLevel() == null) { | |
| 95 | + return; | |
| 96 | + } | |
| 97 | + if(logger.getLevel().intValue() > loggerlevel.intValue()) { | |
| 98 | + return; | |
| 99 | + } | |
| 100 | + | |
| 101 | + // 編集処理 | |
| 102 | + char kbn = ' '; | |
| 103 | + if("handleStartTag".equals(methodname)) { | |
| 104 | + kbn = 'F'; | |
| 105 | + } | |
| 106 | + if("handleEndTag".equals(methodname)) { | |
| 107 | + kbn = 'E'; | |
| 108 | + } | |
| 109 | + if("handleSimpleTag".equals(methodname)) { | |
| 110 | + kbn = 'S'; | |
| 111 | + } | |
| 112 | + | |
| 113 | + StringBuilder strBuf = new StringBuilder(80); | |
| 114 | + strBuf.append(count).append(" : "); | |
| 115 | + strBuf.append(kbn).append(" : "); | |
| 116 | + strBuf.append(tag.toString()); | |
| 117 | + // 属性情報 | |
| 118 | + if(attr != null) { | |
| 119 | + if(attr.getAttributeCount() != 0) { | |
| 120 | + AttributeData handleAttrData = new AttributeData(); | |
| 121 | + handleAttrData.add(tag, attr); | |
| 122 | + for(int i = 0; i < handleAttrData.size; i++) { | |
| 123 | + strBuf.append(" ["); | |
| 124 | + strBuf.append(handleAttrData.getattrname(i)); | |
| 125 | + strBuf.append("]"); | |
| 126 | + strBuf.append(handleAttrData.getcount(i)); | |
| 127 | + strBuf.append(" = "); | |
| 128 | + strBuf.append(handleAttrData.getattrvalue(i)); | |
| 129 | + } | |
| 130 | + } | |
| 131 | + } | |
| 132 | + | |
| 133 | + logger.log(loggerlevel, strBuf.toString()); | |
| 134 | + } | |
| 135 | + | |
| 136 | + /** | |
| 137 | + * デバック出力(メッセージ). | |
| 138 | + * 引数に渡された任意のメッセージを出力する。 | |
| 139 | + * @param str メッセージ | |
| 140 | + * @param methodname このメソッドを呼び出した親メソッド名 | |
| 141 | + */ | |
| 142 | + public static void htmlinfo(String str, String methodname) { | |
| 143 | + logger.log(loggerlevel, str); | |
| 144 | + } | |
| 145 | + | |
| 146 | + public static void htmlinfo(String str) { | |
| 147 | + logger.log(loggerlevel, str); | |
| 148 | + } | |
| 149 | + | |
| 150 | + /** | |
| 151 | + * デバック出力(HTML解析-本文). | |
| 152 | + * 本文の内容を出力する。 | |
| 153 | + * @param data 本文(HTML内の文字列) | |
| 154 | + * @param methodname このメソッドを呼び出した親メソッド名 | |
| 155 | + */ | |
| 156 | + public static void htmlinfo(char[] data, String methodname) { | |
| 157 | + String dat = new String(data); | |
| 158 | + logger.log(loggerlevel, dat); | |
| 159 | + } | |
| 160 | + | |
| 161 | + public static void htmlinfo(char[] data) { | |
| 162 | + String dat = new String(data); | |
| 163 | + logger.log(loggerlevel, dat); | |
| 164 | + } | |
| 165 | + | |
| 166 | + /** | |
| 167 | + * デバック出力(検索キー). | |
| 168 | + * 検索キー(SearchData)の内容を出力する。 | |
| 169 | + * @param skey | |
| 170 | + */ | |
| 171 | + public static void searchDatainfo(SearchData skey) { | |
| 172 | + | |
| 173 | + StringBuilder strBuf = new StringBuilder(30); | |
| 174 | + strBuf.append("SearchData KEY tag["); | |
| 175 | + strBuf.append(skey.getHtmltag()); | |
| 176 | + strBuf.append("] ID["); | |
| 177 | + strBuf.append(skey.getHtmlid()); | |
| 178 | + strBuf.append("] CLASS["); | |
| 179 | + strBuf.append(skey.getHtmlclass()); | |
| 180 | + strBuf.append("]\n"); | |
| 181 | + | |
| 182 | + logger.log(loggerlevel, strBuf.toString()); | |
| 183 | + } | |
| 184 | + | |
| 185 | + /** | |
| 186 | + * ログ出力設定ファイルチェック. | |
| 187 | + * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。 | |
| 188 | + */ | |
| 189 | + private static void initLogConfiguration() { | |
| 190 | + | |
| 191 | + File file = new File(configurationFilename); | |
| 192 | + try { | |
| 193 | + if(file.exists()) { | |
| 194 | + FileInputStream inputStream = new FileInputStream(file); | |
| 195 | + // 設定ファイルの読み込み | |
| 196 | + LogManager.getLogManager().readConfiguration(inputStream); | |
| 197 | + } | |
| 198 | + | |
| 199 | + } catch (FileNotFoundException ex) { | |
| 200 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 201 | + } catch (IOException ex) { | |
| 202 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 203 | + } | |
| 204 | + } | |
| 205 | + | |
| 206 | + /** | |
| 207 | + * ログ出力フォーマッター設定. | |
| 208 | + * ファイルへログ出力時の書式を設定する。 | |
| 209 | + */ | |
| 210 | + private static void setFomatter() { | |
| 211 | + Handler[] handlers = logger.getHandlers(); | |
| 212 | + for(int i = 0 ; i < handlers.length ; i++) { | |
| 213 | + if(handlers[i] instanceof java.util.logging.FileHandler) { | |
| 214 | + handlers[i].setFormatter(new HtmlFormatter()); | |
| 215 | + } | |
| 216 | + } | |
| 217 | + } | |
| 218 | + | |
| 219 | +} | |
| 220 | + | |
/**
 * Log output formatter.
 * @author kgto
 */
class HtmlFormatter extends Formatter {

    /**
     * Builds the text written to the log for one record.
     * Output layout:<br>
     * {@code YYYY-MM-DD HH:MM:SS LEVEL<methodName>message}, terminated by a
     * newline. When the record carries a throwable, its {@code toString()}
     * and one tab-indented line per stack frame follow.
     *
     * @param aRecord log record to render
     * @return formatted text for the record
     */
    @Override
    public synchronized String format(final LogRecord aRecord) {

        final StringBuilder out = new StringBuilder(100);

        // Timestamp and level.
        out.append(String.format("%tF %<tT", aRecord.getMillis()))
           .append(' ')
           .append(aRecord.getLevel())
           .append('<');

        // Source method, or a placeholder when it is unknown.
        final String sourceMethod = aRecord.getSourceMethodName();
        out.append(sourceMethod == null ? "N/A" : sourceMethod)
           .append('>')
           .append(formatMessage(aRecord))
           .append('\n');

        final Throwable thrown = aRecord.getThrown();
        if (thrown == null) {
            return out.toString();
        }

        // For an exception, add its description and the stack trace.
        out.append(thrown.toString()).append('\n');
        for (StackTraceElement frame : thrown.getStackTrace()) {
            out.append('\t').append(frame.toString()).append('\n');
        }
        return out.toString();
    }
}
| @@ -1,48 +0,0 @@ | ||
| 1 | - | |
| 2 | -package WebScraping; | |
| 3 | - | |
| 4 | -import static WebScraping.DebugProcess.logger; | |
| 5 | -import java.util.logging.Formatter; | |
| 6 | -import java.util.logging.Handler; | |
| 7 | -import java.util.logging.Logger; | |
| 8 | - | |
| 9 | -/** | |
| 10 | - * | |
| 11 | - * @author kgto | |
| 12 | - */ | |
| 13 | - | |
| 14 | - | |
| 15 | -public class DebugProcessT01 { | |
| 16 | - | |
| 17 | - public static void main(String[] args) { | |
| 18 | - | |
| 19 | - DebugProcessT01 test = new DebugProcessT01(); | |
| 20 | - test.testdebuglog_set(); | |
| 21 | - | |
| 22 | - System.out.println("LoggerName : " + logger.getName()); | |
| 23 | - System.out.println("LoggerLevel : " + logger.getLevel()); | |
| 24 | - System.out.println("Parent : " + logger.getParent().getName()); | |
| 25 | - | |
| 26 | - Handler[] handlers = logger.getHandlers(); | |
| 27 | - for(int i = 0 ; i < handlers.length ; i++) { | |
| 28 | - System.out.println(handlers[i] + "'s Level: " + handlers[i].getLevel()); | |
| 29 | - | |
| 30 | - Formatter formatter = handlers[i].getFormatter(); | |
| 31 | - System.out.println("\tFormatter: " + formatter.toString()); | |
| 32 | - } | |
| 33 | - | |
| 34 | - test.testhtmlinfo(); | |
| 35 | - } | |
| 36 | - | |
| 37 | - public DebugProcessT01() { | |
| 38 | - } | |
| 39 | - | |
| 40 | - void testdebuglog_set() { | |
| 41 | - DebugProcess.debuglog_set(); | |
| 42 | - } | |
| 43 | - | |
| 44 | - void testhtmlinfo() { | |
| 45 | - DebugProcess.htmlinfo("testhtmlinfo"); | |
| 46 | - } | |
| 47 | - | |
| 48 | -} |
| @@ -1,113 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -package WebScraping; | |
| 20 | - | |
| 21 | -import javax.swing.text.MutableAttributeSet; | |
| 22 | -import javax.swing.text.html.HTML; | |
| 23 | -import org.junit.AfterClass; | |
| 24 | -import org.junit.BeforeClass; | |
| 25 | -import org.junit.Test; | |
| 26 | -import static org.junit.Assert.*; | |
| 27 | - | |
| 28 | -import org.junit.AfterClass; | |
| 29 | -import org.junit.BeforeClass; | |
| 30 | -import org.junit.Test; | |
| 31 | -import static org.junit.Assert.*; | |
| 32 | - | |
| 33 | -/** | |
| 34 | - * | |
| 35 | - * @author kgto | |
| 36 | - */ | |
| 37 | - | |
| 38 | - | |
| 39 | -public class DebugProcessTest { | |
| 40 | - | |
| 41 | - public DebugProcessTest() { | |
| 42 | - } | |
| 43 | - | |
| 44 | - @BeforeClass | |
| 45 | - public static void setUpClass() { | |
| 46 | - } | |
| 47 | - | |
| 48 | - @AfterClass | |
| 49 | - public static void tearDownClass() { | |
| 50 | - } | |
| 51 | - | |
| 52 | - /** | |
| 53 | - * Test of debuglog_set method, of class DebugProcess. | |
| 54 | - */ | |
| 55 | - @Test | |
| 56 | - public void testDebuglog_set() { | |
| 57 | - System.out.println("debuglog_set"); | |
| 58 | - DebugProcess.debuglog_set(); | |
| 59 | - // TODO review the generated test code and remove the default call to fail. | |
| 60 | - fail("The test case is a prototype."); | |
| 61 | - } | |
| 62 | - | |
| 63 | - /** | |
| 64 | - * Test of debuglog_unset method, of class DebugProcess. | |
| 65 | - */ | |
| 66 | - @Test | |
| 67 | - public void testDebuglog_unset() { | |
| 68 | - System.out.println("debuglog_unset"); | |
| 69 | - DebugProcess.debuglog_unset(); | |
| 70 | - // TODO review the generated test code and remove the default call to fail. | |
| 71 | - fail("The test case is a prototype."); | |
| 72 | - } | |
| 73 | - | |
| 74 | - /** | |
| 75 | - * Test of htmlinfo method, of class DebugProcess. | |
| 76 | - */ | |
| 77 | - @Test | |
| 78 | - public void testHtmlinfo_4args() { | |
| 79 | - System.out.println("htmlinfo"); | |
| 80 | - HTML.Tag tag = null; | |
| 81 | - MutableAttributeSet attr = null; | |
| 82 | - String methodname = ""; | |
| 83 | - int count = 0; | |
| 84 | - DebugProcess.htmlinfo(tag, attr, methodname, count); | |
| 85 | - // TODO review the generated test code and remove the default call to fail. | |
| 86 | - fail("The test case is a prototype."); | |
| 87 | - } | |
| 88 | - | |
| 89 | - /** | |
| 90 | - * Test of htmlinfo method, of class DebugProcess. | |
| 91 | - */ | |
| 92 | - @Test | |
| 93 | - public void testHtmlinfo_String() { | |
| 94 | - System.out.println("htmlinfo"); | |
| 95 | - String str = ""; | |
| 96 | - DebugProcess.htmlinfo(str); | |
| 97 | - // TODO review the generated test code and remove the default call to fail. | |
| 98 | - fail("The test case is a prototype."); | |
| 99 | - } | |
| 100 | - | |
| 101 | - /** | |
| 102 | - * Test of htmlinfo method, of class DebugProcess. | |
| 103 | - */ | |
| 104 | - @Test | |
| 105 | - public void testHtmlinfo_charArr() { | |
| 106 | - System.out.println("htmlinfo"); | |
| 107 | - char[] data = null; | |
| 108 | - DebugProcess.htmlinfo(data); | |
| 109 | - // TODO review the generated test code and remove the default call to fail. | |
| 110 | - fail("The test case is a prototype."); | |
| 111 | - } | |
| 112 | - | |
| 113 | -} |
| @@ -1,66 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -package WebScraping; | |
| 20 | - | |
| 21 | -import java.util.logging.LogRecord; | |
| 22 | -import org.junit.AfterClass; | |
| 23 | -import org.junit.BeforeClass; | |
| 24 | -import org.junit.Test; | |
| 25 | -import static org.junit.Assert.*; | |
| 26 | - | |
| 27 | -import org.junit.AfterClass; | |
| 28 | -import org.junit.BeforeClass; | |
| 29 | -import org.junit.Test; | |
| 30 | -import static org.junit.Assert.*; | |
| 31 | - | |
| 32 | -/** | |
| 33 | - * | |
| 34 | - * @author kgto | |
| 35 | - */ | |
| 36 | - | |
| 37 | - | |
| 38 | -public class HtmlFormatterTest { | |
| 39 | - | |
| 40 | - public HtmlFormatterTest() { | |
| 41 | - } | |
| 42 | - | |
| 43 | - @BeforeClass | |
| 44 | - public static void setUpClass() { | |
| 45 | - } | |
| 46 | - | |
| 47 | - @AfterClass | |
| 48 | - public static void tearDownClass() { | |
| 49 | - } | |
| 50 | - | |
| 51 | - /** | |
| 52 | - * Test of format method, of class HtmlFormatter. | |
| 53 | - */ | |
| 54 | - @Test | |
| 55 | - public void testFormat() { | |
| 56 | - System.out.println("format"); | |
| 57 | - LogRecord aRecord = null; | |
| 58 | - HtmlFormatter instance = new HtmlFormatter(); | |
| 59 | - String expResult = ""; | |
| 60 | - String result = instance.format(aRecord); | |
| 61 | - assertEquals(expResult, result); | |
| 62 | - // TODO review the generated test code and remove the default call to fail. | |
| 63 | - fail("The test case is a prototype."); | |
| 64 | - } | |
| 65 | - | |
| 66 | -} |
| @@ -0,0 +1,46 @@ | ||
| 1 | + | |
| 2 | +package utility.test1; | |
| 3 | + | |
| 4 | +import webScraping.utility.SearchDataRW; | |
| 5 | +import java.io.File; | |
| 6 | +import webScraping.core.SearchData; | |
| 7 | + | |
| 8 | +/** | |
| 9 | + * ファイル読込みテスト | |
| 10 | + * @author kgto | |
| 11 | + */ | |
| 12 | +public class SearchDataRWT02 { | |
| 13 | + SearchDataRW sio = new SearchDataRW(); | |
| 14 | + | |
| 15 | + File file = new File("test1.xml"); | |
| 16 | + | |
| 17 | + /** | |
| 18 | + * @param args the command line arguments | |
| 19 | + */ | |
| 20 | + public static void main(String[] args) { | |
| 21 | + SearchDataRWT02 test01 = new SearchDataRWT02(); | |
| 22 | + test01.load01(); | |
| 23 | + } | |
| 24 | + | |
| 25 | + void SearchDataRWT01() { | |
| 26 | + } | |
| 27 | + | |
| 28 | + void load01() { | |
| 29 | + char spchar = '\t'; | |
| 30 | + | |
| 31 | + sio.load(file); | |
| 32 | + for(int i = 0; i < SearchData.size(); i++) { | |
| 33 | + SearchData sdat = SearchData.get(i); | |
| 34 | + | |
| 35 | + StringBuilder sbuf = new StringBuilder(); | |
| 36 | + sbuf.append(sdat.getitem()).append(spchar); | |
| 37 | + sbuf.append(sdat.getHtmltag()).append(spchar); | |
| 38 | + sbuf.append(sdat.getHtmlid()).append(spchar); | |
| 39 | + sbuf.append(sdat.getHtmlclass()).append(spchar); | |
| 40 | + sbuf.append(sdat.getaround()).append(spchar); | |
| 41 | + sbuf.append(sdat.getregexp()).append(spchar); | |
| 42 | + System.out.println(sbuf.toString()); | |
| 43 | + } | |
| 44 | + } | |
| 45 | + | |
| 46 | +} |
| @@ -0,0 +1,66 @@ | ||
| 1 | + | |
| 2 | +package utility.test1; | |
| 3 | + | |
| 4 | +import webScraping.utility.SearchDataRW; | |
| 5 | +import java.io.File; | |
| 6 | +import java.lang.reflect.InvocationTargetException; | |
| 7 | +import java.lang.reflect.Method; | |
| 8 | +import java.util.logging.Level; | |
| 9 | +import java.util.logging.Logger; | |
| 10 | + | |
| 11 | +/** | |
| 12 | + * ファイル書出し・読込みテスト | |
| 13 | + * @author kgto | |
| 14 | + */ | |
| 15 | +public class SearchDataRWT01 { | |
| 16 | + SearchDataRW sio = new SearchDataRW(); | |
| 17 | + | |
| 18 | + File file = new File("SearchDataRWT01.xml"); | |
| 19 | + | |
| 20 | + /** | |
| 21 | + * @param args the command line arguments | |
| 22 | + */ | |
| 23 | + public static void main(String[] args) { | |
| 24 | + SearchDataRWT01 test01 = new SearchDataRWT01(); | |
| 25 | + test01.save01(); | |
| 26 | + test01.load01(); | |
| 27 | + } | |
| 28 | + | |
| 29 | + void SearchDataRWT01() { | |
| 30 | + } | |
| 31 | + | |
| 32 | + void save01() { | |
| 33 | + try { | |
| 34 | + String str = "abc\ndef\nghi\n1111"; | |
| 35 | + | |
| 36 | + //sio.saveMsg404(str); | |
| 37 | + // リフレクション | |
| 38 | + Method method = SearchDataRW.class.getDeclaredMethod("saveMsg404", String.class); | |
| 39 | + method.setAccessible(true); | |
| 40 | + method.invoke(sio, str); | |
| 41 | + | |
| 42 | + sio.write(file); | |
| 43 | + | |
| 44 | + } catch (NoSuchMethodException | SecurityException | |
| 45 | + | IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) { | |
| 46 | + Logger.getLogger(SearchDataRWT01.class.getName()).log(Level.SEVERE, null, ex); | |
| 47 | + } | |
| 48 | + } | |
| 49 | + | |
| 50 | + void load01() { | |
| 51 | + try { | |
| 52 | + Method method = SearchDataRW.class.getDeclaredMethod("loadMsg404"); | |
| 53 | + method.setAccessible(true); | |
| 54 | + Object obj = method.invoke(sio); | |
| 55 | + String str = (String)obj; | |
| 56 | + | |
| 57 | + System.out.println("loadMsg404 = " + str); | |
| 58 | + | |
| 59 | + | |
| 60 | + } catch (NoSuchMethodException | SecurityException | |
| 61 | + | IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) { | |
| 62 | + Logger.getLogger(SearchDataRWT01.class.getName()).log(Level.SEVERE, null, ex); | |
| 63 | + } | |
| 64 | + } | |
| 65 | + | |
| 66 | +} |
| @@ -0,0 +1,16 @@ | ||
| 1 | +<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata> | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | +<url>http://weather.yahoo.co.jp/weather/</url><searchlist listNo="1"><item>天気01</item><htmltag>li</htmltag><htmlclass>point pt1400</htmlclass></searchlist><searchlist listNo="2"><item>天気02</item><htmltag>li</htmltag><htmlclass>point pt1900</htmlclass></searchlist><searchlist listNo="3"><item>天気03</item><htmltag>li</htmltag><htmlclass>point pt3410</htmlclass></searchlist><searchlist listNo="4"><item>天気04</item><htmltag>li</htmltag><htmlclass>point pt4410</htmlclass></searchlist><searchlist listNo="5"><item>天気05</item><htmltag>li</htmltag><htmlclass>point pt5110</htmlclass></searchlist><searchlist listNo="6"><item>天気06</item><htmltag>li</htmltag><htmlclass>point pt5410</htmlclass></searchlist><searchlist listNo="7"><item>天気07</item><htmltag>li</htmltag><htmlclass>point pt5610</htmlclass></searchlist><searchlist listNo="8"><item>天気08</item><htmltag>li</htmltag><htmlclass>point pt6200</htmlclass></searchlist><searchlist listNo="9"><item>天気09</item><htmltag>li</htmltag><htmlclass>point pt6710</htmlclass></searchlist><searchlist listNo="10"><item>天気10</item><htmltag>li</htmltag><htmlclass>point pt7410</htmlclass></searchlist><searchlist listNo="11"><item>天気11</item><htmltag>li</htmltag><htmlclass>point pt8210</htmlclass></searchlist><searchlist listNo="12"><item>天気12</item><htmltag>li</htmltag><htmlclass>point pt8810</htmlclass></searchlist><searchlist listNo="13"><item>天気13</item><htmltag>li</htmltag><htmlclass>point pt9110</htmlclass></searchlist></searchdata> | |
| \ No newline at end of file |