作業部屋の使い方を試しています。
(empty log message)
| @@ -33,7 +33,9 @@ | ||
| 33 | 33 | * @author kgto |
| 34 | 34 | */ |
| 35 | 35 | class HtmlParserCallback extends HTMLEditorKit.ParserCallback { |
| 36 | - | |
| 36 | + /* ---------------------------------------------------------------------- * | |
| 37 | + * フィールド | |
| 38 | + * ---------------------------------------------------------------------- */ | |
| 37 | 39 | // Tag毎の階層 |
| 38 | 40 | HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); |
| 39 | 41 |
| @@ -54,6 +56,9 @@ | ||
| 54 | 56 | // 属性データ |
| 55 | 57 | AttributeData attrdata; |
| 56 | 58 | |
| 59 | + /* ---------------------------------------------------------------------- * | |
| 60 | + * コンストラクタ | |
| 61 | + * ---------------------------------------------------------------------- */ | |
| 57 | 62 | protected HtmlParserCallback(SearchData skey) { |
| 58 | 63 | |
| 59 | 64 | // キー情報展開 |
| @@ -64,10 +69,16 @@ | ||
| 64 | 69 | sData = new ArrayList(); |
| 65 | 70 | } |
| 66 | 71 | |
| 72 | + /* ---------------------------------------------------------------------- * | |
| 73 | + * Getter | |
| 74 | + * ---------------------------------------------------------------------- */ | |
| 67 | 75 | ArrayList getrtnData() { |
| 68 | 76 | return this.sData; |
| 69 | 77 | } |
| 70 | 78 | |
| 79 | + /* ---------------------------------------------------------------------- * | |
| 80 | + * メソッド | |
| 81 | + * ---------------------------------------------------------------------- */ | |
| 71 | 82 | @Override |
| 72 | 83 | public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ |
| 73 | 84 | // Tag毎の階層を保持 |
| @@ -32,20 +32,25 @@ | ||
| 32 | 32 | import javax.swing.text.html.parser.ParserDelegator; |
| 33 | 33 | |
| 34 | 34 | /** |
| 35 | - * | |
| 35 | + * HTMLパーサ. | |
| 36 | 36 | * @author kgto |
| 37 | 37 | */ |
| 38 | 38 | public class HtmlParser { |
| 39 | - | |
| 39 | + /* ---------------------------------------------------------------------- * | |
| 40 | + * フィールド | |
| 41 | + * ---------------------------------------------------------------------- */ | |
| 40 | 42 | URL url; |
| 41 | 43 | String pageData; |
| 42 | 44 | ArrayList sData; |
| 43 | 45 | |
| 44 | 46 | // 作業ワーク |
| 45 | - String htmltag; | |
| 46 | - String htmlid; | |
| 47 | - String htmlclass; | |
| 47 | + private String htmltag; | |
| 48 | + private String htmlid; | |
| 49 | + private String htmlclass; | |
| 48 | 50 | |
| 51 | + /* ---------------------------------------------------------------------- * | |
| 52 | + * コンストラクタ | |
| 53 | + * ---------------------------------------------------------------------- */ | |
| 49 | 54 | public HtmlParser(URL UrlAdress) { |
| 50 | 55 | DebugProcess.debuglog_set(); |
| 51 | 56 | this.url = UrlAdress; |
| @@ -68,15 +73,24 @@ | ||
| 68 | 73 | url = null; |
| 69 | 74 | } |
| 70 | 75 | |
| 76 | + /* ---------------------------------------------------------------------- * | |
| 77 | + * Getter | |
| 78 | + * ---------------------------------------------------------------------- */ | |
| 71 | 79 | public String getStringPageData() { |
| 72 | 80 | return pageData; |
| 73 | 81 | } |
| 74 | 82 | |
| 83 | + /* ---------------------------------------------------------------------- * | |
| 84 | + * Setter | |
| 85 | + * ---------------------------------------------------------------------- */ | |
| 75 | 86 | public void seturl(URL UrlAdress) { |
| 76 | 87 | this.url = UrlAdress; |
| 77 | 88 | getPageData(); |
| 78 | 89 | } |
| 79 | 90 | |
| 91 | + /* ---------------------------------------------------------------------- * | |
| 92 | + * メソッド | |
| 93 | + * ---------------------------------------------------------------------- */ | |
| 80 | 94 | public void seturl(String UrlAdress) { |
| 81 | 95 | try { |
| 82 | 96 | url = new URL(UrlAdress); |
| @@ -23,16 +23,10 @@ | ||
| 23 | 23 | package webScraping.utility; |
| 24 | 24 | |
| 25 | 25 | import webScraping.core.SearchData; |
| 26 | -import java.io.BufferedReader; | |
| 27 | -import java.io.BufferedWriter; | |
| 28 | 26 | import java.io.File; |
| 29 | -import java.io.FileInputStream; | |
| 30 | 27 | import java.io.FileNotFoundException; |
| 31 | 28 | import java.io.FileOutputStream; |
| 32 | 29 | import java.io.IOException; |
| 33 | -import java.io.InputStreamReader; | |
| 34 | -import java.io.OutputStreamWriter; | |
| 35 | -import java.util.ArrayList; | |
| 36 | 30 | import java.util.logging.Level; |
| 37 | 31 | import java.util.logging.Logger; |
| 38 | 32 | import javax.xml.parsers.DocumentBuilder; |
| @@ -52,20 +46,22 @@ | ||
| 52 | 46 | import org.xml.sax.SAXException; |
| 53 | 47 | |
| 54 | 48 | /** |
| 55 | - * | |
| 49 | + * 検索データ読込・保存. | |
| 56 | 50 | * @author kgto |
| 57 | 51 | */ |
| 58 | 52 | public class SearchDataRW { |
| 53 | + /* ---------------------------------------------------------------------- * | |
| 54 | + * フィールド | |
| 55 | + * ---------------------------------------------------------------------- */ | |
| 56 | + private String UrlAdress; | |
| 59 | 57 | |
| 60 | 58 | DocumentBuilder builder; |
| 61 | 59 | public Document document; |
| 62 | 60 | Element root; |
| 63 | 61 | |
| 64 | - private final String splitchar = "\t"; | |
| 65 | - | |
| 66 | - private String UrlAdress; | |
| 67 | - private ArrayList<SearchData> slist = new ArrayList<>(); | |
| 68 | - | |
| 62 | + /* ---------------------------------------------------------------------- * | |
| 63 | + * コンストラクタ | |
| 64 | + * ---------------------------------------------------------------------- */ | |
| 69 | 65 | public SearchDataRW() { |
| 70 | 66 | try { |
| 71 | 67 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
| @@ -76,32 +72,30 @@ | ||
| 76 | 72 | } |
| 77 | 73 | } |
| 78 | 74 | |
| 75 | + /* ---------------------------------------------------------------------- * | |
| 76 | + * Setter | |
| 77 | + * ---------------------------------------------------------------------- */ | |
| 79 | 78 | public void seturl(String UrlAdress) { |
| 80 | 79 | this.UrlAdress = UrlAdress; |
| 81 | 80 | } |
| 82 | 81 | |
| 83 | - public void setslist(ArrayList slist) { | |
| 84 | - this.slist = slist; | |
| 85 | - } | |
| 86 | - | |
| 82 | + /* ---------------------------------------------------------------------- * | |
| 83 | + * Getter | |
| 84 | + * ---------------------------------------------------------------------- */ | |
| 87 | 85 | public String geturl() { |
| 88 | 86 | return UrlAdress; |
| 89 | 87 | } |
| 90 | 88 | |
| 91 | - public ArrayList getslist() { | |
| 92 | - return slist; | |
| 93 | - } | |
| 94 | - | |
| 89 | + /* ---------------------------------------------------------------------- * | |
| 90 | + * メソッド | |
| 91 | + * ---------------------------------------------------------------------- */ | |
| 95 | 92 | /** |
| 96 | 93 | * 保存. |
| 97 | 94 | * @param file |
| 98 | 95 | */ |
| 99 | 96 | public void save(File file) { |
| 100 | - //saveCsv(file); | |
| 101 | - //saveXml(file); | |
| 102 | - | |
| 103 | 97 | saveUrl(UrlAdress); |
| 104 | - saveSearchList(slist); | |
| 98 | + saveSearchList(); | |
| 105 | 99 | write(file); |
| 106 | 100 | } |
| 107 | 101 |
| @@ -110,9 +104,6 @@ | ||
| 110 | 104 | * @param file |
| 111 | 105 | */ |
| 112 | 106 | public void load(File file) { |
| 113 | - //loadCsv(file); | |
| 114 | - //loadXml(file); | |
| 115 | - | |
| 116 | 107 | read(file); |
| 117 | 108 | loadUrl(); |
| 118 | 109 | loadSearchList(); |
| @@ -119,229 +110,7 @@ | ||
| 119 | 110 | } |
| 120 | 111 | |
| 121 | 112 | /* ---------------------------------------------------------------------- */ |
| 122 | - /** | |
| 123 | - * 保存(CSV形式). | |
| 124 | - * @param file | |
| 125 | - */ | |
| 126 | - public void saveCsv(File file) { | |
| 127 | - BufferedWriter bufferedwriter = null; | |
| 128 | - try { | |
| 129 | - //空のファイルを作成 | |
| 130 | - file.createNewFile(); | |
| 131 | - FileOutputStream fileoutputstream = new FileOutputStream(file); | |
| 132 | - OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8"); | |
| 133 | - bufferedwriter = new BufferedWriter(outputstreamwriter); | |
| 134 | - | |
| 135 | - // URL | |
| 136 | - bufferedwriter.write(UrlAdress); | |
| 137 | - bufferedwriter.write("\n"); | |
| 138 | - // 検索情報 | |
| 139 | - for(Object slist1 : slist) { | |
| 140 | - SearchData sdat = (SearchData)slist1; | |
| 141 | - // | |
| 142 | - StringBuilder str = new StringBuilder(); | |
| 143 | - str.append(sdat.getitem()).append(splitchar); | |
| 144 | - str.append(sdat.getHtmltag()).append(splitchar); | |
| 145 | - str.append(sdat.getHtmlid()).append(splitchar); | |
| 146 | - str.append(sdat.getHtmlclass()).append(splitchar); | |
| 147 | - str.append(sdat.getaround()).append(splitchar); | |
| 148 | - str.append(sdat.getregexp()).append("\n"); | |
| 149 | - // 書込み | |
| 150 | - bufferedwriter.write(str.toString()); | |
| 151 | - } | |
| 152 | - | |
| 153 | - } catch (IOException ex) { | |
| 154 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 155 | - } finally { | |
| 156 | - try { | |
| 157 | - if(bufferedwriter != null) { | |
| 158 | - bufferedwriter.close(); | |
| 159 | - } | |
| 160 | - | |
| 161 | - } catch (IOException ex) { | |
| 162 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 163 | - } | |
| 164 | - } | |
| 165 | - } | |
| 166 | 113 | |
| 167 | - /** | |
| 168 | - * 読込(CSV形式). | |
| 169 | - * @param file | |
| 170 | - */ | |
| 171 | - public void loadCsv(File file) { | |
| 172 | - slist = new ArrayList(); | |
| 173 | - | |
| 174 | - BufferedReader bufferedreader = null; | |
| 175 | - try { | |
| 176 | - FileInputStream fileinputstream = new FileInputStream(file); | |
| 177 | - InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8"); | |
| 178 | - bufferedreader = new BufferedReader(inputstreamreader); | |
| 179 | - | |
| 180 | - // URL | |
| 181 | - UrlAdress = bufferedreader.readLine(); | |
| 182 | - // 検索情報 | |
| 183 | - String rec; | |
| 184 | - while((rec = bufferedreader.readLine()) != null) { | |
| 185 | - String[] recary = rec.split(splitchar, -1); | |
| 186 | - SearchData sdat = new SearchData(); | |
| 187 | - sdat.setitem(recary[0]); | |
| 188 | - sdat.setHtmltag(recary[1]); | |
| 189 | - sdat.setHtmlid(recary[2]); | |
| 190 | - sdat.setHtmlclass(recary[3]); | |
| 191 | - sdat.setaround(recary[4]); | |
| 192 | - sdat.setregexp(recary[5]); | |
| 193 | - | |
| 194 | - slist.add(sdat); | |
| 195 | - } | |
| 196 | - | |
| 197 | - } catch(IOException ex) { | |
| 198 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 199 | - | |
| 200 | - } finally { | |
| 201 | - try { | |
| 202 | - if(bufferedreader != null) { | |
| 203 | - bufferedreader.close(); | |
| 204 | - } | |
| 205 | - | |
| 206 | - } catch (IOException ex) { | |
| 207 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 208 | - } | |
| 209 | - } | |
| 210 | - } | |
| 211 | - | |
| 212 | - /* ---------------------------------------------------------------------- */ | |
| 213 | - /** | |
| 214 | - * 保存(XML形式). | |
| 215 | - * @param file | |
| 216 | - */ | |
| 217 | - public void saveXml(File file) { | |
| 218 | - try { | |
| 219 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 220 | - DocumentBuilder wkBuilder = factory.newDocumentBuilder(); | |
| 221 | - DOMImplementation domImpl = wkBuilder.getDOMImplementation(); | |
| 222 | - | |
| 223 | - Document doc = domImpl.createDocument("","searchdata",null); | |
| 224 | - Element wkRoot = doc.getDocumentElement(); | |
| 225 | - | |
| 226 | - // URL | |
| 227 | - Element url = doc.createElement("url"); | |
| 228 | - url.appendChild(doc.createTextNode(UrlAdress)); | |
| 229 | - wkRoot.appendChild(url); | |
| 230 | - | |
| 231 | - // 検索情報 | |
| 232 | - for (Object slist1 : slist) { | |
| 233 | - SearchData sdat = (SearchData) slist1; | |
| 234 | - | |
| 235 | - Element cslist = doc.createElement("searchlist"); | |
| 236 | - Element item = doc.createElement("item"); | |
| 237 | - Element htmltag = doc.createElement("htmltag"); | |
| 238 | - Element htmlid = doc.createElement("htmlid"); | |
| 239 | - Element htmlclass = doc.createElement("htmlclass"); | |
| 240 | - Element around = doc.createElement("around"); | |
| 241 | - Element regexp = doc.createElement("regexp"); | |
| 242 | - | |
| 243 | - item.appendChild(doc.createTextNode(sdat.getitem())); | |
| 244 | - htmltag.appendChild(doc.createTextNode(sdat.getHtmltag())); | |
| 245 | - htmlid.appendChild(doc.createTextNode(sdat.getHtmlid())); | |
| 246 | - htmlclass.appendChild(doc.createTextNode(sdat.getHtmlclass())); | |
| 247 | - around.appendChild(doc.createTextNode(sdat.getaround())); | |
| 248 | - regexp.appendChild(doc.createTextNode(sdat.getregexp())); | |
| 249 | - | |
| 250 | - cslist.appendChild(item); | |
| 251 | - cslist.appendChild(htmltag); | |
| 252 | - cslist.appendChild(htmlid); | |
| 253 | - cslist.appendChild(htmlclass); | |
| 254 | - cslist.appendChild(around); | |
| 255 | - cslist.appendChild(regexp); | |
| 256 | - | |
| 257 | - wkRoot.appendChild(cslist); | |
| 258 | - } | |
| 259 | - // 出力 | |
| 260 | - TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 261 | - Transformer transformer = transFactory.newTransformer(); | |
| 262 | - | |
| 263 | - DOMSource source = new DOMSource(doc); | |
| 264 | - FileOutputStream os = new FileOutputStream(file); | |
| 265 | - StreamResult result = new StreamResult(os); | |
| 266 | - transformer.transform(source, result); | |
| 267 | - | |
| 268 | - } catch (ParserConfigurationException | FileNotFoundException ex) { | |
| 269 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 270 | - } catch (TransformerConfigurationException ex) { | |
| 271 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 272 | - } catch (TransformerException ex) { | |
| 273 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 274 | - } | |
| 275 | - } | |
| 276 | - | |
| 277 | - /** | |
| 278 | - * 読込(XML形式). | |
| 279 | - * @param file | |
| 280 | - */ | |
| 281 | - public void loadXml(File file) { | |
| 282 | - slist = new ArrayList(); | |
| 283 | - | |
| 284 | - try { | |
| 285 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 286 | - DocumentBuilder wkBuilder = factory.newDocumentBuilder(); | |
| 287 | - Document doc = wkBuilder.parse(file); | |
| 288 | - | |
| 289 | - // ルート要素の取得 | |
| 290 | - Element wkRoot = doc.getDocumentElement(); | |
| 291 | - | |
| 292 | - // URL | |
| 293 | - NodeList url = wkRoot.getElementsByTagName("url"); | |
| 294 | - Node urlnode = url.item(0); | |
| 295 | - UrlAdress = urlnode.getFirstChild().getNodeValue(); | |
| 296 | - | |
| 297 | - // 検索情報 | |
| 298 | - NodeList cslist = wkRoot.getElementsByTagName("searchlist"); | |
| 299 | - for(int i = 0; i < cslist.getLength(); i++) { | |
| 300 | - SearchData sdat = new SearchData(); | |
| 301 | - | |
| 302 | - Node slistnode = cslist.item(i); | |
| 303 | - Node child; | |
| 304 | - for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 305 | - if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 306 | - | |
| 307 | - String tag = child.getNodeName(); | |
| 308 | - String rtn = ""; | |
| 309 | - if(child.getFirstChild() != null) { | |
| 310 | - rtn = child.getFirstChild().getNodeValue(); | |
| 311 | - } | |
| 312 | - | |
| 313 | - switch (tag) { | |
| 314 | - case "item" : | |
| 315 | - sdat.setitem(rtn); | |
| 316 | - break; | |
| 317 | - case "htmltag" : | |
| 318 | - sdat.setHtmltag(rtn); | |
| 319 | - break; | |
| 320 | - case "htmlid" : | |
| 321 | - sdat.setHtmlid(rtn); | |
| 322 | - break; | |
| 323 | - case "htmlclass" : | |
| 324 | - sdat.setHtmlclass(rtn); | |
| 325 | - break; | |
| 326 | - case "around" : | |
| 327 | - sdat.setaround(rtn); | |
| 328 | - break; | |
| 329 | - case "regexp" : | |
| 330 | - sdat.setregexp(rtn); | |
| 331 | - break; | |
| 332 | - } | |
| 333 | - } | |
| 334 | - } | |
| 335 | - slist.add(sdat); | |
| 336 | - } | |
| 337 | - | |
| 338 | - } catch (ParserConfigurationException | SAXException | IOException ex) { | |
| 339 | - Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 340 | - } | |
| 341 | - } | |
| 342 | - | |
| 343 | - /* ---------------------------------------------------------------------- */ | |
| 344 | - | |
| 345 | 114 | void loadUrl() { |
| 346 | 115 | NodeList nodelist = root.getElementsByTagName("url"); |
| 347 | 116 | Node node = nodelist.item(0); |
| @@ -349,7 +118,6 @@ | ||
| 349 | 118 | } |
| 350 | 119 | |
| 351 | 120 | public void loadSearchList() { |
| 352 | - slist.clear(); | |
| 353 | 121 | SearchData.clear(); |
| 354 | 122 | |
| 355 | 123 | NodeList nodelist = root.getElementsByTagName("searchlist"); |
| @@ -393,7 +161,6 @@ | ||
| 393 | 161 | } |
| 394 | 162 | } |
| 395 | 163 | } |
| 396 | - if(sdatflg) slist.add(sdat); | |
| 397 | 164 | if(sdatflg) SearchData.add(sdat); |
| 398 | 165 | } |
| 399 | 166 | } |
| @@ -430,13 +197,13 @@ | ||
| 430 | 197 | root.appendChild(url); |
| 431 | 198 | } |
| 432 | 199 | |
| 433 | - void saveSearchList(ArrayList slist) { | |
| 200 | + void saveSearchList() { | |
| 434 | 201 | checkdoc(); |
| 435 | 202 | removeElement("searchlist"); // 既にElementが存在してた場合、一度削除 |
| 436 | 203 | |
| 437 | 204 | int count = 0; |
| 438 | - for (Object slist1 : slist) { | |
| 439 | - SearchData sdat = (SearchData) slist1; | |
| 205 | + for(int i = 0; i < SearchData.size(); i++) { | |
| 206 | + SearchData sdat = SearchData.get(i); | |
| 440 | 207 | |
| 441 | 208 | Element cslist = document.createElement("searchlist"); |
| 442 | 209 | cslist.setAttribute("listNo", String.valueOf(++count)); |