作業部屋の使い方を試しています。
パッケージ名変更
| @@ -1,48 +0,0 @@ | ||
| 1 | - | |
| 2 | -package WebScraping; | |
| 3 | - | |
| 4 | -import static WebScraping.DebugProcess.logger; | |
| 5 | -import java.util.logging.Formatter; | |
| 6 | -import java.util.logging.Handler; | |
| 7 | -import java.util.logging.Logger; | |
| 8 | - | |
| 9 | -/** | |
| 10 | - * | |
| 11 | - * @author kgto | |
| 12 | - */ | |
| 13 | - | |
| 14 | - | |
| 15 | -public class DebugProcessT01 { | |
| 16 | - | |
| 17 | - public static void main(String[] args) { | |
| 18 | - | |
| 19 | - DebugProcessT01 test = new DebugProcessT01(); | |
| 20 | - test.testdebuglog_set(); | |
| 21 | - | |
| 22 | - System.out.println("LoggerName : " + logger.getName()); | |
| 23 | - System.out.println("LoggerLevel : " + logger.getLevel()); | |
| 24 | - System.out.println("Parent : " + logger.getParent().getName()); | |
| 25 | - | |
| 26 | - Handler[] handlers = logger.getHandlers(); | |
| 27 | - for(int i = 0 ; i < handlers.length ; i++) { | |
| 28 | - System.out.println(handlers[i] + "'s Level: " + handlers[i].getLevel()); | |
| 29 | - | |
| 30 | - Formatter formatter = handlers[i].getFormatter(); | |
| 31 | - System.out.println("\tFormatter: " + formatter.toString()); | |
| 32 | - } | |
| 33 | - | |
| 34 | - test.testhtmlinfo(); | |
| 35 | - } | |
| 36 | - | |
| 37 | - public DebugProcessT01() { | |
| 38 | - } | |
| 39 | - | |
| 40 | - void testdebuglog_set() { | |
| 41 | - DebugProcess.debuglog_set(); | |
| 42 | - } | |
| 43 | - | |
| 44 | - void testhtmlinfo() { | |
| 45 | - DebugProcess.htmlinfo("testhtmlinfo"); | |
| 46 | - } | |
| 47 | - | |
| 48 | -} |
| @@ -1,66 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -package WebScraping; | |
| 20 | - | |
| 21 | -import java.util.logging.LogRecord; | |
| 22 | -import org.junit.AfterClass; | |
| 23 | -import org.junit.BeforeClass; | |
| 24 | -import org.junit.Test; | |
| 25 | -import static org.junit.Assert.*; | |
| 26 | - | |
| 27 | -import org.junit.AfterClass; | |
| 28 | -import org.junit.BeforeClass; | |
| 29 | -import org.junit.Test; | |
| 30 | -import static org.junit.Assert.*; | |
| 31 | - | |
| 32 | -/** | |
| 33 | - * | |
| 34 | - * @author kgto | |
| 35 | - */ | |
| 36 | - | |
| 37 | - | |
| 38 | -public class HtmlFormatterTest { | |
| 39 | - | |
| 40 | - public HtmlFormatterTest() { | |
| 41 | - } | |
| 42 | - | |
| 43 | - @BeforeClass | |
| 44 | - public static void setUpClass() { | |
| 45 | - } | |
| 46 | - | |
| 47 | - @AfterClass | |
| 48 | - public static void tearDownClass() { | |
| 49 | - } | |
| 50 | - | |
| 51 | - /** | |
| 52 | - * Test of format method, of class HtmlFormatter. | |
| 53 | - */ | |
| 54 | - @Test | |
| 55 | - public void testFormat() { | |
| 56 | - System.out.println("format"); | |
| 57 | - LogRecord aRecord = null; | |
| 58 | - HtmlFormatter instance = new HtmlFormatter(); | |
| 59 | - String expResult = ""; | |
| 60 | - String result = instance.format(aRecord); | |
| 61 | - assertEquals(expResult, result); | |
| 62 | - // TODO review the generated test code and remove the default call to fail. | |
| 63 | - fail("The test case is a prototype."); | |
| 64 | - } | |
| 65 | - | |
| 66 | -} |
| @@ -1,113 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -package WebScraping; | |
| 20 | - | |
| 21 | -import javax.swing.text.MutableAttributeSet; | |
| 22 | -import javax.swing.text.html.HTML; | |
| 23 | -import org.junit.AfterClass; | |
| 24 | -import org.junit.BeforeClass; | |
| 25 | -import org.junit.Test; | |
| 26 | -import static org.junit.Assert.*; | |
| 27 | - | |
| 28 | -import org.junit.AfterClass; | |
| 29 | -import org.junit.BeforeClass; | |
| 30 | -import org.junit.Test; | |
| 31 | -import static org.junit.Assert.*; | |
| 32 | - | |
| 33 | -/** | |
| 34 | - * | |
| 35 | - * @author kgto | |
| 36 | - */ | |
| 37 | - | |
| 38 | - | |
| 39 | -public class DebugProcessTest { | |
| 40 | - | |
| 41 | - public DebugProcessTest() { | |
| 42 | - } | |
| 43 | - | |
| 44 | - @BeforeClass | |
| 45 | - public static void setUpClass() { | |
| 46 | - } | |
| 47 | - | |
| 48 | - @AfterClass | |
| 49 | - public static void tearDownClass() { | |
| 50 | - } | |
| 51 | - | |
| 52 | - /** | |
| 53 | - * Test of debuglog_set method, of class DebugProcess. | |
| 54 | - */ | |
| 55 | - @Test | |
| 56 | - public void testDebuglog_set() { | |
| 57 | - System.out.println("debuglog_set"); | |
| 58 | - DebugProcess.debuglog_set(); | |
| 59 | - // TODO review the generated test code and remove the default call to fail. | |
| 60 | - fail("The test case is a prototype."); | |
| 61 | - } | |
| 62 | - | |
| 63 | - /** | |
| 64 | - * Test of debuglog_unset method, of class DebugProcess. | |
| 65 | - */ | |
| 66 | - @Test | |
| 67 | - public void testDebuglog_unset() { | |
| 68 | - System.out.println("debuglog_unset"); | |
| 69 | - DebugProcess.debuglog_unset(); | |
| 70 | - // TODO review the generated test code and remove the default call to fail. | |
| 71 | - fail("The test case is a prototype."); | |
| 72 | - } | |
| 73 | - | |
| 74 | - /** | |
| 75 | - * Test of htmlinfo method, of class DebugProcess. | |
| 76 | - */ | |
| 77 | - @Test | |
| 78 | - public void testHtmlinfo_4args() { | |
| 79 | - System.out.println("htmlinfo"); | |
| 80 | - HTML.Tag tag = null; | |
| 81 | - MutableAttributeSet attr = null; | |
| 82 | - String methodname = ""; | |
| 83 | - int count = 0; | |
| 84 | - DebugProcess.htmlinfo(tag, attr, methodname, count); | |
| 85 | - // TODO review the generated test code and remove the default call to fail. | |
| 86 | - fail("The test case is a prototype."); | |
| 87 | - } | |
| 88 | - | |
| 89 | - /** | |
| 90 | - * Test of htmlinfo method, of class DebugProcess. | |
| 91 | - */ | |
| 92 | - @Test | |
| 93 | - public void testHtmlinfo_String() { | |
| 94 | - System.out.println("htmlinfo"); | |
| 95 | - String str = ""; | |
| 96 | - DebugProcess.htmlinfo(str); | |
| 97 | - // TODO review the generated test code and remove the default call to fail. | |
| 98 | - fail("The test case is a prototype."); | |
| 99 | - } | |
| 100 | - | |
| 101 | - /** | |
| 102 | - * Test of htmlinfo method, of class DebugProcess. | |
| 103 | - */ | |
| 104 | - @Test | |
| 105 | - public void testHtmlinfo_charArr() { | |
| 106 | - System.out.println("htmlinfo"); | |
| 107 | - char[] data = null; | |
| 108 | - DebugProcess.htmlinfo(data); | |
| 109 | - // TODO review the generated test code and remove the default call to fail. | |
| 110 | - fail("The test case is a prototype."); | |
| 111 | - } | |
| 112 | - | |
| 113 | -} |
| @@ -0,0 +1,114 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +package webScraping.core; | |
| 20 | + | |
| 21 | +import webScraping.core.DebugProcess; | |
| 22 | +import javax.swing.text.MutableAttributeSet; | |
| 23 | +import javax.swing.text.html.HTML; | |
| 24 | +import org.junit.AfterClass; | |
| 25 | +import org.junit.BeforeClass; | |
| 26 | +import org.junit.Test; | |
| 27 | +import static org.junit.Assert.*; | |
| 28 | + | |
| 29 | +import org.junit.AfterClass; | |
| 30 | +import org.junit.BeforeClass; | |
| 31 | +import org.junit.Test; | |
| 32 | +import static org.junit.Assert.*; | |
| 33 | + | |
| 34 | +/** | |
| 35 | + * | |
| 36 | + * @author kgto | |
| 37 | + */ | |
| 38 | + | |
| 39 | + | |
| 40 | +public class DebugProcessTest { | |
| 41 | + | |
| 42 | + public DebugProcessTest() { | |
| 43 | + } | |
| 44 | + | |
| 45 | + @BeforeClass | |
| 46 | + public static void setUpClass() { | |
| 47 | + } | |
| 48 | + | |
| 49 | + @AfterClass | |
| 50 | + public static void tearDownClass() { | |
| 51 | + } | |
| 52 | + | |
| 53 | + /** | |
| 54 | + * Test of debuglog_set method, of class DebugProcess. | |
| 55 | + */ | |
| 56 | + @Test | |
| 57 | + public void testDebuglog_set() { | |
| 58 | + System.out.println("debuglog_set"); | |
| 59 | + DebugProcess.debuglog_set(); | |
| 60 | + // TODO review the generated test code and remove the default call to fail. | |
| 61 | + fail("The test case is a prototype."); | |
| 62 | + } | |
| 63 | + | |
| 64 | + /** | |
| 65 | + * Test of debuglog_unset method, of class DebugProcess. | |
| 66 | + */ | |
| 67 | + @Test | |
| 68 | + public void testDebuglog_unset() { | |
| 69 | + System.out.println("debuglog_unset"); | |
| 70 | + DebugProcess.debuglog_unset(); | |
| 71 | + // TODO review the generated test code and remove the default call to fail. | |
| 72 | + fail("The test case is a prototype."); | |
| 73 | + } | |
| 74 | + | |
| 75 | + /** | |
| 76 | + * Test of htmlinfo method, of class DebugProcess. | |
| 77 | + */ | |
| 78 | + @Test | |
| 79 | + public void testHtmlinfo_4args() { | |
| 80 | + System.out.println("htmlinfo"); | |
| 81 | + HTML.Tag tag = null; | |
| 82 | + MutableAttributeSet attr = null; | |
| 83 | + String methodname = ""; | |
| 84 | + int count = 0; | |
| 85 | + DebugProcess.htmlinfo(tag, attr, methodname, count); | |
| 86 | + // TODO review the generated test code and remove the default call to fail. | |
| 87 | + fail("The test case is a prototype."); | |
| 88 | + } | |
| 89 | + | |
| 90 | + /** | |
| 91 | + * Test of htmlinfo method, of class DebugProcess. | |
| 92 | + */ | |
| 93 | + @Test | |
| 94 | + public void testHtmlinfo_String() { | |
| 95 | + System.out.println("htmlinfo"); | |
| 96 | + String str = ""; | |
| 97 | + DebugProcess.htmlinfo(str); | |
| 98 | + // TODO review the generated test code and remove the default call to fail. | |
| 99 | + fail("The test case is a prototype."); | |
| 100 | + } | |
| 101 | + | |
| 102 | + /** | |
| 103 | + * Test of htmlinfo method, of class DebugProcess. | |
| 104 | + */ | |
| 105 | + @Test | |
| 106 | + public void testHtmlinfo_charArr() { | |
| 107 | + System.out.println("htmlinfo"); | |
| 108 | + char[] data = null; | |
| 109 | + DebugProcess.htmlinfo(data); | |
| 110 | + // TODO review the generated test code and remove the default call to fail. | |
| 111 | + fail("The test case is a prototype."); | |
| 112 | + } | |
| 113 | + | |
| 114 | +} |
| @@ -0,0 +1,49 @@ | ||
| 1 | + | |
| 2 | +package webScraping.core; | |
| 3 | + | |
| 4 | +import webScraping.core.DebugProcess; | |
| 5 | +import static webScraping.core.DebugProcess.logger; | |
| 6 | +import java.util.logging.Formatter; | |
| 7 | +import java.util.logging.Handler; | |
| 8 | +import java.util.logging.Logger; | |
| 9 | + | |
| 10 | +/** | |
| 11 | + * | |
| 12 | + * @author kgto | |
| 13 | + */ | |
| 14 | + | |
| 15 | + | |
| 16 | +public class DebugProcessT01 { | |
| 17 | + | |
| 18 | + public static void main(String[] args) { | |
| 19 | + | |
| 20 | + DebugProcessT01 test = new DebugProcessT01(); | |
| 21 | + test.testdebuglog_set(); | |
| 22 | + | |
| 23 | + System.out.println("LoggerName : " + logger.getName()); | |
| 24 | + System.out.println("LoggerLevel : " + logger.getLevel()); | |
| 25 | + System.out.println("Parent : " + logger.getParent().getName()); | |
| 26 | + | |
| 27 | + Handler[] handlers = logger.getHandlers(); | |
| 28 | + for(int i = 0 ; i < handlers.length ; i++) { | |
| 29 | + System.out.println(handlers[i] + "'s Level: " + handlers[i].getLevel()); | |
| 30 | + | |
| 31 | + Formatter formatter = handlers[i].getFormatter(); | |
| 32 | + System.out.println("\tFormatter: " + formatter.toString()); | |
| 33 | + } | |
| 34 | + | |
| 35 | + test.testhtmlinfo(); | |
| 36 | + } | |
| 37 | + | |
| 38 | + public DebugProcessT01() { | |
| 39 | + } | |
| 40 | + | |
| 41 | + void testdebuglog_set() { | |
| 42 | + DebugProcess.debuglog_set(); | |
| 43 | + } | |
| 44 | + | |
| 45 | + void testhtmlinfo() { | |
| 46 | + DebugProcess.htmlinfo("testhtmlinfo"); | |
| 47 | + } | |
| 48 | + | |
| 49 | +} |
| @@ -0,0 +1,16 @@ | ||
| 1 | +<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata> | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | +<url>http://weather.yahoo.co.jp/weather/</url><searchlist listNo="1"><item>天気01</item><htmltag>li</htmltag><htmlclass>point pt1400</htmlclass></searchlist><searchlist listNo="2"><item>天気02</item><htmltag>li</htmltag><htmlclass>point pt1900</htmlclass></searchlist><searchlist listNo="3"><item>天気03</item><htmltag>li</htmltag><htmlclass>point pt3410</htmlclass></searchlist><searchlist listNo="4"><item>天気04</item><htmltag>li</htmltag><htmlclass>point pt4410</htmlclass></searchlist><searchlist listNo="5"><item>天気05</item><htmltag>li</htmltag><htmlclass>point pt5110</htmlclass></searchlist><searchlist listNo="6"><item>天気06</item><htmltag>li</htmltag><htmlclass>point pt5410</htmlclass></searchlist><searchlist listNo="7"><item>天気07</item><htmltag>li</htmltag><htmlclass>point pt5610</htmlclass></searchlist><searchlist listNo="8"><item>天気08</item><htmltag>li</htmltag><htmlclass>point pt6200</htmlclass></searchlist><searchlist listNo="9"><item>天気09</item><htmltag>li</htmltag><htmlclass>point pt6710</htmlclass></searchlist><searchlist listNo="10"><item>天気10</item><htmltag>li</htmltag><htmlclass>point pt7410</htmlclass></searchlist><searchlist listNo="11"><item>天気11</item><htmltag>li</htmltag><htmlclass>point pt8210</htmlclass></searchlist><searchlist listNo="12"><item>天気12</item><htmltag>li</htmltag><htmlclass>point pt8810</htmlclass></searchlist><searchlist listNo="13"><item>天気13</item><htmltag>li</htmltag><htmlclass>point pt9110</htmlclass></searchlist></searchdata> | |
| \ No newline at end of file |
| @@ -1,163 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.util.ArrayList; | |
| 26 | -import java.util.Enumeration; | |
| 27 | -import javax.swing.text.MutableAttributeSet; | |
| 28 | -import javax.swing.text.html.HTML; | |
| 29 | - | |
| 30 | -/** | |
| 31 | - * HTMLタグの属性情報を保持する. | |
| 32 | - * @author kgto | |
| 33 | - */ | |
| 34 | -public class AttributeData { | |
| 35 | - | |
| 36 | - public AttributeData() { | |
| 37 | - AttrList = new ArrayList(); | |
| 38 | - size = 0; | |
| 39 | - } | |
| 40 | - | |
| 41 | - /** | |
| 42 | - * 属性情報追加. | |
| 43 | - * @param tag | |
| 44 | - * @param attr | |
| 45 | - */ | |
| 46 | - public void add(HTML.Tag tag, MutableAttributeSet attr) { | |
| 47 | - | |
| 48 | - int tagcount = tagcnt(tag); | |
| 49 | - ++tagcount; | |
| 50 | - | |
| 51 | - Enumeration e = attr.getAttributeNames(); | |
| 52 | - while(e.hasMoreElements()) { | |
| 53 | - Object obj = e.nextElement(); | |
| 54 | - | |
| 55 | - AttrData a = new AttrData(); | |
| 56 | - a.tag = tag; | |
| 57 | - a.count = tagcount; | |
| 58 | - a.attrname = obj.toString(); | |
| 59 | - a.attrvalue = attr.getAttribute(obj).toString(); | |
| 60 | - | |
| 61 | - AttrList.add(a); | |
| 62 | - size = AttrList.size(); | |
| 63 | - } | |
| 64 | - | |
| 65 | - } | |
| 66 | - | |
| 67 | - /** | |
| 68 | - * 属性情報検索. | |
| 69 | - * @param tag | |
| 70 | - * @param attrname | |
| 71 | - * @param attrvalue | |
| 72 | - * @return | |
| 73 | - */ | |
| 74 | - public boolean search(HTML.Tag tag, String attrname, String attrvalue) { | |
| 75 | - boolean ret = false; | |
| 76 | - for (Object AttrList1 : AttrList) { | |
| 77 | - AttrData a = (AttrData)AttrList1; | |
| 78 | - if(a.tag == tag) { | |
| 79 | - if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) { | |
| 80 | - ret = true; | |
| 81 | - } | |
| 82 | - } | |
| 83 | - } | |
| 84 | - return ret; | |
| 85 | - } | |
| 86 | - | |
| 87 | - public boolean searchId(HTML.Tag tag, String attrvalue) { | |
| 88 | - return search(tag, "id", attrvalue); | |
| 89 | - } | |
| 90 | - | |
| 91 | - public boolean searchClass(HTML.Tag tag, String attrvalue) { | |
| 92 | - return search(tag, "class", attrvalue); | |
| 93 | - } | |
| 94 | - | |
| 95 | - /** | |
| 96 | - * 属性の値を取得する. | |
| 97 | - * @param tag | |
| 98 | - * @param attrname | |
| 99 | - * @return | |
| 100 | - */ | |
| 101 | - public ArrayList getvale(HTML.Tag tag, String attrname) { | |
| 102 | - ArrayList ret = new ArrayList(); | |
| 103 | - for (Object AttrList1 : AttrList) { | |
| 104 | - AttrData a = (AttrData)AttrList1; | |
| 105 | - if(a.tag == tag) { | |
| 106 | - if(a.attrname.equals(attrname)) { | |
| 107 | - ret.add(a.attrvalue); | |
| 108 | - } | |
| 109 | - } | |
| 110 | - } | |
| 111 | - return ret; | |
| 112 | - } | |
| 113 | - | |
| 114 | - /** | |
| 115 | - * 引数で渡されたTAGの最新カウント数を返す. | |
| 116 | - * @param tag | |
| 117 | - * @return | |
| 118 | - */ | |
| 119 | - private int tagcnt(HTML.Tag tag) { | |
| 120 | - int wkcnt = 0; | |
| 121 | - for (Object AttrList1 : AttrList) { | |
| 122 | - AttrData a = (AttrData)AttrList1; | |
| 123 | - if(a.tag == tag) { | |
| 124 | - if(wkcnt < a.count) { | |
| 125 | - wkcnt = a.count; | |
| 126 | - } | |
| 127 | - } | |
| 128 | - } | |
| 129 | - return wkcnt; | |
| 130 | - } | |
| 131 | - | |
| 132 | - // AttrList の内容を返すメソッド | |
| 133 | - public HTML.Tag gettag(int i) { | |
| 134 | - AttrData a = (AttrData)AttrList.get(i); | |
| 135 | - return a.tag; | |
| 136 | - } | |
| 137 | - | |
| 138 | - public int getcount(int i) { | |
| 139 | - AttrData a = (AttrData)AttrList.get(i); | |
| 140 | - return a.count; | |
| 141 | - } | |
| 142 | - | |
| 143 | - public String getattrname(int i) { | |
| 144 | - AttrData a = (AttrData)AttrList.get(i); | |
| 145 | - return a.attrname; | |
| 146 | - } | |
| 147 | - | |
| 148 | - public String getattrvalue(int i) { | |
| 149 | - AttrData a = (AttrData)AttrList.get(i); | |
| 150 | - return a.attrvalue; | |
| 151 | - } | |
| 152 | - | |
| 153 | - // フィールド変数 | |
| 154 | - public class AttrData { | |
| 155 | - public HTML.Tag tag; | |
| 156 | - public int count; | |
| 157 | - public String attrname; | |
| 158 | - public String attrvalue; | |
| 159 | - } | |
| 160 | - public ArrayList AttrList; | |
| 161 | - public int size; // AttrListのサイズ | |
| 162 | - | |
| 163 | -} |
| @@ -1,264 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.io.File; | |
| 26 | -import java.io.FileInputStream; | |
| 27 | -import java.io.FileNotFoundException; | |
| 28 | -import java.io.IOException; | |
| 29 | -import java.util.logging.FileHandler; | |
| 30 | -import java.util.logging.Formatter; | |
| 31 | -import java.util.logging.Handler; | |
| 32 | -import java.util.logging.Level; | |
| 33 | -import java.util.logging.LogManager; | |
| 34 | -import java.util.logging.LogRecord; | |
| 35 | -import java.util.logging.Logger; | |
| 36 | -import javax.swing.text.MutableAttributeSet; | |
| 37 | -import javax.swing.text.html.HTML; | |
| 38 | - | |
| 39 | -/** | |
| 40 | - * デバック情報. | |
| 41 | - * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。 | |
| 42 | - * @author kgto | |
| 43 | - */ | |
| 44 | -public class DebugProcess { | |
| 45 | - // 設定ファイル名 | |
| 46 | - protected static final String configurationFilename = "Debug.prop"; | |
| 47 | - // ロガー名 | |
| 48 | - protected static final Logger logger = Logger.getLogger("WebScraping"); | |
| 49 | - // ログ出力デフォルトレベル | |
| 50 | - protected static final Level loggerlevel = Level.FINEST; | |
| 51 | - | |
| 52 | - | |
| 53 | - /** | |
| 54 | - * ログ出力設定. | |
| 55 | - * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、 | |
| 56 | - * ファイルハンドラの設定と出力書式の設定を行う。 | |
| 57 | - */ | |
| 58 | - public static void debuglog_set() { | |
| 59 | - try { | |
| 60 | - initLogConfiguration(); | |
| 61 | - | |
| 62 | - if(Level.ALL.equals(logger.getLevel())) { | |
| 63 | - //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2)); | |
| 64 | - logger.addHandler(new FileHandler("WebScraping%g.log", true)); | |
| 65 | - } | |
| 66 | - setFomatter(); | |
| 67 | - | |
| 68 | - } catch (IOException | SecurityException ex) { | |
| 69 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 70 | - } | |
| 71 | - } | |
| 72 | - | |
| 73 | - /** | |
| 74 | - * ログ出力設定解除. | |
| 75 | - */ | |
| 76 | - public static void debuglog_unset() { | |
| 77 | - } | |
| 78 | - | |
| 79 | - | |
| 80 | - /** | |
| 81 | - * デバック出力(HTML解析-タグ&属性). | |
| 82 | - * HTMLのタグと属性の解析状態を出力する。 | |
| 83 | - * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br> | |
| 84 | - * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br> | |
| 85 | - * @param tag タグ | |
| 86 | - * @param attr 属性 | |
| 87 | - * @param methodname このメソッドを呼び出した親メソッド名 | |
| 88 | - * @param count HTMLタグの階層レベル | |
| 89 | - */ | |
| 90 | - public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr, | |
| 91 | - String methodname, int count) { | |
| 92 | - | |
| 93 | - // ログ出力レベルチェック | |
| 94 | - if(logger.getLevel() == null) { | |
| 95 | - return; | |
| 96 | - } | |
| 97 | - if(logger.getLevel().intValue() > loggerlevel.intValue()) { | |
| 98 | - return; | |
| 99 | - } | |
| 100 | - | |
| 101 | - // 編集処理 | |
| 102 | - char kbn = ' '; | |
| 103 | - if("handleStartTag".equals(methodname)) { | |
| 104 | - kbn = 'F'; | |
| 105 | - } | |
| 106 | - if("handleEndTag".equals(methodname)) { | |
| 107 | - kbn = 'E'; | |
| 108 | - } | |
| 109 | - if("handleSimpleTag".equals(methodname)) { | |
| 110 | - kbn = 'S'; | |
| 111 | - } | |
| 112 | - | |
| 113 | - StringBuilder strBuf = new StringBuilder(80); | |
| 114 | - strBuf.append(count).append(" : "); | |
| 115 | - strBuf.append(kbn).append(" : "); | |
| 116 | - strBuf.append(tag.toString()); | |
| 117 | - // 属性情報 | |
| 118 | - if(attr != null) { | |
| 119 | - if(attr.getAttributeCount() != 0) { | |
| 120 | - AttributeData handleAttrData = new AttributeData(); | |
| 121 | - handleAttrData.add(tag, attr); | |
| 122 | - for(int i = 0; i < handleAttrData.size; i++) { | |
| 123 | - strBuf.append(" ["); | |
| 124 | - strBuf.append(handleAttrData.getattrname(i)); | |
| 125 | - strBuf.append("]"); | |
| 126 | - strBuf.append(handleAttrData.getcount(i)); | |
| 127 | - strBuf.append(" = "); | |
| 128 | - strBuf.append(handleAttrData.getattrvalue(i)); | |
| 129 | - } | |
| 130 | - } | |
| 131 | - } | |
| 132 | - | |
| 133 | - logger.log(loggerlevel, strBuf.toString()); | |
| 134 | - } | |
| 135 | - | |
| 136 | - /** | |
| 137 | - * デバック出力(メッセージ). | |
| 138 | - * 引数に渡された任意のメッセージを出力する。 | |
| 139 | - * @param str メッセージ | |
| 140 | - * @param methodname このメソッドを呼び出した親メソッド名 | |
| 141 | - */ | |
| 142 | - public static void htmlinfo(String str, String methodname) { | |
| 143 | - logger.log(loggerlevel, str); | |
| 144 | - } | |
| 145 | - | |
| 146 | - public static void htmlinfo(String str) { | |
| 147 | - logger.log(loggerlevel, str); | |
| 148 | - } | |
| 149 | - | |
| 150 | - /** | |
| 151 | - * デバック出力(HTML解析-本文). | |
| 152 | - * 本文の内容を出力する。 | |
| 153 | - * @param data 本文(HTML内の文字列) | |
| 154 | - * @param methodname このメソッドを呼び出した親メソッド名 | |
| 155 | - */ | |
| 156 | - public static void htmlinfo(char[] data, String methodname) { | |
| 157 | - String dat = new String(data); | |
| 158 | - logger.log(loggerlevel, dat); | |
| 159 | - } | |
| 160 | - | |
| 161 | - public static void htmlinfo(char[] data) { | |
| 162 | - String dat = new String(data); | |
| 163 | - logger.log(loggerlevel, dat); | |
| 164 | - } | |
| 165 | - | |
| 166 | - /** | |
| 167 | - * デバック出力(検索キー). | |
| 168 | - * 検索キー(SearchData)の内容を出力する。 | |
| 169 | - * @param skey | |
| 170 | - */ | |
| 171 | - public static void searchDatainfo(SearchData skey) { | |
| 172 | - | |
| 173 | - StringBuilder strBuf = new StringBuilder(30); | |
| 174 | - strBuf.append("SearchData KEY tag["); | |
| 175 | - strBuf.append(skey.getHtmltag()); | |
| 176 | - strBuf.append("] ID["); | |
| 177 | - strBuf.append(skey.getHtmlid()); | |
| 178 | - strBuf.append("] CLASS["); | |
| 179 | - strBuf.append(skey.getHtmlclass()); | |
| 180 | - strBuf.append("]\n"); | |
| 181 | - | |
| 182 | - logger.log(loggerlevel, strBuf.toString()); | |
| 183 | - } | |
| 184 | - | |
| 185 | - /** | |
| 186 | - * ログ出力設定ファイルチェック. | |
| 187 | - * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。 | |
| 188 | - */ | |
| 189 | - private static void initLogConfiguration() { | |
| 190 | - | |
| 191 | - File file = new File(configurationFilename); | |
| 192 | - try { | |
| 193 | - if(file.exists()) { | |
| 194 | - FileInputStream inputStream = new FileInputStream(file); | |
| 195 | - // 設定ファイルの読み込み | |
| 196 | - LogManager.getLogManager().readConfiguration(inputStream); | |
| 197 | - } | |
| 198 | - | |
| 199 | - } catch (FileNotFoundException ex) { | |
| 200 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 201 | - } catch (IOException ex) { | |
| 202 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 203 | - } | |
| 204 | - } | |
| 205 | - | |
| 206 | - /** | |
| 207 | - * ログ出力フォーマッター設定. | |
| 208 | - * ファイルへログ出力時の書式を設定する。 | |
| 209 | - */ | |
| 210 | - private static void setFomatter() { | |
| 211 | - Handler[] handlers = logger.getHandlers(); | |
| 212 | - for(int i = 0 ; i < handlers.length ; i++) { | |
| 213 | - if(handlers[i] instanceof java.util.logging.FileHandler) { | |
| 214 | - handlers[i].setFormatter(new HtmlFormatter()); | |
| 215 | - } | |
| 216 | - } | |
| 217 | - } | |
| 218 | - | |
| 219 | -} | |
| 220 | - | |
| 221 | -/** | |
| 222 | - * ログ出力フォーマッター. | |
| 223 | - * @author kgto | |
| 224 | - */ | |
| 225 | -class HtmlFormatter extends Formatter { | |
| 226 | - /** | |
| 227 | - * Logの出力文字列を生成する。 | |
| 228 | - * 出力書式:<br> | |
| 229 | - * YYYY-MM-DD HH:SS:MM ログレベル<メソッド名>メッセージ | |
| 230 | - */ | |
| 231 | - @Override | |
| 232 | - public synchronized String format(final LogRecord aRecord) { | |
| 233 | - | |
| 234 | - final StringBuffer message = new StringBuffer(100); | |
| 235 | - | |
| 236 | - long millis = aRecord.getMillis(); | |
| 237 | - String time = String.format("%tF %<tT", millis); | |
| 238 | - | |
| 239 | - message.append(time); | |
| 240 | - message.append(' '); | |
| 241 | - | |
| 242 | - message.append(aRecord.getLevel()); | |
| 243 | - message.append('<'); | |
| 244 | - String methodName = aRecord.getSourceMethodName(); | |
| 245 | - message.append(methodName != null ? methodName : "N/A"); | |
| 246 | - message.append('>'); | |
| 247 | - | |
| 248 | - message.append(formatMessage(aRecord)); | |
| 249 | - message.append('\n'); | |
| 250 | - | |
| 251 | - // 例外エラーの場合、エラー内容とスタックトレース出力 | |
| 252 | - Throwable throwable = aRecord.getThrown(); | |
| 253 | - if (throwable != null) { | |
| 254 | - message.append(throwable.toString()); | |
| 255 | - message.append('\n'); | |
| 256 | - for (StackTraceElement trace : throwable.getStackTrace()) { | |
| 257 | - message.append('\t'); | |
| 258 | - message.append(trace.toString()); | |
| 259 | - message.append('\n'); | |
| 260 | - } | |
| 261 | - } | |
| 262 | - return message.toString(); | |
| 263 | - } | |
| 264 | -} |
| @@ -1,255 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.io.*; | |
| 26 | -import java.net.*; | |
| 27 | -import java.util.ArrayList; | |
| 28 | -import java.util.logging.Level; | |
| 29 | -import java.util.logging.Logger; | |
| 30 | -import java.util.regex.Matcher; | |
| 31 | -import java.util.regex.Pattern; | |
| 32 | -import javax.swing.text.html.parser.ParserDelegator; | |
| 33 | - | |
| 34 | -/** | |
| 35 | - * | |
| 36 | - * @author kgto | |
| 37 | - */ | |
| 38 | -public class HtmlParser { | |
| 39 | - | |
| 40 | - URL url; | |
| 41 | - String pageData; | |
| 42 | - ArrayList sData; | |
| 43 | - | |
| 44 | - // 作業ワーク | |
| 45 | - String htmltag; | |
| 46 | - String htmlid; | |
| 47 | - String htmlclass; | |
| 48 | - | |
| 49 | - public HtmlParser(URL UrlAdress) { | |
| 50 | - DebugProcess.debuglog_set(); | |
| 51 | - this.url = UrlAdress; | |
| 52 | - getPageData(); | |
| 53 | - } | |
| 54 | - | |
| 55 | - public HtmlParser(String UrlAdress) { | |
| 56 | - DebugProcess.debuglog_set(); | |
| 57 | - try { | |
| 58 | - url = new URL(UrlAdress); | |
| 59 | - getPageData(); | |
| 60 | - | |
| 61 | - } catch (MalformedURLException ex) { | |
| 62 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 63 | - } | |
| 64 | - } | |
| 65 | - | |
| 66 | - public HtmlParser() { | |
| 67 | - DebugProcess.debuglog_set(); | |
| 68 | - url = null; | |
| 69 | - } | |
| 70 | - | |
| 71 | - public String getStringPageData() { | |
| 72 | - return pageData; | |
| 73 | - } | |
| 74 | - | |
| 75 | - public void seturl(URL UrlAdress) { | |
| 76 | - this.url = UrlAdress; | |
| 77 | - getPageData(); | |
| 78 | - } | |
| 79 | - | |
| 80 | - public void seturl(String UrlAdress) { | |
| 81 | - try { | |
| 82 | - url = new URL(UrlAdress); | |
| 83 | - getPageData(); | |
| 84 | - | |
| 85 | - } catch (MalformedURLException ex) { | |
| 86 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 87 | - } | |
| 88 | - } | |
| 89 | - | |
| 90 | - /** | |
| 91 | - * HTMLページ内検索. | |
| 92 | - * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
| 93 | - * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
| 94 | - * 行った結果を返す。<br> | |
| 95 | - * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
| 96 | - * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
| 97 | - * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
| 98 | - * @param skey 検索キーデータ(SearchData) | |
| 99 | - * @return String 検索キーに一致するデータの文字列 | |
| 100 | - */ | |
| 101 | - public String search(SearchData skey) { | |
| 102 | - | |
| 103 | - // htmlページ内を検索 | |
| 104 | - if(isHtmlkeyEq(skey) == false) { | |
| 105 | - searchPageData(skey); | |
| 106 | - } | |
| 107 | - /* | |
| 108 | - around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
| 109 | - 入力無し:取得した全ての情報を返す。 | |
| 110 | - */ | |
| 111 | - String regexp = skey.getregexp(); | |
| 112 | - if(skey.getaround().length() > 0) { | |
| 113 | - int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換 | |
| 114 | - if(wkAround < sData.size()) { | |
| 115 | - String str = (String)sData.get(wkAround); | |
| 116 | - String rtn = RegularExpression(str, regexp); | |
| 117 | - return rtn; | |
| 118 | - } | |
| 119 | - } else { | |
| 120 | - StringBuilder strbuf = new StringBuilder(); | |
| 121 | - for (Object sData1 : sData) { | |
| 122 | - String str = (String)sData1; | |
| 123 | - String rtn = RegularExpression(str, regexp); | |
| 124 | - if(strbuf.length() > 0) { | |
| 125 | - strbuf.append("\t"); | |
| 126 | - } | |
| 127 | - strbuf.append(rtn); | |
| 128 | - } | |
| 129 | - return strbuf.toString(); | |
| 130 | - } | |
| 131 | - return null; | |
| 132 | - } | |
| 133 | - | |
| 134 | - /** | |
| 135 | - * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
| 136 | - * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
| 137 | - * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
| 138 | - */ | |
| 139 | - boolean isHtmlkeyEq(SearchData skey) { | |
| 140 | - | |
| 141 | - String stag = skey.getHtmltag(); | |
| 142 | - String sid = skey.getHtmlid(); | |
| 143 | - String sclass = skey.getHtmlclass(); | |
| 144 | - | |
| 145 | - boolean rtn = true; | |
| 146 | - | |
| 147 | - // htmltag | |
| 148 | - if(htmltag == null) { | |
| 149 | - rtn = false; | |
| 150 | - } else { | |
| 151 | - if(htmltag.equals(stag) == false) { | |
| 152 | - rtn = false; | |
| 153 | - } | |
| 154 | - } | |
| 155 | - | |
| 156 | - // htmlid | |
| 157 | - if(htmlid == null) { | |
| 158 | - rtn = false; | |
| 159 | - } else { | |
| 160 | - if(htmlid.equals(sid) == false) { | |
| 161 | - rtn = false; | |
| 162 | - } | |
| 163 | - } | |
| 164 | - | |
| 165 | - // htmlclass | |
| 166 | - if(htmlclass == null) { | |
| 167 | - rtn = false; | |
| 168 | - } else { | |
| 169 | - if(htmlclass.equals(sclass) == false) { | |
| 170 | - rtn = false; | |
| 171 | - } | |
| 172 | - } | |
| 173 | - | |
| 174 | - if(!rtn) { | |
| 175 | - htmltag = stag; | |
| 176 | - htmlid = sid; | |
| 177 | - htmlclass = sclass; | |
| 178 | - } | |
| 179 | - | |
| 180 | - return rtn; | |
| 181 | - } | |
| 182 | - | |
| 183 | - /** | |
| 184 | - * 正規表現検索. | |
| 185 | - * @param strdata | |
| 186 | - * @param regexp | |
| 187 | - * @return | |
| 188 | - */ | |
| 189 | - String RegularExpression(String strdata, String regexp) { | |
| 190 | - String expdata = null; | |
| 191 | - | |
| 192 | - //regexpのチェック | |
| 193 | - if(regexp.isEmpty()) { | |
| 194 | - expdata = strdata; | |
| 195 | - return expdata; | |
| 196 | - } | |
| 197 | - | |
| 198 | - //正規表現検索 | |
| 199 | - Pattern ptn = Pattern.compile(regexp); | |
| 200 | - Matcher matchdata = ptn.matcher(strdata); | |
| 201 | - if (matchdata.find()) { | |
| 202 | - if(matchdata.groupCount() >= 1) { | |
| 203 | - expdata = matchdata.group(1); | |
| 204 | - } | |
| 205 | - } | |
| 206 | - return expdata; | |
| 207 | - } | |
| 208 | - | |
| 209 | - /** | |
| 210 | - * インターネット接続. | |
| 211 | - */ | |
| 212 | - private void getPageData() { | |
| 213 | - try { | |
| 214 | - //URL url = new URL(UrlAdress); | |
| 215 | - HttpURLConnection con = (HttpURLConnection)url.openConnection(); | |
| 216 | - con.setRequestMethod("GET"); | |
| 217 | - BufferedReader reader = new BufferedReader( | |
| 218 | - new InputStreamReader(con.getInputStream(), "utf-8")); | |
| 219 | - String wkline; | |
| 220 | - StringBuilder sb = new StringBuilder(); | |
| 221 | - while((wkline = reader.readLine()) != null) { | |
| 222 | - sb.append(wkline).append("\n"); | |
| 223 | - } | |
| 224 | - pageData = sb.toString(); | |
| 225 | - | |
| 226 | - con.disconnect(); | |
| 227 | - } | |
| 228 | - catch(IOException ex) { | |
| 229 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 230 | - } | |
| 231 | - } | |
| 232 | - | |
| 233 | - /** | |
| 234 | - * HTMLパーサ. | |
| 235 | - * @param skey | |
| 236 | - */ | |
| 237 | - private void searchPageData(SearchData skey) { | |
| 238 | - | |
| 239 | - DebugProcess.searchDatainfo(skey); | |
| 240 | - | |
| 241 | - Reader reader; | |
| 242 | - try { | |
| 243 | - reader = new BufferedReader(new StringReader(pageData)); | |
| 244 | - HtmlParserCallback cb = new HtmlParserCallback(skey); | |
| 245 | - ParserDelegator pd = new ParserDelegator(); | |
| 246 | - pd.parse(reader, cb, true); | |
| 247 | - reader.close(); | |
| 248 | - | |
| 249 | - sData = cb.getrtnData(); | |
| 250 | - | |
| 251 | - } catch (IOException ex) { | |
| 252 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 253 | - } | |
| 254 | - } | |
| 255 | -} |
| @@ -1,113 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -/** | |
| 26 | - * 検索データ. | |
| 27 | - * @author kgto | |
| 28 | - */ | |
| 29 | -public class SearchData { | |
| 30 | - | |
| 31 | - private String item; | |
| 32 | - private String htmltag; | |
| 33 | - private String htmlid; | |
| 34 | - private String htmlclass; | |
| 35 | - private String around; | |
| 36 | - private String regexp; | |
| 37 | - | |
| 38 | - public SearchData() { | |
| 39 | - initialize(); | |
| 40 | - } | |
| 41 | - | |
| 42 | - public SearchData(SearchData dat) { | |
| 43 | - this.item = dat.getitem(); | |
| 44 | - this.htmltag = dat.getHtmltag(); | |
| 45 | - this.htmlid = dat.getHtmlid(); | |
| 46 | - this.htmlclass = dat.getHtmlclass(); | |
| 47 | - this.around = dat.getaround(); | |
| 48 | - this.regexp = dat.getregexp(); | |
| 49 | - } | |
| 50 | - | |
| 51 | - /** | |
| 52 | - * データ初期化. | |
| 53 | - */ | |
| 54 | - public final void initialize() { | |
| 55 | - this.item = ""; | |
| 56 | - this.htmltag = ""; | |
| 57 | - this.htmlid = ""; | |
| 58 | - this.htmlclass = ""; | |
| 59 | - this.around = ""; | |
| 60 | - this.regexp = ""; | |
| 61 | - } | |
| 62 | - | |
| 63 | - // Setter | |
| 64 | - public void setitem(String item) { | |
| 65 | - this.item = item; | |
| 66 | - } | |
| 67 | - | |
| 68 | - public void setHtmltag(String htmltag) { | |
| 69 | - this.htmltag = htmltag; | |
| 70 | - } | |
| 71 | - | |
| 72 | - public void setHtmlid(String htmlid) { | |
| 73 | - this.htmlid = htmlid; | |
| 74 | - } | |
| 75 | - | |
| 76 | - public void setHtmlclass(String htmlclass) { | |
| 77 | - this.htmlclass = htmlclass; | |
| 78 | - } | |
| 79 | - | |
| 80 | - public void setaround(String around) { | |
| 81 | - this.around = around; | |
| 82 | - } | |
| 83 | - | |
| 84 | - public void setregexp(String regexp) { | |
| 85 | - this.regexp = regexp; | |
| 86 | - } | |
| 87 | - | |
| 88 | - // Getter | |
| 89 | - public String getitem() { | |
| 90 | - return item; | |
| 91 | - } | |
| 92 | - | |
| 93 | - public String getHtmltag() { | |
| 94 | - return htmltag; | |
| 95 | - } | |
| 96 | - | |
| 97 | - public String getHtmlid() { | |
| 98 | - return htmlid; | |
| 99 | - } | |
| 100 | - | |
| 101 | - public String getHtmlclass() { | |
| 102 | - return htmlclass; | |
| 103 | - } | |
| 104 | - | |
| 105 | - public String getaround() { | |
| 106 | - return around; | |
| 107 | - } | |
| 108 | - | |
| 109 | - public String getregexp() { | |
| 110 | - return regexp; | |
| 111 | - } | |
| 112 | - | |
| 113 | -} |
| @@ -1,211 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package WebScraping; | |
| 24 | - | |
| 25 | -import java.util.ArrayList; | |
| 26 | -import java.util.HashMap; | |
| 27 | -import javax.swing.text.MutableAttributeSet; | |
| 28 | -import javax.swing.text.html.HTML; | |
| 29 | -import javax.swing.text.html.HTMLEditorKit; | |
| 30 | - | |
| 31 | -/** | |
| 32 | - * HTMLパーサ部品. | |
| 33 | - * @author kgto | |
| 34 | - */ | |
| 35 | -class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
| 36 | - | |
| 37 | - // Tag毎の階層 | |
| 38 | - HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
| 39 | - | |
| 40 | - // serach key 情報 | |
| 41 | - String keytag; | |
| 42 | - String keyid; | |
| 43 | - String keyclass; | |
| 44 | - | |
| 45 | - // serach key と一致時の情報退避 | |
| 46 | - int bufCount = 0; | |
| 47 | - HTML.Tag bufTag = null; | |
| 48 | - // serach key と一致時の情報格納ワーク | |
| 49 | - StringBuilder bufText; | |
| 50 | - | |
| 51 | - // serach key と一致時のデータ一覧 | |
| 52 | - ArrayList sData; | |
| 53 | - | |
| 54 | - // 属性データ | |
| 55 | - AttributeData attrdata; | |
| 56 | - | |
| 57 | - protected HtmlParserCallback(SearchData skey) { | |
| 58 | - | |
| 59 | - // キー情報展開 | |
| 60 | - keytag = skey.getHtmltag(); | |
| 61 | - keyid = skey.getHtmlid(); | |
| 62 | - keyclass = skey.getHtmlclass(); | |
| 63 | - | |
| 64 | - sData = new ArrayList(); | |
| 65 | - } | |
| 66 | - | |
| 67 | - ArrayList getrtnData() { | |
| 68 | - return this.sData; | |
| 69 | - } | |
| 70 | - | |
| 71 | - @Override | |
| 72 | - public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 73 | - // Tag毎の階層を保持 | |
| 74 | - int count = 1; | |
| 75 | - if(tagMap.containsKey(tag)) { | |
| 76 | - count = tagMap.get(tag); | |
| 77 | - count++; | |
| 78 | - } | |
| 79 | - tagMap.put(tag, count); | |
| 80 | - | |
| 81 | - // 属性解析 | |
| 82 | - AttributeData handleStartattrdata = new AttributeData(); | |
| 83 | - handleStartattrdata.add(tag, attr); | |
| 84 | - | |
| 85 | - DebugProcess.htmlinfo(tag, attr, "handleStartTag", count); | |
| 86 | - | |
| 87 | - if(bufCount == 0) { | |
| 88 | - if(tag.toString().equals(keytag)) { | |
| 89 | - //if(serachAttribute(attr)) { | |
| 90 | - if(serachAttribute(tag, handleStartattrdata)) { | |
| 91 | - bufCount = count; | |
| 92 | - bufTag = tag; | |
| 93 | - attrdata = new AttributeData(); | |
| 94 | - bufText = new StringBuilder(); | |
| 95 | - } | |
| 96 | - } | |
| 97 | - } | |
| 98 | - if(bufCount > 0) { | |
| 99 | - attrdata.add(tag, attr); | |
| 100 | - } | |
| 101 | - } | |
| 102 | - | |
| 103 | - @Override | |
| 104 | - public void handleEndTag(HTML.Tag tag, int pos){ | |
| 105 | - // Tag毎の階層を取得 | |
| 106 | - int count = 0; | |
| 107 | - if(tagMap.containsKey(tag)) { | |
| 108 | - count = tagMap.get(tag); | |
| 109 | - } | |
| 110 | - | |
| 111 | - DebugProcess.htmlinfo(tag, null, "handleEndTag", count); | |
| 112 | - | |
| 113 | - if(tag.equals(bufTag) && count <= bufCount) { | |
| 114 | - | |
| 115 | - // 溜め込んだ一致情報をリストへ格納 | |
| 116 | - sData.add(bufText.toString()); | |
| 117 | - | |
| 118 | - // 退避したserach keyとの一致情報クリア | |
| 119 | - bufCount = 0; | |
| 120 | - bufTag = null; | |
| 121 | - bufText = null; | |
| 122 | - } | |
| 123 | - | |
| 124 | - // Tag毎の階層減算 | |
| 125 | - tagMap.put(tag, --count); | |
| 126 | - } | |
| 127 | - | |
| 128 | - @Override | |
| 129 | - public void handleText(char[] data, int pos){ | |
| 130 | - | |
| 131 | - DebugProcess.htmlinfo(data, "handleText"); | |
| 132 | - | |
| 133 | - String splitchar = "\t"; | |
| 134 | - //制御文字の削除 | |
| 135 | - // 0xa0 | |
| 136 | - StringBuilder buf = new StringBuilder(); | |
| 137 | - for(int i = 0; i < data.length; i++) { | |
| 138 | - if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
| 139 | - buf.append(data[i]); | |
| 140 | - } | |
| 141 | - } | |
| 142 | - if(bufCount > 0) { | |
| 143 | - if(bufText.length() > 0) { | |
| 144 | - bufText.append(splitchar); | |
| 145 | - } | |
| 146 | - bufText.append(buf.toString()); | |
| 147 | - } | |
| 148 | - } | |
| 149 | - | |
| 150 | - @Override | |
| 151 | - public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 152 | - if(bufCount > 0) { | |
| 153 | - attrdata.add(tag, attr); | |
| 154 | - } | |
| 155 | - DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0); | |
| 156 | - } | |
| 157 | - | |
| 158 | - /** | |
| 159 | - * ページ内のID/CLASS値と検索キーを比較する. | |
| 160 | - * @param attr ページのMutableAttributeSet | |
| 161 | - * @return boolean 検索キーと一致の時、true | |
| 162 | - */ | |
| 163 | - boolean serachAttribute(MutableAttributeSet attr) { | |
| 164 | - String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
| 165 | - String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
| 166 | - | |
| 167 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 168 | - if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
| 169 | - return true; | |
| 170 | - } | |
| 171 | - } | |
| 172 | - | |
| 173 | - if(keyid.isEmpty() == false) { | |
| 174 | - if(keyid.equals(currentID)) { | |
| 175 | - return true; | |
| 176 | - } | |
| 177 | - } | |
| 178 | - | |
| 179 | - if(keyclass.isEmpty() == false) { | |
| 180 | - if(keyclass.equals(currentClass)) { | |
| 181 | - return true; | |
| 182 | - } | |
| 183 | - } | |
| 184 | - | |
| 185 | - return false; | |
| 186 | - } | |
| 187 | - | |
| 188 | - /** | |
| 189 | - * ページ内のID/CLASS値と検索キーを比較する. | |
| 190 | - * @param tag | |
| 191 | - * @param attrdata | |
| 192 | - * @return boolean 検索キーと一致の時、true | |
| 193 | - */ | |
| 194 | - boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
| 195 | - // ID と CLASS の両方にキー入力有りの場合 | |
| 196 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 197 | - if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
| 198 | - return true; | |
| 199 | - } | |
| 200 | - } | |
| 201 | - // ID のキーチェック | |
| 202 | - if(keyid.isEmpty() == false) { | |
| 203 | - return attrdata.searchId(tag, keyid); | |
| 204 | - } | |
| 205 | - // CLASS のキーチェック | |
| 206 | - if(keyclass.isEmpty() == false) { | |
| 207 | - return attrdata.searchClass(tag, keyclass); | |
| 208 | - } | |
| 209 | - return false; | |
| 210 | - } | |
| 211 | -} |
| @@ -0,0 +1,530 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.utility; | |
| 24 | + | |
| 25 | +import webScraping.core.SearchData; | |
| 26 | +import java.io.BufferedReader; | |
| 27 | +import java.io.BufferedWriter; | |
| 28 | +import java.io.File; | |
| 29 | +import java.io.FileInputStream; | |
| 30 | +import java.io.FileNotFoundException; | |
| 31 | +import java.io.FileOutputStream; | |
| 32 | +import java.io.IOException; | |
| 33 | +import java.io.InputStreamReader; | |
| 34 | +import java.io.OutputStreamWriter; | |
| 35 | +import java.util.ArrayList; | |
| 36 | +import java.util.logging.Level; | |
| 37 | +import java.util.logging.Logger; | |
| 38 | +import javax.xml.parsers.DocumentBuilder; | |
| 39 | +import javax.xml.parsers.DocumentBuilderFactory; | |
| 40 | +import javax.xml.parsers.ParserConfigurationException; | |
| 41 | +import javax.xml.transform.Transformer; | |
| 42 | +import javax.xml.transform.TransformerConfigurationException; | |
| 43 | +import javax.xml.transform.TransformerException; | |
| 44 | +import javax.xml.transform.TransformerFactory; | |
| 45 | +import javax.xml.transform.dom.DOMSource; | |
| 46 | +import javax.xml.transform.stream.StreamResult; | |
| 47 | +import org.w3c.dom.DOMImplementation; | |
| 48 | +import org.w3c.dom.Document; | |
| 49 | +import org.w3c.dom.Element; | |
| 50 | +import org.w3c.dom.Node; | |
| 51 | +import org.w3c.dom.NodeList; | |
| 52 | +import org.xml.sax.SAXException; | |
| 53 | + | |
| 54 | +/** | |
| 55 | + * | |
| 56 | + * @author kgto | |
| 57 | + */ | |
| 58 | +public class SearchDataRW { | |
| 59 | + | |
| 60 | + DocumentBuilder builder; | |
| 61 | + public Document document; | |
| 62 | + Element root; | |
| 63 | + | |
| 64 | + private final String splitchar = "\t"; | |
| 65 | + | |
| 66 | + private String UrlAdress; | |
| 67 | + private ArrayList slist; | |
| 68 | + | |
| 69 | + public SearchDataRW() { | |
| 70 | + try { | |
| 71 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 72 | + builder = factory.newDocumentBuilder(); | |
| 73 | + | |
| 74 | + } catch (ParserConfigurationException ex) { | |
| 75 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 76 | + } | |
| 77 | + } | |
| 78 | + | |
| 79 | + public void seturl(String UrlAdress) { | |
| 80 | + this.UrlAdress = UrlAdress; | |
| 81 | + } | |
| 82 | + | |
| 83 | + public void setslist(ArrayList slist) { | |
| 84 | + this.slist = slist; | |
| 85 | + } | |
| 86 | + | |
| 87 | + public String geturl() { | |
| 88 | + return UrlAdress; | |
| 89 | + } | |
| 90 | + | |
| 91 | + public ArrayList getslist() { | |
| 92 | + return slist; | |
| 93 | + } | |
| 94 | + | |
| 95 | + /** | |
| 96 | + * 保存. | |
| 97 | + * @param file | |
| 98 | + */ | |
| 99 | + public void save(File file) { | |
| 100 | + //saveCsv(file); | |
| 101 | + //saveXml(file); | |
| 102 | + | |
| 103 | + saveUrl(UrlAdress); | |
| 104 | + saveSearchList(slist); | |
| 105 | + write(file); | |
| 106 | + } | |
| 107 | + | |
| 108 | + /** | |
| 109 | + * 読込. | |
| 110 | + * @param file | |
| 111 | + */ | |
| 112 | + public void load(File file) { | |
| 113 | + //loadCsv(file); | |
| 114 | + //loadXml(file); | |
| 115 | + | |
| 116 | + read(file); | |
| 117 | + UrlAdress = loadUrl(); | |
| 118 | + slist = loadSearchList(); | |
| 119 | + } | |
| 120 | + | |
| 121 | + /* ---------------------------------------------------------------------- */ | |
| 122 | + /** | |
| 123 | + * 保存(CSV形式). | |
| 124 | + * @param file | |
| 125 | + */ | |
| 126 | + public void saveCsv(File file) { | |
| 127 | + BufferedWriter bufferedwriter = null; | |
| 128 | + try { | |
| 129 | + //空のファイルを作成 | |
| 130 | + file.createNewFile(); | |
| 131 | + FileOutputStream fileoutputstream = new FileOutputStream(file); | |
| 132 | + OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8"); | |
| 133 | + bufferedwriter = new BufferedWriter(outputstreamwriter); | |
| 134 | + | |
| 135 | + // URL | |
| 136 | + bufferedwriter.write(UrlAdress); | |
| 137 | + bufferedwriter.write("\n"); | |
| 138 | + // 検索情報 | |
| 139 | + for(Object slist1 : slist) { | |
| 140 | + SearchData sdat = (SearchData)slist1; | |
| 141 | + // | |
| 142 | + StringBuilder str = new StringBuilder(); | |
| 143 | + str.append(sdat.getitem()).append(splitchar); | |
| 144 | + str.append(sdat.getHtmltag()).append(splitchar); | |
| 145 | + str.append(sdat.getHtmlid()).append(splitchar); | |
| 146 | + str.append(sdat.getHtmlclass()).append(splitchar); | |
| 147 | + str.append(sdat.getaround()).append(splitchar); | |
| 148 | + str.append(sdat.getregexp()).append("\n"); | |
| 149 | + // 書込み | |
| 150 | + bufferedwriter.write(str.toString()); | |
| 151 | + } | |
| 152 | + | |
| 153 | + } catch (IOException ex) { | |
| 154 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 155 | + } finally { | |
| 156 | + try { | |
| 157 | + if(bufferedwriter != null) { | |
| 158 | + bufferedwriter.close(); | |
| 159 | + } | |
| 160 | + | |
| 161 | + } catch (IOException ex) { | |
| 162 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 163 | + } | |
| 164 | + } | |
| 165 | + } | |
| 166 | + | |
| 167 | + /** | |
| 168 | + * 読込(CSV形式). | |
| 169 | + * @param file | |
| 170 | + */ | |
| 171 | + public void loadCsv(File file) { | |
| 172 | + slist = new ArrayList(); | |
| 173 | + | |
| 174 | + BufferedReader bufferedreader = null; | |
| 175 | + try { | |
| 176 | + FileInputStream fileinputstream = new FileInputStream(file); | |
| 177 | + InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8"); | |
| 178 | + bufferedreader = new BufferedReader(inputstreamreader); | |
| 179 | + | |
| 180 | + // URL | |
| 181 | + UrlAdress = bufferedreader.readLine(); | |
| 182 | + // 検索情報 | |
| 183 | + String rec; | |
| 184 | + while((rec = bufferedreader.readLine()) != null) { | |
| 185 | + String[] recary = rec.split(splitchar, -1); | |
| 186 | + SearchData sdat = new SearchData(); | |
| 187 | + sdat.setitem(recary[0]); | |
| 188 | + sdat.setHtmltag(recary[1]); | |
| 189 | + sdat.setHtmlid(recary[2]); | |
| 190 | + sdat.setHtmlclass(recary[3]); | |
| 191 | + sdat.setaround(recary[4]); | |
| 192 | + sdat.setregexp(recary[5]); | |
| 193 | + | |
| 194 | + slist.add(sdat); | |
| 195 | + } | |
| 196 | + | |
| 197 | + } catch(IOException ex) { | |
| 198 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 199 | + | |
| 200 | + } finally { | |
| 201 | + try { | |
| 202 | + if(bufferedreader != null) { | |
| 203 | + bufferedreader.close(); | |
| 204 | + } | |
| 205 | + | |
| 206 | + } catch (IOException ex) { | |
| 207 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 208 | + } | |
| 209 | + } | |
| 210 | + } | |
| 211 | + | |
| 212 | + /* ---------------------------------------------------------------------- */ | |
| 213 | + /** | |
| 214 | + * 保存(XML形式). | |
| 215 | + * @param file | |
| 216 | + */ | |
| 217 | + public void saveXml(File file) { | |
| 218 | + try { | |
| 219 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 220 | + DocumentBuilder wkBuilder = factory.newDocumentBuilder(); | |
| 221 | + DOMImplementation domImpl = wkBuilder.getDOMImplementation(); | |
| 222 | + | |
| 223 | + Document doc = domImpl.createDocument("","searchdata",null); | |
| 224 | + Element wkRoot = doc.getDocumentElement(); | |
| 225 | + | |
| 226 | + // URL | |
| 227 | + Element url = doc.createElement("url"); | |
| 228 | + url.appendChild(doc.createTextNode(UrlAdress)); | |
| 229 | + wkRoot.appendChild(url); | |
| 230 | + | |
| 231 | + // 検索情報 | |
| 232 | + for (Object slist1 : slist) { | |
| 233 | + SearchData sdat = (SearchData) slist1; | |
| 234 | + | |
| 235 | + Element cslist = doc.createElement("searchlist"); | |
| 236 | + Element item = doc.createElement("item"); | |
| 237 | + Element htmltag = doc.createElement("htmltag"); | |
| 238 | + Element htmlid = doc.createElement("htmlid"); | |
| 239 | + Element htmlclass = doc.createElement("htmlclass"); | |
| 240 | + Element around = doc.createElement("around"); | |
| 241 | + Element regexp = doc.createElement("regexp"); | |
| 242 | + | |
| 243 | + item.appendChild(doc.createTextNode(sdat.getitem())); | |
| 244 | + htmltag.appendChild(doc.createTextNode(sdat.getHtmltag())); | |
| 245 | + htmlid.appendChild(doc.createTextNode(sdat.getHtmlid())); | |
| 246 | + htmlclass.appendChild(doc.createTextNode(sdat.getHtmlclass())); | |
| 247 | + around.appendChild(doc.createTextNode(sdat.getaround())); | |
| 248 | + regexp.appendChild(doc.createTextNode(sdat.getregexp())); | |
| 249 | + | |
| 250 | + cslist.appendChild(item); | |
| 251 | + cslist.appendChild(htmltag); | |
| 252 | + cslist.appendChild(htmlid); | |
| 253 | + cslist.appendChild(htmlclass); | |
| 254 | + cslist.appendChild(around); | |
| 255 | + cslist.appendChild(regexp); | |
| 256 | + | |
| 257 | + wkRoot.appendChild(cslist); | |
| 258 | + } | |
| 259 | + // 出力 | |
| 260 | + TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 261 | + Transformer transformer = transFactory.newTransformer(); | |
| 262 | + | |
| 263 | + DOMSource source = new DOMSource(doc); | |
| 264 | + FileOutputStream os = new FileOutputStream(file); | |
| 265 | + StreamResult result = new StreamResult(os); | |
| 266 | + transformer.transform(source, result); | |
| 267 | + | |
| 268 | + } catch (ParserConfigurationException | FileNotFoundException ex) { | |
| 269 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 270 | + } catch (TransformerConfigurationException ex) { | |
| 271 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 272 | + } catch (TransformerException ex) { | |
| 273 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 274 | + } | |
| 275 | + } | |
| 276 | + | |
| 277 | + /** | |
| 278 | + * 読込(XML形式). | |
| 279 | + * @param file | |
| 280 | + */ | |
| 281 | + public void loadXml(File file) { | |
| 282 | + slist = new ArrayList(); | |
| 283 | + | |
| 284 | + try { | |
| 285 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
| 286 | + DocumentBuilder wkBuilder = factory.newDocumentBuilder(); | |
| 287 | + Document doc = wkBuilder.parse(file); | |
| 288 | + | |
| 289 | + // ルート要素の取得 | |
| 290 | + Element wkRoot = doc.getDocumentElement(); | |
| 291 | + | |
| 292 | + // URL | |
| 293 | + NodeList url = wkRoot.getElementsByTagName("url"); | |
| 294 | + Node urlnode = url.item(0); | |
| 295 | + UrlAdress = urlnode.getFirstChild().getNodeValue(); | |
| 296 | + | |
| 297 | + // 検索情報 | |
| 298 | + NodeList cslist = wkRoot.getElementsByTagName("searchlist"); | |
| 299 | + for(int i = 0; i < cslist.getLength(); i++) { | |
| 300 | + SearchData sdat = new SearchData(); | |
| 301 | + | |
| 302 | + Node slistnode = cslist.item(i); | |
| 303 | + Node child; | |
| 304 | + for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
| 305 | + if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 306 | + | |
| 307 | + String tag = child.getNodeName(); | |
| 308 | + String rtn = ""; | |
| 309 | + if(child.getFirstChild() != null) { | |
| 310 | + rtn = child.getFirstChild().getNodeValue(); | |
| 311 | + } | |
| 312 | + | |
| 313 | + switch (tag) { | |
| 314 | + case "item" : | |
| 315 | + sdat.setitem(rtn); | |
| 316 | + break; | |
| 317 | + case "htmltag" : | |
| 318 | + sdat.setHtmltag(rtn); | |
| 319 | + break; | |
| 320 | + case "htmlid" : | |
| 321 | + sdat.setHtmlid(rtn); | |
| 322 | + break; | |
| 323 | + case "htmlclass" : | |
| 324 | + sdat.setHtmlclass(rtn); | |
| 325 | + break; | |
| 326 | + case "around" : | |
| 327 | + sdat.setaround(rtn); | |
| 328 | + break; | |
| 329 | + case "regexp" : | |
| 330 | + sdat.setregexp(rtn); | |
| 331 | + break; | |
| 332 | + } | |
| 333 | + } | |
| 334 | + } | |
| 335 | + slist.add(sdat); | |
| 336 | + } | |
| 337 | + | |
| 338 | + } catch (ParserConfigurationException | SAXException | IOException ex) { | |
| 339 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 340 | + } | |
| 341 | + } | |
| 342 | + | |
| 343 | + /* ---------------------------------------------------------------------- */ | |
| 344 | + | |
| 345 | + public String loadUrl() { | |
| 346 | + String urladdress; | |
| 347 | + | |
| 348 | + NodeList nodelist = root.getElementsByTagName("url"); | |
| 349 | + Node node = nodelist.item(0); | |
| 350 | + urladdress = node.getFirstChild().getNodeValue(); | |
| 351 | + | |
| 352 | + return urladdress; | |
| 353 | + } | |
| 354 | + | |
| 355 | + public ArrayList<SearchData> loadSearchList() { | |
| 356 | + ArrayList<SearchData> sdatlst = new ArrayList<>(); | |
| 357 | + | |
| 358 | + NodeList nodelist = root.getElementsByTagName("searchlist"); | |
| 359 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 360 | + Node childnode = nodelist.item(i); | |
| 361 | + | |
| 362 | + boolean sdatflg = false; | |
| 363 | + SearchData sdat = new SearchData(); | |
| 364 | + | |
| 365 | + //NodeList childnodelist = childnode.getChildNodes(); | |
| 366 | + //for(int j = 0; j < childnodelist.getLength(); j++) { | |
| 367 | + // Node child = childnodelist.item(j); | |
| 368 | + | |
| 369 | + for (Node child = childnode.getFirstChild(); | |
| 370 | + child != null; child = child.getNextSibling()) { | |
| 371 | + | |
| 372 | + if(child.getNodeType() == Node.ELEMENT_NODE) { | |
| 373 | + | |
| 374 | + String tag = child.getNodeName(); | |
| 375 | + String rtn = ""; | |
| 376 | + if(child.getFirstChild() != null) { | |
| 377 | + rtn = child.getFirstChild().getNodeValue(); | |
| 378 | + } | |
| 379 | + | |
| 380 | + switch (tag) { | |
| 381 | + case "item" : | |
| 382 | + sdat.setitem(rtn); | |
| 383 | + sdatflg = true; | |
| 384 | + break; | |
| 385 | + case "htmltag" : | |
| 386 | + sdat.setHtmltag(rtn); | |
| 387 | + sdatflg = true; | |
| 388 | + break; | |
| 389 | + case "htmlid" : | |
| 390 | + sdat.setHtmlid(rtn); | |
| 391 | + sdatflg = true; | |
| 392 | + break; | |
| 393 | + case "htmlclass" : | |
| 394 | + sdat.setHtmlclass(rtn); | |
| 395 | + sdatflg = true; | |
| 396 | + break; | |
| 397 | + case "around" : | |
| 398 | + sdat.setaround(rtn); | |
| 399 | + sdatflg = true; | |
| 400 | + break; | |
| 401 | + case "regexp" : | |
| 402 | + sdat.setregexp(rtn); | |
| 403 | + sdatflg = true; | |
| 404 | + break; | |
| 405 | + } | |
| 406 | + } | |
| 407 | + } | |
| 408 | + if(sdatflg) sdatlst.add(sdat); | |
| 409 | + } | |
| 410 | + return sdatlst; | |
| 411 | + } | |
| 412 | + | |
| 413 | + public Element loadElement(String elementTagName) { | |
| 414 | + NodeList nodelist = root.getElementsByTagName(elementTagName); | |
| 415 | + Element element = (Element)nodelist.item(0); | |
| 416 | + | |
| 417 | + return element; | |
| 418 | + } | |
| 419 | + | |
| 420 | + /* ---------------------------------------------------------------------- */ | |
| 421 | + | |
| 422 | + public void saveUrl(String urladdress) { | |
| 423 | + checkdoc(); | |
| 424 | + removeElement("url"); // 既にElementが存在してた場合、一度削除 | |
| 425 | + | |
| 426 | + Element url = document.createElement("url"); | |
| 427 | + url.appendChild(document.createTextNode(urladdress)); | |
| 428 | + root.appendChild(url); | |
| 429 | + } | |
| 430 | + | |
| 431 | + public void saveSearchList(ArrayList slist) { | |
| 432 | + checkdoc(); | |
| 433 | + removeElement("searchlist"); // 既にElementが存在してた場合、一度削除 | |
| 434 | + | |
| 435 | + int count = 0; | |
| 436 | + for (Object slist1 : slist) { | |
| 437 | + SearchData sdat = (SearchData) slist1; | |
| 438 | + | |
| 439 | + Element cslist = document.createElement("searchlist"); | |
| 440 | + cslist.setAttribute("listNo", String.valueOf(++count)); | |
| 441 | + | |
| 442 | + addChild(cslist, "item", sdat.getitem()); | |
| 443 | + addChild(cslist, "htmltag", sdat.getHtmltag()); | |
| 444 | + addChild(cslist, "htmlid", sdat.getHtmlid()); | |
| 445 | + addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
| 446 | + addChild(cslist, "around", sdat.getaround()); | |
| 447 | + addChild(cslist, "regexp", sdat.getregexp()); | |
| 448 | + | |
| 449 | + root.appendChild(cslist); | |
| 450 | + } | |
| 451 | + } | |
| 452 | + | |
| 453 | + public void saveElement(Element element) { | |
| 454 | + checkdoc(); | |
| 455 | + removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除 | |
| 456 | + | |
| 457 | + root.appendChild(element); | |
| 458 | + } | |
| 459 | + | |
| 460 | + /* ---------------------------------------------------------------------- */ | |
| 461 | + | |
| 462 | + private void addChild(Element cslist, String keyword, String data) { | |
| 463 | + if(!data.isEmpty()) { | |
| 464 | + Element element = document.createElement(keyword); | |
| 465 | + element.appendChild(document.createTextNode(data)); | |
| 466 | + cslist.appendChild(element); | |
| 467 | + } | |
| 468 | + } | |
| 469 | + | |
| 470 | + private void removeElement(String elementTagName) { | |
| 471 | + int nodeSize; | |
| 472 | + do { | |
| 473 | + NodeList nodelist = document.getElementsByTagName(elementTagName); | |
| 474 | + nodeSize = nodelist.getLength(); | |
| 475 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
| 476 | + Node node = nodelist.item(i); | |
| 477 | + root.removeChild(node); | |
| 478 | + } | |
| 479 | + } while(nodeSize > 0); | |
| 480 | + } | |
| 481 | + | |
| 482 | + /** | |
| 483 | + * ドキュメントチェック. | |
| 484 | + * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。 | |
| 485 | + * 既読の場合、ルートエレメントの取得を行う。 | |
| 486 | + */ | |
| 487 | + public void checkdoc() { | |
| 488 | + if(document == null) { | |
| 489 | + DOMImplementation domImpl = builder.getDOMImplementation(); | |
| 490 | + document = domImpl.createDocument("","searchdata",null); | |
| 491 | + } | |
| 492 | + root = document.getDocumentElement(); | |
| 493 | + } | |
| 494 | + | |
| 495 | + /** | |
| 496 | + * XML読込み. | |
| 497 | + * @param file | |
| 498 | + */ | |
| 499 | + public void read(File file) { | |
| 500 | + try { | |
| 501 | + document = builder.parse(file); | |
| 502 | + root = document.getDocumentElement(); | |
| 503 | + | |
| 504 | + } catch (SAXException | IOException ex) { | |
| 505 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 506 | + } | |
| 507 | + } | |
| 508 | + | |
| 509 | + /** | |
| 510 | + * XML書込み. | |
| 511 | + * @param file | |
| 512 | + */ | |
| 513 | + public void write(File file) { | |
| 514 | + try { | |
| 515 | + TransformerFactory transFactory = TransformerFactory.newInstance(); | |
| 516 | + Transformer transformer = transFactory.newTransformer(); | |
| 517 | + | |
| 518 | + DOMSource source = new DOMSource(document); | |
| 519 | + FileOutputStream os = new FileOutputStream(file); | |
| 520 | + StreamResult result = new StreamResult(os); | |
| 521 | + transformer.transform(source, result); | |
| 522 | + | |
| 523 | + } catch (TransformerConfigurationException ex) { | |
| 524 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 525 | + } catch (FileNotFoundException | TransformerException ex) { | |
| 526 | + Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex); | |
| 527 | + } | |
| 528 | + } | |
| 529 | + | |
| 530 | +} |
| @@ -0,0 +1,508 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | +package webScraping.utility; | |
| 23 | + | |
| 24 | +import webScraping.core.HtmlParser; | |
| 25 | +import webScraping.core.SearchData; | |
| 26 | +import java.awt.Desktop; | |
| 27 | +import java.io.File; | |
| 28 | +import java.io.IOException; | |
| 29 | +import java.net.URI; | |
| 30 | +import java.net.URISyntaxException; | |
| 31 | +import java.util.*; | |
| 32 | +import java.util.logging.Level; | |
| 33 | +import java.util.logging.Logger; | |
| 34 | +import javax.swing.JFileChooser; | |
| 35 | +import javax.swing.filechooser.FileFilter; | |
| 36 | +import javax.swing.filechooser.FileNameExtensionFilter; | |
| 37 | +import org.jdesktop.observablecollections.ObservableCollections; | |
| 38 | + | |
| 39 | +/** | |
| 40 | + * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する. | |
| 41 | + * @author kgto | |
| 42 | + */ | |
| 43 | +public class HtmlSearch extends javax.swing.JFrame { | |
| 44 | + | |
| 45 | + private final SearchDataRW sio = new SearchDataRW(); | |
| 46 | + | |
| 47 | + private ArrayList slist = new ArrayList(); | |
| 48 | + private List serachDataList = ObservableCollections.observableList(slist); | |
| 49 | + | |
| 50 | + /** | |
| 51 | + * Creates new form Frame1 | |
| 52 | + */ | |
| 53 | + public HtmlSearch() { | |
| 54 | + initComponents(); | |
| 55 | + | |
| 56 | + // カレントディレクトリ取得 | |
| 57 | + String dir = System.getProperty("user.dir"); | |
| 58 | + File file = new java.io.File(dir + "\\data"); | |
| 59 | + jFileChooser1.setCurrentDirectory(file); | |
| 60 | + | |
| 61 | + FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml"); | |
| 62 | + FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt"); | |
| 63 | + jFileChooser1.addChoosableFileFilter(filter1); | |
| 64 | + jFileChooser1.addChoosableFileFilter(filter2); | |
| 65 | + jFileChooser1.setFileFilter(filter1); | |
| 66 | + | |
| 67 | + } | |
| 68 | + | |
/**
 * Returns the observable list of search conditions. The table binding in
 * initComponents() reads it through the EL property "serachDataList", so the
 * (misspelled) method name is part of that contract.
 *
 * @return the observable list backing the table
 */
public List getSerachDataList() {
    return this.serachDataList;
}
| 72 | + | |
/**
 * Replaces the list of search conditions.
 *
 * Only the {@code serachDataList} reference is replaced; the {@code slist}
 * field, which the save, row-copy and search code read directly, is not touched.
 * NOTE(review): no property-change event is fired here, so the table binding
 * presumably does not notice the replacement — confirm before relying on it.
 *
 * @param serachDataList new list of search conditions
 */
public void setSerachDataList(List serachDataList) {
    this.serachDataList = serachDataList;
}
| 76 | + | |
/**
 * This method is called from within the constructor to initialize the form.
 * WARNING: Do NOT modify this code. The content of this method is always
 * regenerated by the Form Editor.
 *
 * Builds the whole window: the URL field with its search button, a tabbed
 * pane holding the search-condition table (with row insert/delete/copy
 * buttons) and the "no result" message area, the result text area, and the
 * menu bar. It also binds the table to the "serachDataList" property.
 */
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
private void initComponents() {
    bindingGroup = new org.jdesktop.beansbinding.BindingGroup();

    // Component instantiation.
    jFileChooser1 = new javax.swing.JFileChooser();
    jRadioButton1 = new javax.swing.JRadioButton();
    jLabel1 = new javax.swing.JLabel();
    jTxtUrl = new javax.swing.JTextField();
    jBtnSearch = new javax.swing.JButton();
    jTabbedPane1 = new javax.swing.JTabbedPane();
    jPanelTab1 = new javax.swing.JPanel();
    jScrollPane1 = new javax.swing.JScrollPane();
    jTable1 = new javax.swing.JTable();
    jBtnRowIns = new javax.swing.JButton();
    jBtnRowDel = new javax.swing.JButton();
    jBtnRowCpy = new javax.swing.JButton();
    jPanelTab2 = new javax.swing.JPanel();
    jScrollPaneLabel = new javax.swing.JScrollPane();
    jTxtLabel = new javax.swing.JTextArea();
    jScrollPane404msg = new javax.swing.JScrollPane();
    jTxt404msg = new javax.swing.JTextArea();
    jPanelRtn = new javax.swing.JPanel();
    jScrollPaneRtn = new javax.swing.JScrollPane();
    jTxtRtn = new javax.swing.JTextArea();
    jMenuBar1 = new javax.swing.JMenuBar();
    jMenu1 = new javax.swing.JMenu();
    jMenuLoad = new javax.swing.JMenuItem();
    jMenuSave = new javax.swing.JMenuItem();
    jMenu3 = new javax.swing.JMenu();
    jMenuItem1 = new javax.swing.JMenuItem();
    jMenu2 = new javax.swing.JMenu();

    jFileChooser1.setCurrentDirectory(null);
    jFileChooser1.setDialogTitle("");

    jRadioButton1.setText("jRadioButton1");

    setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
    setTitle("タグ検索");

    jLabel1.setText(" URL:");

    // Search button: delegates to jBtnSearchActionPerformed.
    jBtnSearch.setText("検索");
    jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnSearchActionPerformed(evt);
        }
    });

    // Tab 1: search-condition table.
    jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));

    jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
    jTable1.getTableHeader().setReorderingAllowed(false);

    // Bind the table rows to this.serachDataList (READ_WRITE) and each column
    // to one property of the row object.
    org.jdesktop.beansbinding.ELProperty eLProperty = org.jdesktop.beansbinding.ELProperty.create("${serachDataList}");
    org.jdesktop.swingbinding.JTableBinding jTableBinding = org.jdesktop.swingbinding.SwingBindings.createJTableBinding(org.jdesktop.beansbinding.AutoBinding.UpdateStrategy.READ_WRITE, this, eLProperty, jTable1);
    org.jdesktop.swingbinding.JTableBinding.ColumnBinding columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${item}"));
    columnBinding.setColumnName("項目名");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmltag}"));
    columnBinding.setColumnName("タグ");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmlid}"));
    columnBinding.setColumnName("ID");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmlclass}"));
    columnBinding.setColumnName("クラス");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${around}"));
    columnBinding.setColumnName("位置");
    columnBinding.setColumnClass(String.class);
    columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${regexp}"));
    columnBinding.setColumnName("抽出条件");
    columnBinding.setColumnClass(String.class);
    bindingGroup.addBinding(jTableBinding);
    jTableBinding.bind();
    jScrollPane1.setViewportView(jTable1);

    // Row insert / delete / copy buttons.
    jBtnRowIns.setText("行挿入");
    jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowInsActionPerformed(evt);
        }
    });

    jBtnRowDel.setText("行削除");
    jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowDelActionPerformed(evt);
        }
    });

    jBtnRowCpy.setText("行コピー");
    jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowCpyActionPerformed(evt);
        }
    });

    javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1);
    jPanelTab1.setLayout(jPanelTab1Layout);
    jPanelTab1Layout.setHorizontalGroup(
        jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanelTab1Layout.createSequentialGroup()
            .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
            .addComponent(jBtnRowCpy)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnRowDel)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnRowIns))
        .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
    );
    jPanelTab1Layout.setVerticalGroup(
        jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanelTab1Layout.createSequentialGroup()
            .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                .addComponent(jBtnRowDel)
                .addComponent(jBtnRowIns)
                .addComponent(jBtnRowCpy)))
    );

    jTabbedPane1.addTab("キー設定", jPanelTab1);

    // Tab 2: "no result" messages. jTxtLabel is a read-only explanatory text,
    // jTxt404msg holds the editable messages read by Search_execution().
    jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ"));

    jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
    jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER);

    jTxtLabel.setEditable(false);
    jTxtLabel.setBackground(java.awt.Color.lightGray);
    jTxtLabel.setColumns(20);
    jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N
    jTxtLabel.setLineWrap(true);
    jTxtLabel.setRows(2);
    jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。");
    jTxtLabel.setAutoscrolls(false);
    jTxtLabel.setBorder(null);
    jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR));
    jTxtLabel.setFocusable(false);
    jTxtLabel.setHighlighter(null);
    jTxtLabel.setKeymap(null);
    jTxtLabel.setOpaque(false);
    jTxtLabel.setRequestFocusEnabled(false);
    jTxtLabel.setVerifyInputWhenFocusTarget(false);
    jScrollPaneLabel.setViewportView(jTxtLabel);

    jTxt404msg.setColumns(20);
    jTxt404msg.setRows(3);
    jTxt404msg.setText("一致する銘柄は見つかりませんでした\n");
    jScrollPane404msg.setViewportView(jTxt404msg);

    javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2);
    jPanelTab2.setLayout(jPanelTab2Layout);
    jPanelTab2Layout.setHorizontalGroup(
        jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPane404msg)
        .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup()
            .addContainerGap()
            .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE)
            .addContainerGap())
    );
    jPanelTab2Layout.setVerticalGroup(
        jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanelTab2Layout.createSequentialGroup()
            .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
            .addComponent(jScrollPane404msg))
    );

    jTabbedPane1.addTab("結果無し判定", jPanelTab2);

    // Result panel.
    jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));

    jTxtRtn.setColumns(20);
    jTxtRtn.setRows(5);
    jScrollPaneRtn.setViewportView(jTxtRtn);

    javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn);
    jPanelRtn.setLayout(jPanelRtnLayout);
    jPanelRtnLayout.setHorizontalGroup(
        jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPaneRtn)
    );
    jPanelRtnLayout.setVerticalGroup(
        jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE)
    );

    // Menu bar: file (load/save), tools (open in browser), search.
    jMenu1.setText("ファイル");

    jMenuLoad.setText("LOAD");
    jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuLoadActionPerformed(evt);
        }
    });
    jMenu1.add(jMenuLoad);

    jMenuSave.setText("SAVE");
    jMenuSave.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuSaveActionPerformed(evt);
        }
    });
    jMenu1.add(jMenuSave);

    jMenuBar1.add(jMenu1);

    jMenu3.setText("ツール");

    jMenuItem1.setText("ブラウザで表示");
    jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuItem1ActionPerformed(evt);
        }
    });
    jMenu3.add(jMenuItem1);

    jMenuBar1.add(jMenu3);

    // The search menu has no items; a mouse click on the menu itself triggers the search.
    jMenu2.setText("検索");
    jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
        public void mouseClicked(java.awt.event.MouseEvent evt) {
            jMenu2MouseClicked(evt);
        }
    });
    jMenuBar1.add(jMenu2);

    setJMenuBar(jMenuBar1);

    // Top-level layout: URL row, tabbed pane, result panel.
    javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
    getContentPane().setLayout(layout);
    layout.setHorizontalGroup(
        layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
        .addGroup(layout.createSequentialGroup()
            .addComponent(jLabel1)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jTxtUrl)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnSearch))
        .addComponent(jTabbedPane1)
    );
    layout.setVerticalGroup(
        layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(layout.createSequentialGroup()
            .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                .addComponent(jLabel1)
                .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
                .addComponent(jBtnSearch))
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
            .addContainerGap())
    );

    bindingGroup.bind();

    pack();
}// </editor-fold>//GEN-END:initComponents
| 346 | + | |
| 347 | + private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed | |
| 348 | + int SelectedRow = jTable1.getSelectedRow(); | |
| 349 | + SearchData sdat = new SearchData(); | |
| 350 | + | |
| 351 | + if(SelectedRow >= 0) { | |
| 352 | + this.serachDataList.add(SelectedRow, sdat); | |
| 353 | + } else { | |
| 354 | + this.serachDataList.add(sdat); | |
| 355 | + } | |
| 356 | + }//GEN-LAST:event_jBtnRowInsActionPerformed | |
| 357 | + | |
| 358 | + private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed | |
| 359 | + int SelectedRow = jTable1.getSelectedRow(); | |
| 360 | + if(!(SelectedRow < 0)) { | |
| 361 | + this.serachDataList.remove(SelectedRow); | |
| 362 | + } | |
| 363 | + }//GEN-LAST:event_jBtnRowDelActionPerformed | |
| 364 | + | |
| 365 | + private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed | |
| 366 | + jFileChooser1.setDialogTitle("読込"); | |
| 367 | + int selected = jFileChooser1.showOpenDialog(this); | |
| 368 | + if (selected == JFileChooser.APPROVE_OPTION) { | |
| 369 | + File file = jFileChooser1.getSelectedFile(); | |
| 370 | + serachDataList.clear(); | |
| 371 | + sio.load(file); | |
| 372 | + jTxtUrl.setText(sio.geturl()); | |
| 373 | + serachDataList.addAll(sio.getslist()); | |
| 374 | + } | |
| 375 | + }//GEN-LAST:event_jMenuLoadActionPerformed | |
| 376 | + | |
| 377 | + private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed | |
| 378 | + jFileChooser1.setDialogTitle("保存"); | |
| 379 | + int selected = jFileChooser1.showSaveDialog(this); | |
| 380 | + if (selected == JFileChooser.APPROVE_OPTION) { | |
| 381 | + File file = jFileChooser1.getSelectedFile(); | |
| 382 | + sio.seturl(jTxtUrl.getText()); | |
| 383 | + sio.setslist(slist); | |
| 384 | + sio.save(file); | |
| 385 | + } | |
| 386 | + }//GEN-LAST:event_jMenuSaveActionPerformed | |
| 387 | + | |
private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked
    // Clicking the search menu runs the same search as the search button.
    Search_execution();
}//GEN-LAST:event_jMenu2MouseClicked
| 391 | + | |
| 392 | + private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed | |
| 393 | + int SelectedRow = jTable1.getSelectedRow(); | |
| 394 | + if(SelectedRow >= 0) { | |
| 395 | + SearchData SelectData = (SearchData)slist.get(SelectedRow); | |
| 396 | + SearchData Cpydata = new SearchData(SelectData); | |
| 397 | + this.serachDataList.add(SelectedRow, Cpydata); | |
| 398 | + } | |
| 399 | + }//GEN-LAST:event_jBtnRowCpyActionPerformed | |
| 400 | + | |
private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed
    // The search button runs the search against the URL currently entered.
    Search_execution();
}//GEN-LAST:event_jBtnSearchActionPerformed
| 404 | + | |
| 405 | + private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed | |
| 406 | + Desktop desktop = Desktop.getDesktop(); | |
| 407 | + String uriString = jTxtUrl.getText(); | |
| 408 | + try { | |
| 409 | + URI uri = new URI(uriString); | |
| 410 | + desktop.browse(uri); | |
| 411 | + | |
| 412 | + } catch (URISyntaxException | IOException ex) { | |
| 413 | + Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex); | |
| 414 | + } | |
| 415 | + }//GEN-LAST:event_jMenuItem1ActionPerformed | |
| 416 | + | |
| 417 | + /** | |
| 418 | + * 検索実行. | |
| 419 | + */ | |
| 420 | + void Search_execution() { | |
| 421 | + jTxtRtn.setText(null); | |
| 422 | + HtmlParser par = new HtmlParser(jTxtUrl.getText()); | |
| 423 | + | |
| 424 | + // データ無し(404)判定 | |
| 425 | + String strdata = par.getStringPageData(); | |
| 426 | + String text = jTxt404msg.getText(); | |
| 427 | + String[] strsearch = text.split("\n"); | |
| 428 | + for(String strsearch1 : strsearch) { | |
| 429 | + if(strdata.contains(strsearch1)) { | |
| 430 | + jTxtRtn.append(strsearch1); | |
| 431 | + return; | |
| 432 | + } | |
| 433 | + } | |
| 434 | + | |
| 435 | + // 検索結果 | |
| 436 | + for (Object slist1 : slist) { | |
| 437 | + SearchData sdata = (SearchData)slist1; | |
| 438 | + String ans = sdata.getitem(); | |
| 439 | + String rtn = par.search(sdata); | |
| 440 | + jTxtRtn.append(ans + "\t" + rtn + "\r\n"); | |
| 441 | + } | |
| 442 | + jTxtRtn.setCaretPosition(0); | |
| 443 | + } | |
| 444 | + | |
| 445 | + /** | |
| 446 | + * @param args the command line arguments | |
| 447 | + */ | |
| 448 | + public static void main(String args[]) { | |
| 449 | + /* Set the Nimbus look and feel */ | |
| 450 | + //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) "> | |
| 451 | + /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel. | |
| 452 | + * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html | |
| 453 | + */ | |
| 454 | + try { | |
| 455 | + for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) { | |
| 456 | + if ("Nimbus".equals(info.getName())) { | |
| 457 | + javax.swing.UIManager.setLookAndFeel(info.getClassName()); | |
| 458 | + break; | |
| 459 | + } | |
| 460 | + } | |
| 461 | + } catch (ClassNotFoundException | |
| 462 | + | InstantiationException | |
| 463 | + | IllegalAccessException | |
| 464 | + | javax.swing.UnsupportedLookAndFeelException ex) { | |
| 465 | + java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
| 466 | + } | |
| 467 | + //</editor-fold> | |
| 468 | + | |
| 469 | + /* Create and display the form */ | |
| 470 | + java.awt.EventQueue.invokeLater(new Runnable() { | |
| 471 | + @Override | |
| 472 | + public void run() { | |
| 473 | + new HtmlSearch().setVisible(true); | |
| 474 | + } | |
| 475 | + }); | |
| 476 | + } | |
| 477 | + | |
| 478 | + // Variables declaration - do not modify//GEN-BEGIN:variables | |
| 479 | + private javax.swing.JButton jBtnRowCpy; | |
| 480 | + private javax.swing.JButton jBtnRowDel; | |
| 481 | + private javax.swing.JButton jBtnRowIns; | |
| 482 | + private javax.swing.JButton jBtnSearch; | |
| 483 | + private javax.swing.JFileChooser jFileChooser1; | |
| 484 | + private javax.swing.JLabel jLabel1; | |
| 485 | + private javax.swing.JMenu jMenu1; | |
| 486 | + private javax.swing.JMenu jMenu2; | |
| 487 | + private javax.swing.JMenu jMenu3; | |
| 488 | + private javax.swing.JMenuBar jMenuBar1; | |
| 489 | + private javax.swing.JMenuItem jMenuItem1; | |
| 490 | + private javax.swing.JMenuItem jMenuLoad; | |
| 491 | + private javax.swing.JMenuItem jMenuSave; | |
| 492 | + private javax.swing.JPanel jPanelRtn; | |
| 493 | + private javax.swing.JPanel jPanelTab1; | |
| 494 | + private javax.swing.JPanel jPanelTab2; | |
| 495 | + private javax.swing.JRadioButton jRadioButton1; | |
| 496 | + private javax.swing.JScrollPane jScrollPane1; | |
| 497 | + private javax.swing.JScrollPane jScrollPane404msg; | |
| 498 | + private javax.swing.JScrollPane jScrollPaneLabel; | |
| 499 | + private javax.swing.JScrollPane jScrollPaneRtn; | |
| 500 | + private javax.swing.JTabbedPane jTabbedPane1; | |
| 501 | + private javax.swing.JTable jTable1; | |
| 502 | + private javax.swing.JTextArea jTxt404msg; | |
| 503 | + private javax.swing.JTextArea jTxtLabel; | |
| 504 | + private javax.swing.JTextArea jTxtRtn; | |
| 505 | + private javax.swing.JTextField jTxtUrl; | |
| 506 | + private org.jdesktop.beansbinding.BindingGroup bindingGroup; | |
| 507 | + // End of variables declaration//GEN-END:variables | |
| 508 | +} |
| @@ -0,0 +1,163 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.util.ArrayList; | |
| 26 | +import java.util.Enumeration; | |
| 27 | +import javax.swing.text.MutableAttributeSet; | |
| 28 | +import javax.swing.text.html.HTML; | |
| 29 | + | |
/**
 * Holds the attribute information of HTML tags.
 *
 * <p>Every attribute of every tag passed to {@link #add} is stored as one
 * {@link AttrData} entry consisting of the tag, the occurrence number of that
 * tag, the attribute name and the attribute value.
 *
 * @author kgto
 */
public class AttributeData {

    /** Creates an empty attribute store. */
    public AttributeData() {
        AttrList = new ArrayList<>();
        size = 0;
    }

    /**
     * Adds all attributes of one tag occurrence.
     *
     * <p>The occurrence number stored with each attribute is one more than the
     * highest number already recorded for an equal tag. A tag without any
     * attribute adds no entry and therefore does not advance that number.
     *
     * @param tag  the tag the attributes belong to
     * @param attr the attributes of the tag; {@code null} is ignored
     */
    public void add(HTML.Tag tag, MutableAttributeSet attr) {
        if (attr == null) {
            return;
        }

        int tagcount = tagcnt(tag) + 1;

        Enumeration<?> names = attr.getAttributeNames();
        while (names.hasMoreElements()) {
            Object name = names.nextElement();

            AttrData a = new AttrData();
            a.tag = tag;
            a.count = tagcount;
            a.attrname = name.toString();
            a.attrvalue = attr.getAttribute(name).toString();

            AttrList.add(a);
        }
        size = AttrList.size();
    }

    /**
     * Searches for an attribute with the given name and value on the given tag.
     *
     * @param tag       the tag to look at
     * @param attrname  the attribute name, e.g. {@code "id"}
     * @param attrvalue the attribute value that must match exactly
     * @return {@code true} if at least one stored entry matches
     */
    public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
        for (AttrData a : AttrList) {
            if (sameTag(a.tag, tag)
                    && a.attrname.equals(attrname)
                    && a.attrvalue.equals(attrvalue)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Searches for an {@code id} attribute with the given value.
     *
     * @param tag       the tag to look at
     * @param attrvalue the expected id value
     * @return {@code true} if a matching entry exists
     */
    public boolean searchId(HTML.Tag tag, String attrvalue) {
        return search(tag, "id", attrvalue);
    }

    /**
     * Searches for a {@code class} attribute with the given value.
     *
     * @param tag       the tag to look at
     * @param attrvalue the expected class value
     * @return {@code true} if a matching entry exists
     */
    public boolean searchClass(HTML.Tag tag, String attrvalue) {
        return search(tag, "class", attrvalue);
    }

    /**
     * Returns the values of the named attribute for the given tag.
     *
     * @param tag      the tag to look at
     * @param attrname the attribute name
     * @return the matching values in insertion order; empty if there are none
     */
    public ArrayList<String> getvale(HTML.Tag tag, String attrname) {
        ArrayList<String> ret = new ArrayList<>();
        for (AttrData a : AttrList) {
            if (sameTag(a.tag, tag) && a.attrname.equals(attrname)) {
                ret.add(a.attrvalue);
            }
        }
        return ret;
    }

    /**
     * Returns the highest occurrence number recorded for the given tag.
     *
     * @param tag the tag to look up
     * @return the highest recorded number, or 0 if the tag has no entry yet
     */
    private int tagcnt(HTML.Tag tag) {
        int wkcnt = 0;
        for (AttrData a : AttrList) {
            if (sameTag(a.tag, tag) && wkcnt < a.count) {
                wkcnt = a.count;
            }
        }
        return wkcnt;
    }

    /**
     * Compares two tags by value.
     *
     * <p>Identity comparison is not enough: the predefined tags are shared
     * constants, but {@code HTML.UnknownTag} instances are separate objects
     * that are equal when their names are equal.
     */
    private static boolean sameTag(HTML.Tag left, HTML.Tag right) {
        return left == right || (left != null && left.equals(right));
    }

    // Accessors for the entry at a given index of AttrList.

    /**
     * @param i index into {@link #AttrList}
     * @return the tag of entry {@code i}
     */
    public HTML.Tag gettag(int i) {
        return AttrList.get(i).tag;
    }

    /**
     * @param i index into {@link #AttrList}
     * @return the occurrence number of entry {@code i}
     */
    public int getcount(int i) {
        return AttrList.get(i).count;
    }

    /**
     * @param i index into {@link #AttrList}
     * @return the attribute name of entry {@code i}
     */
    public String getattrname(int i) {
        return AttrList.get(i).attrname;
    }

    /**
     * @param i index into {@link #AttrList}
     * @return the attribute value of entry {@code i}
     */
    public String getattrvalue(int i) {
        return AttrList.get(i).attrvalue;
    }

    /** One stored attribute of one tag occurrence. */
    public class AttrData {
        public HTML.Tag tag;
        public int count;
        public String attrname;
        public String attrvalue;
    }

    /** All stored attribute entries in insertion order. */
    public ArrayList<AttrData> AttrList;

    /** Number of entries in {@link #AttrList}; refreshed by {@link #add}. */
    public int size;

}
| @@ -0,0 +1,211 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.util.ArrayList; | |
| 26 | +import java.util.HashMap; | |
| 27 | +import javax.swing.text.MutableAttributeSet; | |
| 28 | +import javax.swing.text.html.HTML; | |
| 29 | +import javax.swing.text.html.HTMLEditorKit; | |
| 30 | + | |
| 31 | +/** | |
| 32 | + * HTMLパーサ部品. | |
| 33 | + * @author kgto | |
| 34 | + */ | |
| 35 | +class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
| 36 | + | |
| 37 | + // Tag毎の階層 | |
| 38 | + HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
| 39 | + | |
| 40 | + // serach key 情報 | |
| 41 | + String keytag; | |
| 42 | + String keyid; | |
| 43 | + String keyclass; | |
| 44 | + | |
| 45 | + // serach key と一致時の情報退避 | |
| 46 | + int bufCount = 0; | |
| 47 | + HTML.Tag bufTag = null; | |
| 48 | + // serach key と一致時の情報格納ワーク | |
| 49 | + StringBuilder bufText; | |
| 50 | + | |
| 51 | + // serach key と一致時のデータ一覧 | |
| 52 | + ArrayList sData; | |
| 53 | + | |
| 54 | + // 属性データ | |
| 55 | + AttributeData attrdata; | |
| 56 | + | |
| 57 | + protected HtmlParserCallback(SearchData skey) { | |
| 58 | + | |
| 59 | + // キー情報展開 | |
| 60 | + keytag = skey.getHtmltag(); | |
| 61 | + keyid = skey.getHtmlid(); | |
| 62 | + keyclass = skey.getHtmlclass(); | |
| 63 | + | |
| 64 | + sData = new ArrayList(); | |
| 65 | + } | |
| 66 | + | |
| 67 | + ArrayList getrtnData() { | |
| 68 | + return this.sData; | |
| 69 | + } | |
| 70 | + | |
| 71 | + @Override | |
| 72 | + public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 73 | + // Tag毎の階層を保持 | |
| 74 | + int count = 1; | |
| 75 | + if(tagMap.containsKey(tag)) { | |
| 76 | + count = tagMap.get(tag); | |
| 77 | + count++; | |
| 78 | + } | |
| 79 | + tagMap.put(tag, count); | |
| 80 | + | |
| 81 | + // 属性解析 | |
| 82 | + AttributeData handleStartattrdata = new AttributeData(); | |
| 83 | + handleStartattrdata.add(tag, attr); | |
| 84 | + | |
| 85 | + DebugProcess.htmlinfo(tag, attr, "handleStartTag", count); | |
| 86 | + | |
| 87 | + if(bufCount == 0) { | |
| 88 | + if(tag.toString().equals(keytag)) { | |
| 89 | + //if(serachAttribute(attr)) { | |
| 90 | + if(serachAttribute(tag, handleStartattrdata)) { | |
| 91 | + bufCount = count; | |
| 92 | + bufTag = tag; | |
| 93 | + attrdata = new AttributeData(); | |
| 94 | + bufText = new StringBuilder(); | |
| 95 | + } | |
| 96 | + } | |
| 97 | + } | |
| 98 | + if(bufCount > 0) { | |
| 99 | + attrdata.add(tag, attr); | |
| 100 | + } | |
| 101 | + } | |
| 102 | + | |
| 103 | + @Override | |
| 104 | + public void handleEndTag(HTML.Tag tag, int pos){ | |
| 105 | + // Tag毎の階層を取得 | |
| 106 | + int count = 0; | |
| 107 | + if(tagMap.containsKey(tag)) { | |
| 108 | + count = tagMap.get(tag); | |
| 109 | + } | |
| 110 | + | |
| 111 | + DebugProcess.htmlinfo(tag, null, "handleEndTag", count); | |
| 112 | + | |
| 113 | + if(tag.equals(bufTag) && count <= bufCount) { | |
| 114 | + | |
| 115 | + // 溜め込んだ一致情報をリストへ格納 | |
| 116 | + sData.add(bufText.toString()); | |
| 117 | + | |
| 118 | + // 退避したserach keyとの一致情報クリア | |
| 119 | + bufCount = 0; | |
| 120 | + bufTag = null; | |
| 121 | + bufText = null; | |
| 122 | + } | |
| 123 | + | |
| 124 | + // Tag毎の階層減算 | |
| 125 | + tagMap.put(tag, --count); | |
| 126 | + } | |
| 127 | + | |
| 128 | + @Override | |
| 129 | + public void handleText(char[] data, int pos){ | |
| 130 | + | |
| 131 | + DebugProcess.htmlinfo(data, "handleText"); | |
| 132 | + | |
| 133 | + String splitchar = "\t"; | |
| 134 | + //制御文字の削除 | |
| 135 | + // 0xa0 | |
| 136 | + StringBuilder buf = new StringBuilder(); | |
| 137 | + for(int i = 0; i < data.length; i++) { | |
| 138 | + if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
| 139 | + buf.append(data[i]); | |
| 140 | + } | |
| 141 | + } | |
| 142 | + if(bufCount > 0) { | |
| 143 | + if(bufText.length() > 0) { | |
| 144 | + bufText.append(splitchar); | |
| 145 | + } | |
| 146 | + bufText.append(buf.toString()); | |
| 147 | + } | |
| 148 | + } | |
| 149 | + | |
| 150 | + @Override | |
| 151 | + public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 152 | + if(bufCount > 0) { | |
| 153 | + attrdata.add(tag, attr); | |
| 154 | + } | |
| 155 | + DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0); | |
| 156 | + } | |
| 157 | + | |
| 158 | + /** | |
| 159 | + * ページ内のID/CLASS値と検索キーを比較する. | |
| 160 | + * @param attr ページのMutableAttributeSet | |
| 161 | + * @return boolean 検索キーと一致の時、true | |
| 162 | + */ | |
| 163 | + boolean serachAttribute(MutableAttributeSet attr) { | |
| 164 | + String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
| 165 | + String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
| 166 | + | |
| 167 | + if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 168 | + if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
| 169 | + return true; | |
| 170 | + } | |
| 171 | + } | |
| 172 | + | |
| 173 | + if(keyid.isEmpty() == false) { | |
| 174 | + if(keyid.equals(currentID)) { | |
| 175 | + return true; | |
| 176 | + } | |
| 177 | + } | |
| 178 | + | |
| 179 | + if(keyclass.isEmpty() == false) { | |
| 180 | + if(keyclass.equals(currentClass)) { | |
| 181 | + return true; | |
| 182 | + } | |
| 183 | + } | |
| 184 | + | |
| 185 | + return false; | |
| 186 | + } | |
| 187 | + | |
| 188 | + /** | |
| 189 | + * ページ内のID/CLASS値と検索キーを比較する. | |
| 190 | + * @param tag | |
| 191 | + * @param attrdata | |
| 192 | + * @return boolean 検索キーと一致の時、true | |
| 193 | + */ | |
| 194 | + boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
| 195 | + // ID と CLASS の両方にキー入力有りの場合 | |
| 196 | + if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 197 | + if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
| 198 | + return true; | |
| 199 | + } | |
| 200 | + } | |
| 201 | + // ID のキーチェック | |
| 202 | + if(keyid.isEmpty() == false) { | |
| 203 | + return attrdata.searchId(tag, keyid); | |
| 204 | + } | |
| 205 | + // CLASS のキーチェック | |
| 206 | + if(keyclass.isEmpty() == false) { | |
| 207 | + return attrdata.searchClass(tag, keyclass); | |
| 208 | + } | |
| 209 | + return false; | |
| 210 | + } | |
| 211 | +} |
| @@ -0,0 +1,113 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
/**
 * Search key data: a plain mutable holder of six string values.
 *
 * <p>A freshly created or re-initialized instance holds the empty string in
 * every field.
 *
 * @author kgto
 */
public class SearchData {

    // NOTE(review): presumably a display label for the key; confirm with callers.
    private String item;
    // HTML tag name, id value and class value to look for.
    private String htmltag;
    private String htmlid;
    private String htmlclass;
    // NOTE(review): presumably the position of the wanted match; confirm with callers.
    private String around;
    // NOTE(review): presumably a regular expression applied to the match; confirm with callers.
    private String regexp;

    /** Creates a key whose fields are all empty strings. */
    public SearchData() {
        initialize();
    }

    /**
     * Creates a copy of another key, reading it through its getters.
     *
     * @param dat the key to copy
     */
    public SearchData(SearchData dat) {
        this.regexp = dat.getregexp();
        this.around = dat.getaround();
        this.htmlclass = dat.getHtmlclass();
        this.htmlid = dat.getHtmlid();
        this.htmltag = dat.getHtmltag();
        this.item = dat.getitem();
    }

    /**
     * Resets every field to the empty string.
     */
    public final void initialize() {
        item = htmltag = htmlid = htmlclass = around = regexp = "";
    }

    // ---- item ----
    public String getitem() {
        return this.item;
    }

    public void setitem(String item) {
        this.item = item;
    }

    // ---- htmltag ----
    public String getHtmltag() {
        return this.htmltag;
    }

    public void setHtmltag(String htmltag) {
        this.htmltag = htmltag;
    }

    // ---- htmlid ----
    public String getHtmlid() {
        return this.htmlid;
    }

    public void setHtmlid(String htmlid) {
        this.htmlid = htmlid;
    }

    // ---- htmlclass ----
    public String getHtmlclass() {
        return this.htmlclass;
    }

    public void setHtmlclass(String htmlclass) {
        this.htmlclass = htmlclass;
    }

    // ---- around ----
    public String getaround() {
        return this.around;
    }

    public void setaround(String around) {
        this.around = around;
    }

    // ---- regexp ----
    public String getregexp() {
        return this.regexp;
    }

    public void setregexp(String regexp) {
        this.regexp = regexp;
    }

}
| @@ -0,0 +1,255 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.io.*; | |
| 26 | +import java.net.*; | |
| 27 | +import java.util.ArrayList; | |
| 28 | +import java.util.logging.Level; | |
| 29 | +import java.util.logging.Logger; | |
| 30 | +import java.util.regex.Matcher; | |
| 31 | +import java.util.regex.Pattern; | |
| 32 | +import javax.swing.text.html.parser.ParserDelegator; | |
| 33 | + | |
| 34 | +/** | |
| 35 | + * | |
| 36 | + * @author kgto | |
| 37 | + */ | |
| 38 | +public class HtmlParser { | |
| 39 | + | |
| 40 | + URL url; | |
| 41 | + String pageData; | |
| 42 | + ArrayList sData; | |
| 43 | + | |
| 44 | + // 作業ワーク | |
| 45 | + String htmltag; | |
| 46 | + String htmlid; | |
| 47 | + String htmlclass; | |
| 48 | + | |
| 49 | + public HtmlParser(URL UrlAdress) { | |
| 50 | + DebugProcess.debuglog_set(); | |
| 51 | + this.url = UrlAdress; | |
| 52 | + getPageData(); | |
| 53 | + } | |
| 54 | + | |
| 55 | + public HtmlParser(String UrlAdress) { | |
| 56 | + DebugProcess.debuglog_set(); | |
| 57 | + try { | |
| 58 | + url = new URL(UrlAdress); | |
| 59 | + getPageData(); | |
| 60 | + | |
| 61 | + } catch (MalformedURLException ex) { | |
| 62 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 63 | + } | |
| 64 | + } | |
| 65 | + | |
| 66 | + public HtmlParser() { | |
| 67 | + DebugProcess.debuglog_set(); | |
| 68 | + url = null; | |
| 69 | + } | |
| 70 | + | |
| 71 | + public String getStringPageData() { | |
| 72 | + return pageData; | |
| 73 | + } | |
| 74 | + | |
| 75 | + public void seturl(URL UrlAdress) { | |
| 76 | + this.url = UrlAdress; | |
| 77 | + getPageData(); | |
| 78 | + } | |
| 79 | + | |
| 80 | + public void seturl(String UrlAdress) { | |
| 81 | + try { | |
| 82 | + url = new URL(UrlAdress); | |
| 83 | + getPageData(); | |
| 84 | + | |
| 85 | + } catch (MalformedURLException ex) { | |
| 86 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 87 | + } | |
| 88 | + } | |
| 89 | + | |
| 90 | + /** | |
| 91 | + * HTMLページ内検索. | |
| 92 | + * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
| 93 | + * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
| 94 | + * 行った結果を返す。<br> | |
| 95 | + * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
| 96 | + * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
| 97 | + * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
| 98 | + * @param skey 検索キーデータ(SearchData) | |
| 99 | + * @return String 検索キーに一致するデータの文字列 | |
| 100 | + */ | |
| 101 | + public String search(SearchData skey) { | |
| 102 | + | |
| 103 | + // htmlページ内を検索 | |
| 104 | + if(isHtmlkeyEq(skey) == false) { | |
| 105 | + searchPageData(skey); | |
| 106 | + } | |
| 107 | + /* | |
| 108 | + around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
| 109 | + 入力無し:取得した全ての情報を返す。 | |
| 110 | + */ | |
| 111 | + String regexp = skey.getregexp(); | |
| 112 | + if(skey.getaround().length() > 0) { | |
| 113 | + int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換 | |
| 114 | + if(wkAround < sData.size()) { | |
| 115 | + String str = (String)sData.get(wkAround); | |
| 116 | + String rtn = RegularExpression(str, regexp); | |
| 117 | + return rtn; | |
| 118 | + } | |
| 119 | + } else { | |
| 120 | + StringBuilder strbuf = new StringBuilder(); | |
| 121 | + for (Object sData1 : sData) { | |
| 122 | + String str = (String)sData1; | |
| 123 | + String rtn = RegularExpression(str, regexp); | |
| 124 | + if(strbuf.length() > 0) { | |
| 125 | + strbuf.append("\t"); | |
| 126 | + } | |
| 127 | + strbuf.append(rtn); | |
| 128 | + } | |
| 129 | + return strbuf.toString(); | |
| 130 | + } | |
| 131 | + return null; | |
| 132 | + } | |
| 133 | + | |
| 134 | + /** | |
| 135 | + * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
| 136 | + * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
| 137 | + * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
| 138 | + */ | |
| 139 | + boolean isHtmlkeyEq(SearchData skey) { | |
| 140 | + | |
| 141 | + String stag = skey.getHtmltag(); | |
| 142 | + String sid = skey.getHtmlid(); | |
| 143 | + String sclass = skey.getHtmlclass(); | |
| 144 | + | |
| 145 | + boolean rtn = true; | |
| 146 | + | |
| 147 | + // htmltag | |
| 148 | + if(htmltag == null) { | |
| 149 | + rtn = false; | |
| 150 | + } else { | |
| 151 | + if(htmltag.equals(stag) == false) { | |
| 152 | + rtn = false; | |
| 153 | + } | |
| 154 | + } | |
| 155 | + | |
| 156 | + // htmlid | |
| 157 | + if(htmlid == null) { | |
| 158 | + rtn = false; | |
| 159 | + } else { | |
| 160 | + if(htmlid.equals(sid) == false) { | |
| 161 | + rtn = false; | |
| 162 | + } | |
| 163 | + } | |
| 164 | + | |
| 165 | + // htmlclass | |
| 166 | + if(htmlclass == null) { | |
| 167 | + rtn = false; | |
| 168 | + } else { | |
| 169 | + if(htmlclass.equals(sclass) == false) { | |
| 170 | + rtn = false; | |
| 171 | + } | |
| 172 | + } | |
| 173 | + | |
| 174 | + if(!rtn) { | |
| 175 | + htmltag = stag; | |
| 176 | + htmlid = sid; | |
| 177 | + htmlclass = sclass; | |
| 178 | + } | |
| 179 | + | |
| 180 | + return rtn; | |
| 181 | + } | |
| 182 | + | |
| 183 | + /** | |
| 184 | + * 正規表現検索. | |
| 185 | + * @param strdata | |
| 186 | + * @param regexp | |
| 187 | + * @return | |
| 188 | + */ | |
| 189 | + String RegularExpression(String strdata, String regexp) { | |
| 190 | + String expdata = null; | |
| 191 | + | |
| 192 | + //regexpのチェック | |
| 193 | + if(regexp.isEmpty()) { | |
| 194 | + expdata = strdata; | |
| 195 | + return expdata; | |
| 196 | + } | |
| 197 | + | |
| 198 | + //正規表現検索 | |
| 199 | + Pattern ptn = Pattern.compile(regexp); | |
| 200 | + Matcher matchdata = ptn.matcher(strdata); | |
| 201 | + if (matchdata.find()) { | |
| 202 | + if(matchdata.groupCount() >= 1) { | |
| 203 | + expdata = matchdata.group(1); | |
| 204 | + } | |
| 205 | + } | |
| 206 | + return expdata; | |
| 207 | + } | |
| 208 | + | |
| 209 | + /** | |
| 210 | + * インターネット接続. | |
| 211 | + */ | |
| 212 | + private void getPageData() { | |
| 213 | + try { | |
| 214 | + //URL url = new URL(UrlAdress); | |
| 215 | + HttpURLConnection con = (HttpURLConnection)url.openConnection(); | |
| 216 | + con.setRequestMethod("GET"); | |
| 217 | + BufferedReader reader = new BufferedReader( | |
| 218 | + new InputStreamReader(con.getInputStream(), "utf-8")); | |
| 219 | + String wkline; | |
| 220 | + StringBuilder sb = new StringBuilder(); | |
| 221 | + while((wkline = reader.readLine()) != null) { | |
| 222 | + sb.append(wkline).append("\n"); | |
| 223 | + } | |
| 224 | + pageData = sb.toString(); | |
| 225 | + | |
| 226 | + con.disconnect(); | |
| 227 | + } | |
| 228 | + catch(IOException ex) { | |
| 229 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 230 | + } | |
| 231 | + } | |
| 232 | + | |
| 233 | + /** | |
| 234 | + * HTMLパーサ. | |
| 235 | + * @param skey | |
| 236 | + */ | |
| 237 | + private void searchPageData(SearchData skey) { | |
| 238 | + | |
| 239 | + DebugProcess.searchDatainfo(skey); | |
| 240 | + | |
| 241 | + Reader reader; | |
| 242 | + try { | |
| 243 | + reader = new BufferedReader(new StringReader(pageData)); | |
| 244 | + HtmlParserCallback cb = new HtmlParserCallback(skey); | |
| 245 | + ParserDelegator pd = new ParserDelegator(); | |
| 246 | + pd.parse(reader, cb, true); | |
| 247 | + reader.close(); | |
| 248 | + | |
| 249 | + sData = cb.getrtnData(); | |
| 250 | + | |
| 251 | + } catch (IOException ex) { | |
| 252 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
| 253 | + } | |
| 254 | + } | |
| 255 | +} |
| @@ -0,0 +1,264 @@ | ||
| 1 | +/* | |
| 2 | + * Copyright (C) 2014 kgto. | |
| 3 | + * | |
| 4 | + * This library is free software; you can redistribute it and/or | |
| 5 | + * modify it under the terms of the GNU Lesser General Public | |
| 6 | + * License as published by the Free Software Foundation; either | |
| 7 | + * version 2.1 of the License, or (at your option) any later version. | |
| 8 | + * | |
| 9 | + * This library is distributed in the hope that it will be useful, | |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | + * Lesser General Public License for more details. | |
| 13 | + * | |
| 14 | + * You should have received a copy of the GNU Lesser General Public | |
| 15 | + * License along with this library; if not, write to the Free Software | |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | + * MA 02110-1301 USA | |
| 18 | + */ | |
| 19 | +/* | |
| 20 | + * $Id$ | |
| 21 | + */ | |
| 22 | + | |
| 23 | +package webScraping.core; | |
| 24 | + | |
| 25 | +import java.io.File; | |
| 26 | +import java.io.FileInputStream; | |
| 27 | +import java.io.FileNotFoundException; | |
| 28 | +import java.io.IOException; | |
| 29 | +import java.util.logging.FileHandler; | |
| 30 | +import java.util.logging.Formatter; | |
| 31 | +import java.util.logging.Handler; | |
| 32 | +import java.util.logging.Level; | |
| 33 | +import java.util.logging.LogManager; | |
| 34 | +import java.util.logging.LogRecord; | |
| 35 | +import java.util.logging.Logger; | |
| 36 | +import javax.swing.text.MutableAttributeSet; | |
| 37 | +import javax.swing.text.html.HTML; | |
| 38 | + | |
| 39 | +/** | |
| 40 | + * デバック情報. | |
| 41 | + * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。 | |
| 42 | + * @author kgto | |
| 43 | + */ | |
| 44 | +public class DebugProcess { | |
| 45 | + // 設定ファイル名 | |
| 46 | + protected static final String configurationFilename = "Debug.prop"; | |
| 47 | + // ロガー名 | |
| 48 | + protected static final Logger logger = Logger.getLogger("WebScraping"); | |
| 49 | + // ログ出力デフォルトレベル | |
| 50 | + protected static final Level loggerlevel = Level.FINEST; | |
| 51 | + | |
| 52 | + | |
| 53 | + /** | |
| 54 | + * ログ出力設定. | |
| 55 | + * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、 | |
| 56 | + * ファイルハンドラの設定と出力書式の設定を行う。 | |
| 57 | + */ | |
| 58 | + public static void debuglog_set() { | |
| 59 | + try { | |
| 60 | + initLogConfiguration(); | |
| 61 | + | |
| 62 | + if(Level.ALL.equals(logger.getLevel())) { | |
| 63 | + //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2)); | |
| 64 | + logger.addHandler(new FileHandler("WebScraping%g.log", true)); | |
| 65 | + } | |
| 66 | + setFomatter(); | |
| 67 | + | |
| 68 | + } catch (IOException | SecurityException ex) { | |
| 69 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 70 | + } | |
| 71 | + } | |
| 72 | + | |
| 73 | + /** | |
| 74 | + * ログ出力設定解除. | |
| 75 | + */ | |
| 76 | + public static void debuglog_unset() { | |
| 77 | + } | |
| 78 | + | |
| 79 | + | |
| 80 | + /** | |
| 81 | + * デバック出力(HTML解析-タグ&属性). | |
| 82 | + * HTMLのタグと属性の解析状態を出力する。 | |
| 83 | + * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br> | |
| 84 | + * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br> | |
| 85 | + * @param tag タグ | |
| 86 | + * @param attr 属性 | |
| 87 | + * @param methodname このメソッドを呼び出した親メソッド名 | |
| 88 | + * @param count HTMLタグの階層レベル | |
| 89 | + */ | |
| 90 | + public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr, | |
| 91 | + String methodname, int count) { | |
| 92 | + | |
| 93 | + // ログ出力レベルチェック | |
| 94 | + if(logger.getLevel() == null) { | |
| 95 | + return; | |
| 96 | + } | |
| 97 | + if(logger.getLevel().intValue() > loggerlevel.intValue()) { | |
| 98 | + return; | |
| 99 | + } | |
| 100 | + | |
| 101 | + // 編集処理 | |
| 102 | + char kbn = ' '; | |
| 103 | + if("handleStartTag".equals(methodname)) { | |
| 104 | + kbn = 'F'; | |
| 105 | + } | |
| 106 | + if("handleEndTag".equals(methodname)) { | |
| 107 | + kbn = 'E'; | |
| 108 | + } | |
| 109 | + if("handleSimpleTag".equals(methodname)) { | |
| 110 | + kbn = 'S'; | |
| 111 | + } | |
| 112 | + | |
| 113 | + StringBuilder strBuf = new StringBuilder(80); | |
| 114 | + strBuf.append(count).append(" : "); | |
| 115 | + strBuf.append(kbn).append(" : "); | |
| 116 | + strBuf.append(tag.toString()); | |
| 117 | + // 属性情報 | |
| 118 | + if(attr != null) { | |
| 119 | + if(attr.getAttributeCount() != 0) { | |
| 120 | + AttributeData handleAttrData = new AttributeData(); | |
| 121 | + handleAttrData.add(tag, attr); | |
| 122 | + for(int i = 0; i < handleAttrData.size; i++) { | |
| 123 | + strBuf.append(" ["); | |
| 124 | + strBuf.append(handleAttrData.getattrname(i)); | |
| 125 | + strBuf.append("]"); | |
| 126 | + strBuf.append(handleAttrData.getcount(i)); | |
| 127 | + strBuf.append(" = "); | |
| 128 | + strBuf.append(handleAttrData.getattrvalue(i)); | |
| 129 | + } | |
| 130 | + } | |
| 131 | + } | |
| 132 | + | |
| 133 | + logger.log(loggerlevel, strBuf.toString()); | |
| 134 | + } | |
| 135 | + | |
| 136 | + /** | |
| 137 | + * デバック出力(メッセージ). | |
| 138 | + * 引数に渡された任意のメッセージを出力する。 | |
| 139 | + * @param str メッセージ | |
| 140 | + * @param methodname このメソッドを呼び出した親メソッド名 | |
| 141 | + */ | |
| 142 | + public static void htmlinfo(String str, String methodname) { | |
| 143 | + logger.log(loggerlevel, str); | |
| 144 | + } | |
| 145 | + | |
| 146 | + public static void htmlinfo(String str) { | |
| 147 | + logger.log(loggerlevel, str); | |
| 148 | + } | |
| 149 | + | |
| 150 | + /** | |
| 151 | + * デバック出力(HTML解析-本文). | |
| 152 | + * 本文の内容を出力する。 | |
| 153 | + * @param data 本文(HTML内の文字列) | |
| 154 | + * @param methodname このメソッドを呼び出した親メソッド名 | |
| 155 | + */ | |
| 156 | + public static void htmlinfo(char[] data, String methodname) { | |
| 157 | + String dat = new String(data); | |
| 158 | + logger.log(loggerlevel, dat); | |
| 159 | + } | |
| 160 | + | |
| 161 | + public static void htmlinfo(char[] data) { | |
| 162 | + String dat = new String(data); | |
| 163 | + logger.log(loggerlevel, dat); | |
| 164 | + } | |
| 165 | + | |
| 166 | + /** | |
| 167 | + * デバック出力(検索キー). | |
| 168 | + * 検索キー(SearchData)の内容を出力する。 | |
| 169 | + * @param skey | |
| 170 | + */ | |
| 171 | + public static void searchDatainfo(SearchData skey) { | |
| 172 | + | |
| 173 | + StringBuilder strBuf = new StringBuilder(30); | |
| 174 | + strBuf.append("SearchData KEY tag["); | |
| 175 | + strBuf.append(skey.getHtmltag()); | |
| 176 | + strBuf.append("] ID["); | |
| 177 | + strBuf.append(skey.getHtmlid()); | |
| 178 | + strBuf.append("] CLASS["); | |
| 179 | + strBuf.append(skey.getHtmlclass()); | |
| 180 | + strBuf.append("]\n"); | |
| 181 | + | |
| 182 | + logger.log(loggerlevel, strBuf.toString()); | |
| 183 | + } | |
| 184 | + | |
| 185 | + /** | |
| 186 | + * ログ出力設定ファイルチェック. | |
| 187 | + * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。 | |
| 188 | + */ | |
| 189 | + private static void initLogConfiguration() { | |
| 190 | + | |
| 191 | + File file = new File(configurationFilename); | |
| 192 | + try { | |
| 193 | + if(file.exists()) { | |
| 194 | + FileInputStream inputStream = new FileInputStream(file); | |
| 195 | + // 設定ファイルの読み込み | |
| 196 | + LogManager.getLogManager().readConfiguration(inputStream); | |
| 197 | + } | |
| 198 | + | |
| 199 | + } catch (FileNotFoundException ex) { | |
| 200 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 201 | + } catch (IOException ex) { | |
| 202 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
| 203 | + } | |
| 204 | + } | |
| 205 | + | |
| 206 | + /** | |
| 207 | + * ログ出力フォーマッター設定. | |
| 208 | + * ファイルへログ出力時の書式を設定する。 | |
| 209 | + */ | |
| 210 | + private static void setFomatter() { | |
| 211 | + Handler[] handlers = logger.getHandlers(); | |
| 212 | + for(int i = 0 ; i < handlers.length ; i++) { | |
| 213 | + if(handlers[i] instanceof java.util.logging.FileHandler) { | |
| 214 | + handlers[i].setFormatter(new HtmlFormatter()); | |
| 215 | + } | |
| 216 | + } | |
| 217 | + } | |
| 218 | + | |
| 219 | +} | |
| 220 | + | |
/**
 * Log output formatter.
 * @author kgto
 */
class HtmlFormatter extends Formatter {
    /**
     * Builds the output string for one log record.
     * Output format:<br>
     * YYYY-MM-DD HH:MM:SS LEVEL&lt;method name&gt;message<br>
     * followed by a newline. When the record carries a Throwable, its
     * {@code toString()} and one tab-indented line per stack frame are appended.
     *
     * @param aRecord the log record to render
     * @return the formatted text for the record
     */
    @Override
    public synchronized String format(final LogRecord aRecord) {

        final StringBuilder out = new StringBuilder(100);

        // Timestamp from the record's epoch millis: %tF is the ISO date, %tT is 24-hour HH:MM:SS
        out.append(String.format("%tF %<tT", aRecord.getMillis())).append(' ');

        out.append(aRecord.getLevel()).append('<');

        // The source method may be unknown; fall back to a fixed placeholder
        final String sourceMethod = aRecord.getSourceMethodName();
        if (sourceMethod == null) {
            out.append("N/A");
        } else {
            out.append(sourceMethod);
        }
        out.append('>');

        out.append(formatMessage(aRecord)).append('\n');

        // For records carrying an exception, append its description and stack trace
        final Throwable thrown = aRecord.getThrown();
        if (thrown != null) {
            appendThrowable(out, thrown);
        }
        return out.toString();
    }

    /** Appends the throwable's toString() line and then each stack frame on its own tab-indented line. */
    private static void appendThrowable(final StringBuilder out, final Throwable thrown) {
        out.append(thrown.toString()).append('\n');
        for (StackTraceElement frame : thrown.getStackTrace()) {
            out.append('\t').append(frame.toString()).append('\n');
        }
    }
}