作業部屋の使い方を試しています。
(empty log message)
| @@ -1,163 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package Lib; | |
| 24 | - | |
| 25 | -import java.util.ArrayList; | |
| 26 | -import java.util.Enumeration; | |
| 27 | -import javax.swing.text.MutableAttributeSet; | |
| 28 | -import javax.swing.text.html.HTML; | |
| 29 | - | |
/**
 * Holds the attribute information of HTML tags.
 *
 * <p>Every attribute handed to {@link #add} is stored as one flat entry (tag, occurrence
 * number, attribute name, attribute value) in {@link #AttrList}, in insertion order.
 * Tags are compared with {@code equals}, so two distinct {@code HTML.UnknownTag}
 * instances carrying the same name are treated as the same tag.
 *
 * @author kgto
 */
public class AttributeData {

    /** Creates an empty attribute store. */
    public AttributeData() {
        AttrList = new ArrayList<>();
        size = 0;
    }

    /**
     * Adds every attribute of one tag occurrence.
     *
     * <p>All entries created by a single call share the same occurrence number: one more
     * than the highest number already stored for an equal tag. A tag without attributes
     * adds no entry, so it does not advance the occurrence number.
     *
     * @param tag  the tag the attributes belong to
     * @param attr the attribute set of that tag
     */
    public void add(HTML.Tag tag, MutableAttributeSet attr) {
        int tagcount = tagcnt(tag) + 1;

        Enumeration<?> names = attr.getAttributeNames();
        while (names.hasMoreElements()) {
            Object name = names.nextElement();

            AttrData a = new AttrData();
            a.tag = tag;
            a.count = tagcount;
            a.attrname = name.toString();
            a.attrvalue = attr.getAttribute(name).toString();

            AttrList.add(a);
        }
        // Keep the public size mirror in sync once per call.
        size = AttrList.size();
    }

    /**
     * Searches for an attribute entry.
     *
     * @param tag       the tag to look for
     * @param attrname  the attribute name to look for
     * @param attrvalue the attribute value to look for
     * @return {@code true} if at least one stored entry matches all three arguments
     */
    public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
        for (AttrData a : AttrList) {
            if (sameTag(a.tag, tag)
                    && a.attrname.equals(attrname)
                    && a.attrvalue.equals(attrvalue)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Shortcut for {@code search(tag, "id", attrvalue)}.
     *
     * @param tag       the tag to look for
     * @param attrvalue the expected value of the {@code id} attribute
     * @return {@code true} if a matching entry exists
     */
    public boolean searchId(HTML.Tag tag, String attrvalue) {
        return search(tag, "id", attrvalue);
    }

    /**
     * Shortcut for {@code search(tag, "class", attrvalue)}.
     *
     * @param tag       the tag to look for
     * @param attrvalue the expected value of the {@code class} attribute
     * @return {@code true} if a matching entry exists
     */
    public boolean searchClass(HTML.Tag tag, String attrvalue) {
        return search(tag, "class", attrvalue);
    }

    /**
     * Collects the values of an attribute.
     *
     * @param tag      the tag to look for
     * @param attrname the attribute name to look for
     * @return the values of all matching entries in insertion order; empty if none match
     */
    public ArrayList<String> getvale(HTML.Tag tag, String attrname) {
        ArrayList<String> ret = new ArrayList<>();
        for (AttrData a : AttrList) {
            if (sameTag(a.tag, tag) && a.attrname.equals(attrname)) {
                ret.add(a.attrvalue);
            }
        }
        return ret;
    }

    /**
     * Returns the highest occurrence number stored so far for the given tag.
     *
     * @param tag the tag to look for
     * @return the highest stored number, or 0 if the tag has no entry yet
     */
    private int tagcnt(HTML.Tag tag) {
        int wkcnt = 0;
        for (AttrData a : AttrList) {
            if (sameTag(a.tag, tag) && wkcnt < a.count) {
                wkcnt = a.count;
            }
        }
        return wkcnt;
    }

    /**
     * Null-safe tag comparison based on {@code equals} instead of identity, so that
     * separately created {@code HTML.UnknownTag} instances with the same name match.
     */
    private static boolean sameTag(HTML.Tag stored, HTML.Tag wanted) {
        return stored == wanted || (stored != null && stored.equals(wanted));
    }

    // Index-based accessors for the entries of AttrList.

    /**
     * @param i index into {@link #AttrList}
     * @return the tag of entry {@code i}
     */
    public HTML.Tag gettag(int i) {
        return AttrList.get(i).tag;
    }

    /**
     * @param i index into {@link #AttrList}
     * @return the occurrence number of entry {@code i}
     */
    public int getcount(int i) {
        return AttrList.get(i).count;
    }

    /**
     * @param i index into {@link #AttrList}
     * @return the attribute name of entry {@code i}
     */
    public String getattrname(int i) {
        return AttrList.get(i).attrname;
    }

    /**
     * @param i index into {@link #AttrList}
     * @return the attribute value of entry {@code i}
     */
    public String getattrvalue(int i) {
        return AttrList.get(i).attrvalue;
    }

    // Fields

    /** One stored attribute: owning tag, occurrence number, name and value. */
    public class AttrData {
        public HTML.Tag tag;
        public int count;
        public String attrname;
        public String attrvalue;
    }

    /** All stored attribute entries in insertion order. */
    public ArrayList<AttrData> AttrList;

    /** Number of entries in {@link #AttrList} as of the last {@link #add} call. */
    public int size;

}
| @@ -1,268 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package Lib; | |
| 24 | - | |
| 25 | -import java.util.ArrayList; | |
| 26 | -import java.util.HashMap; | |
| 27 | -import javax.swing.text.MutableAttributeSet; | |
| 28 | -import javax.swing.text.html.HTML; | |
| 29 | -import javax.swing.text.html.HTMLEditorKit; | |
| 30 | - | |
| 31 | -/** | |
| 32 | - * HTMLパーサ部品. | |
| 33 | - * @author kgto | |
| 34 | - */ | |
| 35 | -public class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
| 36 | - | |
| 37 | - // デバック情報表示フラグ | |
| 38 | - final boolean DEBUG = false; | |
| 39 | - //final boolean DEBUG = true; | |
| 40 | - | |
| 41 | - // Tag毎の階層 | |
| 42 | - HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
| 43 | - | |
| 44 | - // serach key 情報 | |
| 45 | - String keytag; | |
| 46 | - String keyid; | |
| 47 | - String keyclass; | |
| 48 | - | |
| 49 | - // serach key と一致時の情報退避 | |
| 50 | - int bufCount = 0; | |
| 51 | - HTML.Tag bufTag = null; | |
| 52 | - // serach key と一致時の情報格納ワーク | |
| 53 | - StringBuilder bufText; | |
| 54 | - | |
| 55 | - // serach key と一致時のデータ一覧 | |
| 56 | - ArrayList sData; | |
| 57 | - | |
| 58 | - // 属性データ | |
| 59 | - AttributeData attrdata; | |
| 60 | - | |
| 61 | - public HtmlParserCallback(SearchData skey) { | |
| 62 | - | |
| 63 | - // キー情報展開 | |
| 64 | - keytag = skey.getHtmltag(); | |
| 65 | - keyid = skey.getHtmlid(); | |
| 66 | - keyclass = skey.getHtmlclass(); | |
| 67 | - | |
| 68 | - sData = new ArrayList(); | |
| 69 | - } | |
| 70 | - | |
| 71 | - public ArrayList getrtnData() { | |
| 72 | - return this.sData; | |
| 73 | - } | |
| 74 | - | |
| 75 | - @Override | |
| 76 | - public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 77 | - // Tag毎の階層を保持 | |
| 78 | - int count = 1; | |
| 79 | - if(tagMap.containsKey(tag)) { | |
| 80 | - count = tagMap.get(tag); | |
| 81 | - count++; | |
| 82 | - } | |
| 83 | - tagMap.put(tag, count); | |
| 84 | - | |
| 85 | - // 属性解析 | |
| 86 | - AttributeData handleStartattrdata = new AttributeData(); | |
| 87 | - handleStartattrdata.add(tag, attr); | |
| 88 | - | |
| 89 | - //--- DEBUG OUT ---- start --- | |
| 90 | - if(DEBUG) { | |
| 91 | - StringBuffer strBuf = new StringBuffer(); | |
| 92 | - // tag情報 | |
| 93 | - strBuf.append(count).append(" : F : ").append(tag.toString()); | |
| 94 | - // 属性情報 | |
| 95 | - for(int i = 0; i < handleStartattrdata.size; i++) { | |
| 96 | - strBuf.append(" [").append(handleStartattrdata.getattrname(i)).append("] ") | |
| 97 | - .append(handleStartattrdata.getattrvalue(i)); | |
| 98 | - } | |
| 99 | - // 表示 | |
| 100 | - System.out.println(strBuf); | |
| 101 | - } | |
| 102 | - //--- DEBUG OUT ---- end --- | |
| 103 | - | |
| 104 | - if(bufCount == 0) { | |
| 105 | - if(tag.toString().equals(keytag)) { | |
| 106 | - //if(serachAttribute(attr)) { | |
| 107 | - if(serachAttribute(tag, handleStartattrdata)) { | |
| 108 | - bufCount = count; | |
| 109 | - bufTag = tag; | |
| 110 | - attrdata = new AttributeData(); | |
| 111 | - bufText = new StringBuilder(); | |
| 112 | - } | |
| 113 | - } | |
| 114 | - } | |
| 115 | - if(bufCount > 0) { | |
| 116 | - attrdata.add(tag, attr); | |
| 117 | - } | |
| 118 | - } | |
| 119 | - | |
| 120 | - @Override | |
| 121 | - public void handleEndTag(HTML.Tag tag, int pos){ | |
| 122 | - // Tag毎の階層を取得 | |
| 123 | - int count = 0; | |
| 124 | - if(tagMap.containsKey(tag)) { | |
| 125 | - count = tagMap.get(tag); | |
| 126 | - } | |
| 127 | - | |
| 128 | - //--- DEBUG OUT ---- start --- | |
| 129 | - if(DEBUG) { | |
| 130 | - if(tag.equals(bufTag) && count <= bufCount) { | |
| 131 | - for(int i = 0; i < attrdata.size; i++) { | |
| 132 | - StringBuffer strBuf = new StringBuffer(); | |
| 133 | - strBuf.append(" Tag-attr : "); | |
| 134 | - strBuf.append(attrdata.gettag(i)).append(" [ "); | |
| 135 | - strBuf.append(attrdata.getcount(i)).append(" ] "); | |
| 136 | - strBuf.append(attrdata.getattrname(i)).append(" = "); | |
| 137 | - strBuf.append(attrdata.getattrvalue(i)); | |
| 138 | - System.out.println(strBuf); | |
| 139 | - } | |
| 140 | - } | |
| 141 | - StringBuffer strBuf = new StringBuffer(); | |
| 142 | - // tag情報 | |
| 143 | - strBuf.append(count).append(" : E : ").append(tag.toString()); | |
| 144 | - System.out.println(strBuf); | |
| 145 | - } | |
| 146 | - //--- DEBUG OUT ---- end --- | |
| 147 | - | |
| 148 | - if(tag.equals(bufTag) && count <= bufCount) { | |
| 149 | - | |
| 150 | - // 溜め込んだ一致情報をリストへ格納 | |
| 151 | - sData.add(bufText.toString()); | |
| 152 | - | |
| 153 | - // 退避したserach keyとの一致情報クリア | |
| 154 | - bufCount = 0; | |
| 155 | - bufTag = null; | |
| 156 | - bufText = null; | |
| 157 | - } | |
| 158 | - | |
| 159 | - // Tag毎の階層減算 | |
| 160 | - tagMap.put(tag, --count); | |
| 161 | - } | |
| 162 | - | |
| 163 | - @Override | |
| 164 | - public void handleText(char[] data, int pos){ | |
| 165 | - //--- DEBUG OUT ---- start --- | |
| 166 | - if(DEBUG) { | |
| 167 | - String dat = new String(data); | |
| 168 | - System.out.println(dat); | |
| 169 | - } | |
| 170 | - //--- DEBUG OUT ---- end --- | |
| 171 | - String splitchar = "\t"; | |
| 172 | - | |
| 173 | - //制御文字の削除 | |
| 174 | - // 0xa0 | |
| 175 | - StringBuilder buf = new StringBuilder(); | |
| 176 | - for(int i = 0; i < data.length; i++) { | |
| 177 | - if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
| 178 | - buf.append(data[i]); | |
| 179 | - } | |
| 180 | - } | |
| 181 | - | |
| 182 | - if(bufCount > 0) { | |
| 183 | - if(bufText.length() > 0) { | |
| 184 | - bufText.append(splitchar); | |
| 185 | - } | |
| 186 | - bufText.append(buf.toString()); | |
| 187 | - } | |
| 188 | - | |
| 189 | - } | |
| 190 | - | |
| 191 | - @Override | |
| 192 | - public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
| 193 | - | |
| 194 | - if(bufCount > 0) { | |
| 195 | - attrdata.add(tag, attr); | |
| 196 | - } | |
| 197 | - | |
| 198 | - //--- DEBUG OUT ---- start --- | |
| 199 | - if(DEBUG) { | |
| 200 | - AttributeData simpleattrdata = new AttributeData(); | |
| 201 | - simpleattrdata.add(tag, attr); | |
| 202 | - StringBuffer strBuf = new StringBuffer(); | |
| 203 | - // tag情報 | |
| 204 | - strBuf.append("x : S : ").append(tag.toString()); | |
| 205 | - // 属性情報 | |
| 206 | - for(int i = 0; i < simpleattrdata.size; i++) { | |
| 207 | - strBuf.append(" [").append(simpleattrdata.getattrname(i)).append("] ").append(simpleattrdata.getcount(i)) | |
| 208 | - .append(" = ").append(simpleattrdata.getattrvalue(i)); | |
| 209 | - } | |
| 210 | - System.out.println(strBuf); | |
| 211 | - } | |
| 212 | - //--- DEBUG OUT ---- end --- | |
| 213 | - } | |
| 214 | - | |
| 215 | - /** | |
| 216 | - * ページ内のID/CLASS値と検索キーを比較する. | |
| 217 | - * @param attr ページのMutableAttributeSet | |
| 218 | - * @return boolean 検索キーと一致の時、true | |
| 219 | - */ | |
| 220 | - public boolean serachAttribute(MutableAttributeSet attr) { | |
| 221 | - String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
| 222 | - String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
| 223 | - | |
| 224 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 225 | - if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
| 226 | - return true; | |
| 227 | - } | |
| 228 | - } | |
| 229 | - | |
| 230 | - if(keyid.isEmpty() == false) { | |
| 231 | - if(keyid.equals(currentID)) { | |
| 232 | - return true; | |
| 233 | - } | |
| 234 | - } | |
| 235 | - | |
| 236 | - if(keyclass.isEmpty() == false) { | |
| 237 | - if(keyclass.equals(currentClass)) { | |
| 238 | - return true; | |
| 239 | - } | |
| 240 | - } | |
| 241 | - | |
| 242 | - return false; | |
| 243 | - } | |
| 244 | - | |
| 245 | - /** | |
| 246 | - * ページ内のID/CLASS値と検索キーを比較する. | |
| 247 | - * @param tag | |
| 248 | - * @param attrdata | |
| 249 | - * @return boolean 検索キーと一致の時、true | |
| 250 | - */ | |
| 251 | - public boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
| 252 | - // ID と CLASS の両方にキー入力有りの場合 | |
| 253 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
| 254 | - if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
| 255 | - return true; | |
| 256 | - } | |
| 257 | - } | |
| 258 | - // ID のキーチェック | |
| 259 | - if(keyid.isEmpty() == false) { | |
| 260 | - return attrdata.searchId(tag, keyid); | |
| 261 | - } | |
| 262 | - // CLASS のキーチェック | |
| 263 | - if(keyclass.isEmpty() == false) { | |
| 264 | - return attrdata.searchClass(tag, keyclass); | |
| 265 | - } | |
| 266 | - return false; | |
| 267 | - } | |
| 268 | -} |
| @@ -1,98 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package Lib; | |
| 24 | - | |
/**
 * Value holder for one search key: an item label, the HTML tag / id / class to
 * look for, the position ("around") of the wanted match and a regular expression.
 *
 * <p>All properties are plain strings and are {@code null} until set.
 *
 * @author kgto
 */
public class SearchData {

    private String item;
    private String htmltag;
    private String htmlid;
    private String htmlclass;
    private String around;
    private String regexp;

    /** Creates a search key with every property unset ({@code null}). */
    public SearchData() {
    }

    /**
     * Copy constructor: takes over every property of another search key.
     *
     * @param dat the search key to copy from
     */
    public SearchData(SearchData dat) {
        item = dat.getitem();
        htmltag = dat.getHtmltag();
        htmlid = dat.getHtmlid();
        htmlclass = dat.getHtmlclass();
        around = dat.getaround();
        regexp = dat.getregexp();
    }

    // --- item ---

    /** @return the item label */
    public String getitem() {
        return this.item;
    }

    /** @param item the item label */
    public void setitem(String item) {
        this.item = item;
    }

    // --- HTML tag ---

    /** @return the HTML tag name */
    public String getHtmltag() {
        return this.htmltag;
    }

    /** @param htmltag the HTML tag name */
    public void setHtmltag(String htmltag) {
        this.htmltag = htmltag;
    }

    // --- HTML id ---

    /** @return the HTML id value */
    public String getHtmlid() {
        return this.htmlid;
    }

    /** @param htmlid the HTML id value */
    public void setHtmlid(String htmlid) {
        this.htmlid = htmlid;
    }

    // --- HTML class ---

    /** @return the HTML class value */
    public String getHtmlclass() {
        return this.htmlclass;
    }

    /** @param htmlclass the HTML class value */
    public void setHtmlclass(String htmlclass) {
        this.htmlclass = htmlclass;
    }

    // --- around ---

    /** @return the position of the wanted match, as text */
    public String getaround() {
        return this.around;
    }

    /** @param around the position of the wanted match, as text */
    public void setaround(String around) {
        this.around = around;
    }

    // --- regexp ---

    /** @return the regular expression */
    public String getregexp() {
        return this.regexp;
    }

    /** @param regexp the regular expression */
    public void setregexp(String regexp) {
        this.regexp = regexp;
    }

}
| @@ -1,229 +0,0 @@ | ||
| 1 | -/* | |
| 2 | - * Copyright (C) 2014 kgto. | |
| 3 | - * | |
| 4 | - * This library is free software; you can redistribute it and/or | |
| 5 | - * modify it under the terms of the GNU Lesser General Public | |
| 6 | - * License as published by the Free Software Foundation; either | |
| 7 | - * version 2.1 of the License, or (at your option) any later version. | |
| 8 | - * | |
| 9 | - * This library is distributed in the hope that it will be useful, | |
| 10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 12 | - * Lesser General Public License for more details. | |
| 13 | - * | |
| 14 | - * You should have received a copy of the GNU Lesser General Public | |
| 15 | - * License along with this library; if not, write to the Free Software | |
| 16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
| 17 | - * MA 02110-1301 USA | |
| 18 | - */ | |
| 19 | -/* | |
| 20 | - * $Id$ | |
| 21 | - */ | |
| 22 | - | |
| 23 | -package Lib; | |
| 24 | - | |
| 25 | -import java.io.*; | |
| 26 | -import java.net.*; | |
| 27 | -import java.util.ArrayList; | |
| 28 | -import java.util.regex.Matcher; | |
| 29 | -import java.util.regex.Pattern; | |
| 30 | -import javax.swing.text.html.parser.ParserDelegator; | |
| 31 | - | |
| 32 | -/** | |
| 33 | - * | |
| 34 | - * @author kgto | |
| 35 | - */ | |
| 36 | -public class HtmlParser { | |
| 37 | - | |
| 38 | - String UrlAdress; | |
| 39 | - String pageData; | |
| 40 | - | |
| 41 | - ArrayList sData; | |
| 42 | - | |
| 43 | - // 作業ワーク | |
| 44 | - String htmltag; | |
| 45 | - String htmlid; | |
| 46 | - String htmlclass; | |
| 47 | - | |
| 48 | - public HtmlParser() { | |
| 49 | - UrlAdress = null; | |
| 50 | - } | |
| 51 | - | |
| 52 | - public HtmlParser(String UrlAdress) { | |
| 53 | - this.UrlAdress = UrlAdress; | |
| 54 | - getpageData(); | |
| 55 | - } | |
| 56 | - | |
| 57 | - public void seturl(String UrlAdress) { | |
| 58 | - this.UrlAdress = UrlAdress; | |
| 59 | - getpageData(); | |
| 60 | - } | |
| 61 | - | |
| 62 | - /** | |
| 63 | - * HTMLページ内検索. | |
| 64 | - * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
| 65 | - * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
| 66 | - * 行った結果を返す。<br> | |
| 67 | - * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
| 68 | - * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
| 69 | - * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
| 70 | - * @param skey 検索キーデータ(SearchData) | |
| 71 | - * @return String 検索キーに一致するデータの文字列 | |
| 72 | - */ | |
| 73 | - public String search(SearchData skey) { | |
| 74 | - | |
| 75 | - String item = skey.getitem(); | |
| 76 | - String regexp = skey.getregexp(); | |
| 77 | - | |
| 78 | - // htmlページ内を検索 | |
| 79 | - if(isHtmlkeyEq(skey) == false) { | |
| 80 | - serchpageData(skey); | |
| 81 | - } | |
| 82 | - /* | |
| 83 | - around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
| 84 | - 入力無し:取得した全ての情報を返す。 | |
| 85 | - */ | |
| 86 | - String wkaround = skey.getaround(); | |
| 87 | - if(wkaround.length() > 0) { | |
| 88 | - byte wbAround = 0; | |
| 89 | - wbAround = Byte.parseByte(wkaround); // 検索位置を数値変換 | |
| 90 | - if(wbAround < sData.size()) { | |
| 91 | - String str = (String)sData.get(wbAround); | |
| 92 | - String rtn = RegularExpression(str, regexp); | |
| 93 | - return item + "\t" + rtn; | |
| 94 | - } | |
| 95 | - } else { | |
| 96 | - StringBuilder strbuf = new StringBuilder(); | |
| 97 | - for (Object sData1 : sData) { | |
| 98 | - String str = (String)sData1; | |
| 99 | - String rtn = RegularExpression(str, regexp); | |
| 100 | - if(strbuf.length() > 0) { | |
| 101 | - strbuf.append("\t"); | |
| 102 | - } | |
| 103 | - strbuf.append(rtn); | |
| 104 | - } | |
| 105 | - return item + "\t" + strbuf; | |
| 106 | - } | |
| 107 | - return null; | |
| 108 | - } | |
| 109 | - | |
| 110 | - /** | |
| 111 | - * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
| 112 | - * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
| 113 | - * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
| 114 | - */ | |
| 115 | - public boolean isHtmlkeyEq(SearchData skey) { | |
| 116 | - | |
| 117 | - String stag = skey.getHtmltag(); | |
| 118 | - String sid = skey.getHtmlid(); | |
| 119 | - String sclass = skey.getHtmlclass(); | |
| 120 | - | |
| 121 | - boolean rtn = true; | |
| 122 | - | |
| 123 | - // htmltag | |
| 124 | - if(htmltag == null) { | |
| 125 | - rtn = false; | |
| 126 | - } else { | |
| 127 | - if(htmltag.equals(stag) == false) { | |
| 128 | - rtn = false; | |
| 129 | - } | |
| 130 | - } | |
| 131 | - | |
| 132 | - // htmlid | |
| 133 | - if(htmlid == null) { | |
| 134 | - rtn = false; | |
| 135 | - } else { | |
| 136 | - if(htmlid.equals(sid) == false) { | |
| 137 | - rtn = false; | |
| 138 | - } | |
| 139 | - } | |
| 140 | - | |
| 141 | - // htmlclass | |
| 142 | - if(htmlclass == null) { | |
| 143 | - rtn = false; | |
| 144 | - } else { | |
| 145 | - if(htmlclass.equals(sclass) == false) { | |
| 146 | - rtn = false; | |
| 147 | - } | |
| 148 | - } | |
| 149 | - | |
| 150 | - if(!rtn) { | |
| 151 | - htmltag = stag; | |
| 152 | - htmlid = sid; | |
| 153 | - htmlclass = sclass; | |
| 154 | - } | |
| 155 | - | |
| 156 | - return rtn; | |
| 157 | - } | |
| 158 | - | |
| 159 | - /** | |
| 160 | - * 正規表現検索. | |
| 161 | - * @param strdata | |
| 162 | - * @param regexp | |
| 163 | - * @return | |
| 164 | - */ | |
| 165 | - public String RegularExpression(String strdata, String regexp) { | |
| 166 | - String expdata = null; | |
| 167 | - | |
| 168 | - //regexpのチェック | |
| 169 | - if(regexp.isEmpty()) { | |
| 170 | - expdata = strdata; | |
| 171 | - return expdata; | |
| 172 | - } | |
| 173 | - | |
| 174 | - //正規表現検索 | |
| 175 | - Pattern ptn = Pattern.compile(regexp); | |
| 176 | - Matcher matchdata = ptn.matcher(strdata); | |
| 177 | - if (matchdata.find()) { | |
| 178 | - if(matchdata.groupCount() >= 1) { | |
| 179 | - expdata = matchdata.group(1); | |
| 180 | - } | |
| 181 | - } | |
| 182 | - return expdata; | |
| 183 | - } | |
| 184 | - | |
| 185 | - /** | |
| 186 | - * インターネット接続. | |
| 187 | - */ | |
| 188 | - private void getpageData() { | |
| 189 | - try { | |
| 190 | - URL url = new URL(UrlAdress); | |
| 191 | - HttpURLConnection con = (HttpURLConnection)url.openConnection(); | |
| 192 | - con.setRequestMethod("GET"); | |
| 193 | - BufferedReader reader = new BufferedReader( | |
| 194 | - new InputStreamReader(con.getInputStream(), "utf-8")); | |
| 195 | - String wkline; | |
| 196 | - StringBuilder sb = new StringBuilder(); | |
| 197 | - while((wkline = reader.readLine()) != null) { | |
| 198 | - sb.append(wkline).append("\n"); | |
| 199 | - } | |
| 200 | - pageData = sb.toString(); | |
| 201 | - | |
| 202 | - con.disconnect(); | |
| 203 | - } | |
| 204 | - catch(IOException e) { | |
| 205 | - System.err.println(e); | |
| 206 | - } | |
| 207 | - } | |
| 208 | - | |
| 209 | - /** | |
| 210 | - * HTMLパーサ. | |
| 211 | - * @param skey | |
| 212 | - */ | |
| 213 | - public void serchpageData(SearchData skey){ | |
| 214 | - Reader reader; | |
| 215 | - try { | |
| 216 | - reader = new BufferedReader(new StringReader(pageData)); | |
| 217 | - HtmlParserCallback cb = new HtmlParserCallback(skey); | |
| 218 | - ParserDelegator pd = new ParserDelegator(); | |
| 219 | - pd.parse(reader, cb, true); | |
| 220 | - reader.close(); | |
| 221 | - | |
| 222 | - sData = cb.getrtnData(); | |
| 223 | - | |
| 224 | - } catch (IOException e) { | |
| 225 | - System.err.println(e); | |
| 226 | - } | |
| 227 | - } | |
| 228 | - | |
| 229 | -} |