• R/O
  • SSH
  • HTTPS

Commit

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

作業部屋の使い方を試しています。


Commit MetaInfo

Revision: 115 (tree)
Time: 2015-01-11 01:35:52
Author: tuna_p

Log Message

マージ branches/b3/WebScraping

Change Summary

Incremental Difference

--- trunk/HtmlTest2/src/Form/SearchDataRW.java (revision 114)
+++ trunk/HtmlTest2/src/Form/SearchDataRW.java (nonexistent)
@@ -1,508 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package Form;
24-
25-import WebScraping.SearchData;
26-import java.io.BufferedReader;
27-import java.io.BufferedWriter;
28-import java.io.File;
29-import java.io.FileInputStream;
30-import java.io.FileNotFoundException;
31-import java.io.FileOutputStream;
32-import java.io.IOException;
33-import java.io.InputStreamReader;
34-import java.io.OutputStreamWriter;
35-import java.util.ArrayList;
36-import java.util.logging.Level;
37-import java.util.logging.Logger;
38-import javax.xml.parsers.DocumentBuilder;
39-import javax.xml.parsers.DocumentBuilderFactory;
40-import javax.xml.parsers.ParserConfigurationException;
41-import javax.xml.transform.Transformer;
42-import javax.xml.transform.TransformerConfigurationException;
43-import javax.xml.transform.TransformerException;
44-import javax.xml.transform.TransformerFactory;
45-import javax.xml.transform.dom.DOMSource;
46-import javax.xml.transform.stream.StreamResult;
47-import org.w3c.dom.DOMImplementation;
48-import org.w3c.dom.Document;
49-import org.w3c.dom.Element;
50-import org.w3c.dom.Node;
51-import org.w3c.dom.NodeList;
52-import org.xml.sax.SAXException;
53-
54-/**
55- *
56- * @author kgto
57- */
58-public class SearchDataRW {
59-
60- DocumentBuilder builder;
61- public Document document;
62- Element root;
63-
64- private final String splitchar = "\t";
65-
66- private String UrlAdress;
67- private ArrayList slist;
68-
/**
 * Creates a reader/writer and prepares the DOM parser used by the XML operations.
 * If the parser cannot be configured, the failure is logged and {@code builder}
 * is left unassigned.
 */
public SearchDataRW() {
    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    try {
        builder = domFactory.newDocumentBuilder();
    } catch (ParserConfigurationException ex) {
        Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
    }
}
78-
79- public void seturl(String UrlAdress) {
80- this.UrlAdress = UrlAdress;
81- }
82-
83- public void setslist(ArrayList slist) {
84- this.slist = slist;
85- }
86-
87- public String geturl() {
88- return UrlAdress;
89- }
90-
91- public ArrayList getslist() {
92- return slist;
93- }
94-
95- /**
96- * 保存.
97- * @param file
98- */
99- public void save(File file) {
100- //saveCsv(file);
101- //saveXml(file);
102-
103- saveUrl(UrlAdress);
104- saveSearchList(slist);
105- write(file);
106- }
107-
108- /**
109- * 読込.
110- * @param file
111- */
112- public void load(File file) {
113- //loadCsv(file);
114- //loadXml(file);
115-
116- read(file);
117- UrlAdress = loadUrl();
118- slist = loadSearchList();
119- }
120-
121- /* ---------------------------------------------------------------------- */
122- /**
123- * 保存(CSV形式).
124- * @param file
125- */
126- public void saveCsv(File file) {
127-
128- try {
129- //空のファイルを作成
130- file.createNewFile();
131-
132- FileOutputStream fileoutputstream = new FileOutputStream(file);
133- OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8");
134- BufferedWriter bufferedwriter = new BufferedWriter(outputstreamwriter);
135-
136- // URL
137- bufferedwriter.write(UrlAdress);
138- bufferedwriter.write("\n");
139- // 検索情報
140- for (Object slist1 : slist) {
141- SearchData sdat = (SearchData) slist1;
142- StringBuilder str = new StringBuilder();
143- str.append(sdat.getitem()).append(splitchar);
144- str.append(sdat.getHtmltag()).append(splitchar);
145- str.append(sdat.getHtmlid()).append(splitchar);
146- str.append(sdat.getHtmlclass()).append(splitchar);
147- str.append(sdat.getaround()).append(splitchar);
148- str.append(sdat.getregexp()).append("\n");
149-
150- bufferedwriter.write(str.toString());
151- }
152- bufferedwriter.close();
153-
154- } catch(IOException ex) {
155- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
156- }
157- }
158-
159- /**
160- * 読込(CSV形式).
161- * @param file
162- */
163- public void loadCsv(File file) {
164- slist = new ArrayList();
165-
166- try {
167- FileInputStream fileinputstream = new FileInputStream(file);
168- InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8");
169- BufferedReader bufferedreader = new BufferedReader(inputstreamreader);
170-
171- String rec;
172-
173- // URL
174- UrlAdress = bufferedreader.readLine();
175- // 検索情報
176- while((rec = bufferedreader.readLine()) != null) {
177- String[] recary = rec.split(splitchar, -1);
178- SearchData sdat = new SearchData();
179- sdat.setitem(recary[0]);
180- sdat.setHtmltag(recary[1]);
181- sdat.setHtmlid(recary[2]);
182- sdat.setHtmlclass(recary[3]);
183- sdat.setaround(recary[4]);
184- sdat.setregexp(recary[5]);
185-
186- slist.add(sdat);
187- }
188- bufferedreader.close();
189-
190- } catch(IOException ex) {
191- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
192- }
193-
194- }
195-
196- /* ---------------------------------------------------------------------- */
197- /**
198- * 保存(XML形式).
199- * @param file
200- */
201- public void saveXml(File file) {
202- try {
203- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
204- DocumentBuilder builder = factory.newDocumentBuilder();
205- DOMImplementation domImpl = builder.getDOMImplementation();
206-
207- Document document = domImpl.createDocument("","searchdata",null);
208- Element root = document.getDocumentElement();
209-
210- // URL
211- Element url = document.createElement("url");
212- url.appendChild(document.createTextNode(UrlAdress));
213- root.appendChild(url);
214-
215- // 検索情報
216- for (Object slist1 : slist) {
217- SearchData sdat = (SearchData) slist1;
218-
219- Element cslist = document.createElement("searchlist");
220- Element item = document.createElement("item");
221- Element htmltag = document.createElement("htmltag");
222- Element htmlid = document.createElement("htmlid");
223- Element htmlclass = document.createElement("htmlclass");
224- Element around = document.createElement("around");
225- Element regexp = document.createElement("regexp");
226-
227- item.appendChild(document.createTextNode(sdat.getitem()));
228- htmltag.appendChild(document.createTextNode(sdat.getHtmltag()));
229- htmlid.appendChild(document.createTextNode(sdat.getHtmlid()));
230- htmlclass.appendChild(document.createTextNode(sdat.getHtmlclass()));
231- around.appendChild(document.createTextNode(sdat.getaround()));
232- regexp.appendChild(document.createTextNode(sdat.getregexp()));
233-
234- cslist.appendChild(item);
235- cslist.appendChild(htmltag);
236- cslist.appendChild(htmlid);
237- cslist.appendChild(htmlclass);
238- cslist.appendChild(around);
239- cslist.appendChild(regexp);
240-
241- root.appendChild(cslist);
242- }
243- // 出力
244- TransformerFactory transFactory = TransformerFactory.newInstance();
245- Transformer transformer = transFactory.newTransformer();
246-
247- DOMSource source = new DOMSource(document);
248- FileOutputStream os = new FileOutputStream(file);
249- StreamResult result = new StreamResult(os);
250- transformer.transform(source, result);
251-
252- } catch (ParserConfigurationException | FileNotFoundException ex) {
253- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
254- } catch (TransformerConfigurationException ex) {
255- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
256- } catch (TransformerException ex) {
257- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
258- }
259- }
260-
261- /**
262- * 読込(XML形式).
263- * @param file
264- */
265- public void loadXml(File file) {
266- slist = new ArrayList();
267-
268- try {
269- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
270- DocumentBuilder builder = factory.newDocumentBuilder();
271- Document doc = builder.parse(file);
272-
273- // ルート要素の取得
274- Element root = doc.getDocumentElement();
275-
276- // URL
277- NodeList url = root.getElementsByTagName("url");
278- Node urlnode = url.item(0);
279- UrlAdress = urlnode.getFirstChild().getNodeValue();
280-
281- // 検索情報
282- NodeList cslist = root.getElementsByTagName("searchlist");
283- for(int i = 0; i < cslist.getLength(); i++) {
284- SearchData sdat = new SearchData();
285-
286- Node slistnode = cslist.item(i);
287- Node child;
288- for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) {
289- if(child.getNodeType() == Node.ELEMENT_NODE) {
290-
291- String tag = child.getNodeName();
292- String rtn = "";
293- if(child.getFirstChild() != null) {
294- rtn = child.getFirstChild().getNodeValue();
295- }
296-
297- switch (tag) {
298- case "item" :
299- sdat.setitem(rtn);
300- break;
301- case "htmltag" :
302- sdat.setHtmltag(rtn);
303- break;
304- case "htmlid" :
305- sdat.setHtmlid(rtn);
306- break;
307- case "htmlclass" :
308- sdat.setHtmlclass(rtn);
309- break;
310- case "around" :
311- sdat.setaround(rtn);
312- break;
313- case "regexp" :
314- sdat.setregexp(rtn);
315- break;
316- }
317- }
318- }
319- slist.add(sdat);
320- }
321-
322- } catch (ParserConfigurationException | SAXException | IOException ex) {
323- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
324- }
325- }
326-
327- /* ---------------------------------------------------------------------- */
328-
329- public String loadUrl() {
330- String urladdress;
331-
332- NodeList nodelist = root.getElementsByTagName("url");
333- Node node = nodelist.item(0);
334- urladdress = node.getFirstChild().getNodeValue();
335-
336- return urladdress;
337- }
338-
339- public ArrayList<SearchData> loadSearchList() {
340- ArrayList<SearchData> slist = new ArrayList<>();
341-
342- NodeList nodelist = root.getElementsByTagName("searchlist");
343- for(int i = 0; i < nodelist.getLength(); i++) {
344- Node childnode = nodelist.item(i);
345-
346- boolean sdatflg = false;
347- SearchData sdat = new SearchData();
348-
349- //NodeList childnodelist = childnode.getChildNodes();
350- //for(int j = 0; j < childnodelist.getLength(); j++) {
351- // Node child = childnodelist.item(j);
352-
353- for (Node child = childnode.getFirstChild();
354- child != null; child = child.getNextSibling()) {
355-
356- if(child.getNodeType() == Node.ELEMENT_NODE) {
357-
358- String tag = child.getNodeName();
359- String rtn = "";
360- if(child.getFirstChild() != null) {
361- rtn = child.getFirstChild().getNodeValue();
362- }
363-
364- switch (tag) {
365- case "item" :
366- sdat.setitem(rtn);
367- sdatflg = true;
368- break;
369- case "htmltag" :
370- sdat.setHtmltag(rtn);
371- sdatflg = true;
372- break;
373- case "htmlid" :
374- sdat.setHtmlid(rtn);
375- sdatflg = true;
376- break;
377- case "htmlclass" :
378- sdat.setHtmlclass(rtn);
379- sdatflg = true;
380- break;
381- case "around" :
382- sdat.setaround(rtn);
383- sdatflg = true;
384- break;
385- case "regexp" :
386- sdat.setregexp(rtn);
387- sdatflg = true;
388- break;
389- }
390- }
391- }
392- if(sdatflg) slist.add(sdat);
393- }
394- return slist;
395- }
396-
397- public Element loadElement(String elementTagName) {
398- NodeList nodelist = root.getElementsByTagName(elementTagName);
399- Element element = (Element)nodelist.item(0);
400-
401- return element;
402- }
403-
404- public void saveUrl(String urladdress) {
405- checkdoc();
406- removeElement("url"); // 既にElementが存在してた場合、一度削除
407-
408- Element url = document.createElement("url");
409- url.appendChild(document.createTextNode(urladdress));
410- root.appendChild(url);
411- }
412-
413- public void saveSearchList(ArrayList slist) {
414- checkdoc();
415- removeElement("searchlist"); // 既にElementが存在してた場合、一度削除
416-
417- for (Object slist1 : slist) {
418- SearchData sdat = (SearchData) slist1;
419-
420- Element cslist = document.createElement("searchlist");
421-
422- addChild(cslist, "item", sdat.getitem());
423- addChild(cslist, "htmltag", sdat.getHtmltag());
424- addChild(cslist, "htmlid", sdat.getHtmlid());
425- addChild(cslist, "htmlclass", sdat.getHtmlclass());
426- addChild(cslist, "around", sdat.getaround());
427- addChild(cslist, "regexp", sdat.getregexp());
428-
429- root.appendChild(cslist);
430- }
431- }
432-
433- public void saveElement(Element element) {
434- checkdoc();
435- removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除
436-
437- root.appendChild(element);
438- }
439-
440- private void addChild(Element cslist, String keyword, String data) {
441- if(!data.isEmpty()) {
442- Element element = document.createElement(keyword);
443- element.appendChild(document.createTextNode(data));
444- cslist.appendChild(element);
445- }
446- }
447-
448- private void removeElement(String elementTagName) {
449- int nodeSize;
450- do {
451- NodeList nodelist = document.getElementsByTagName(elementTagName);
452- nodeSize = nodelist.getLength();
453- for(int i = 0; i < nodelist.getLength(); i++) {
454- Node node = nodelist.item(i);
455- root.removeChild(node);
456- }
457- } while(nodeSize > 0);
458- }
459-
460- /**
461- * ドキュメントチェック.
462- * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。
463- * 既読の場合、ルートエレメントの取得を行う。
464- */
465- public void checkdoc() {
466- if(document == null) {
467- DOMImplementation domImpl = builder.getDOMImplementation();
468- document = domImpl.createDocument("","searchdata",null);
469- }
470- root = document.getDocumentElement();
471- }
472-
473- /**
474- * XML読込み.
475- * @param file
476- */
477- public void read(File file) {
478- try {
479- document = builder.parse(file);
480- root = document.getDocumentElement();
481-
482- } catch (SAXException | IOException ex) {
483- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
484- }
485- }
486-
487- /**
488- * XML書込み.
489- * @param file
490- */
491- public void write(File file) {
492- try {
493- TransformerFactory transFactory = TransformerFactory.newInstance();
494- Transformer transformer = transFactory.newTransformer();
495-
496- DOMSource source = new DOMSource(document);
497- FileOutputStream os = new FileOutputStream(file);
498- StreamResult result = new StreamResult(os);
499- transformer.transform(source, result);
500-
501- } catch (TransformerConfigurationException ex) {
502- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
503- } catch (FileNotFoundException | TransformerException ex) {
504- Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
505- }
506- }
507-
508-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/Form/HtmlSearch.java (revision 114)
+++ trunk/HtmlTest2/src/Form/HtmlSearch.java (nonexistent)
@@ -1,454 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-package Form;
23-
24-import WebScraping.HtmlParser;
25-import WebScraping.SearchData;
26-import java.awt.Desktop;
27-import java.io.File;
28-import java.io.IOException;
29-import java.net.URI;
30-import java.net.URISyntaxException;
31-import java.util.*;
32-import java.util.logging.Level;
33-import java.util.logging.Logger;
34-import javax.swing.JFileChooser;
35-import javax.swing.filechooser.FileFilter;
36-import javax.swing.filechooser.FileNameExtensionFilter;
37-import org.jdesktop.observablecollections.ObservableCollections;
38-
39-/**
40- * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する.
41- * @author kgto
42- */
43-public class HtmlSearch extends javax.swing.JFrame {
44-
45- private final SearchDataRW sio = new SearchDataRW();
46-
47- private ArrayList slist = new ArrayList();
48- private List serachDataList = ObservableCollections.observableList(slist);
49-
50- /**
51- * Creates new form Frame1
52- */
53- public HtmlSearch() {
54- initComponents();
55-
56- // カレントディレクトリ取得
57- String dir = System.getProperty("user.dir");
58- File file = new java.io.File(dir + "\\data");
59- jFileChooser1.setCurrentDirectory(file);
60-
61- FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml");
62- FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt");
63- jFileChooser1.addChoosableFileFilter(filter1);
64- jFileChooser1.addChoosableFileFilter(filter2);
65- jFileChooser1.setFileFilter(filter1);
66- }
67-
68- public List getSerachDataList() {
69- return this.serachDataList;
70- }
71-
72- public void setSerachDataList(List serachDataList) {
73- this.serachDataList = serachDataList;
74- }
75-
76- /**
77- * This method is called from within the constructor to initialize the form.
78- * WARNING: Do NOT modify this code. The content of this method is always
79- * regenerated by the Form Editor.
80- */
81- @SuppressWarnings("unchecked")
82- // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
83- private void initComponents() {
 // Generated by the GUI form editor: instantiates every Swing component and the
 // beans-binding group, wires the listeners, and lays out the frame.
84- bindingGroup = new org.jdesktop.beansbinding.BindingGroup();
85-
86- jFileChooser1 = new javax.swing.JFileChooser();
87- jLabel1 = new javax.swing.JLabel();
88- jTxtUrl = new javax.swing.JTextField();
89- jBtnSearch = new javax.swing.JButton();
90- jPanel1 = new javax.swing.JPanel();
91- jScrollPane1 = new javax.swing.JScrollPane();
92- jTable1 = new javax.swing.JTable();
93- jBtnRowIns = new javax.swing.JButton();
94- jBtnRowDel = new javax.swing.JButton();
95- jBtnRowCpy = new javax.swing.JButton();
96- jPanel2 = new javax.swing.JPanel();
97- jScrollPane2 = new javax.swing.JScrollPane();
98- jTxtRtn = new javax.swing.JTextArea();
99- jMenuBar1 = new javax.swing.JMenuBar();
100- jMenu1 = new javax.swing.JMenu();
101- jMenuLoad = new javax.swing.JMenuItem();
102- jMenuSave = new javax.swing.JMenuItem();
103- jMenu3 = new javax.swing.JMenu();
104- jMenuItem1 = new javax.swing.JMenuItem();
105- jMenu2 = new javax.swing.JMenu();
106-
 // File chooser defaults; the constructor sets the starting directory and the
 // file filters after this method returns.
107- jFileChooser1.setCurrentDirectory(null);
108- jFileChooser1.setDialogTitle("");
109-
110- setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
111- setTitle("タグ検索");
112-
113- jLabel1.setText(" URL:");
114-
115- jBtnSearch.setText("検索");
116- jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
117- public void actionPerformed(java.awt.event.ActionEvent evt) {
118- jBtnSearchActionPerformed(evt);
119- }
120- });
121-
122- jPanel1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));
123-
124- jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
125- jTable1.getTableHeader().setReorderingAllowed(false);
126-
 // Bind jTable1 (READ_WRITE) to this form's serachDataList property; each column
 // is bound to one property of the row object and typed as String.
127- org.jdesktop.beansbinding.ELProperty eLProperty = org.jdesktop.beansbinding.ELProperty.create("${serachDataList}");
128- org.jdesktop.swingbinding.JTableBinding jTableBinding = org.jdesktop.swingbinding.SwingBindings.createJTableBinding(org.jdesktop.beansbinding.AutoBinding.UpdateStrategy.READ_WRITE, this, eLProperty, jTable1);
129- org.jdesktop.swingbinding.JTableBinding.ColumnBinding columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${item}"));
130- columnBinding.setColumnName("項目名");
131- columnBinding.setColumnClass(String.class);
132- columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmltag}"));
133- columnBinding.setColumnName("タグ");
134- columnBinding.setColumnClass(String.class);
135- columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmlid}"));
136- columnBinding.setColumnName("ID");
137- columnBinding.setColumnClass(String.class);
138- columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${htmlclass}"));
139- columnBinding.setColumnName("クラス");
140- columnBinding.setColumnClass(String.class);
141- columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${around}"));
142- columnBinding.setColumnName("位置");
143- columnBinding.setColumnClass(String.class);
144- columnBinding = jTableBinding.addColumnBinding(org.jdesktop.beansbinding.ELProperty.create("${regexp}"));
145- columnBinding.setColumnName("抽出条件");
146- columnBinding.setColumnClass(String.class);
147- bindingGroup.addBinding(jTableBinding);
148- jTableBinding.bind();
149- jScrollPane1.setViewportView(jTable1);
150-
151- jBtnRowIns.setText("行挿入");
152- jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
153- public void actionPerformed(java.awt.event.ActionEvent evt) {
154- jBtnRowInsActionPerformed(evt);
155- }
156- });
157-
158- jBtnRowDel.setText("行削除");
159- jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
160- public void actionPerformed(java.awt.event.ActionEvent evt) {
161- jBtnRowDelActionPerformed(evt);
162- }
163- });
164-
165- jBtnRowCpy.setText("行コピー");
166- jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
167- public void actionPerformed(java.awt.event.ActionEvent evt) {
168- jBtnRowCpyActionPerformed(evt);
169- }
170- });
171-
 // Search-criteria panel: the table on top, and below it the row buttons
 // (copy, delete, insert) pushed to the right by a growing leading gap.
172- javax.swing.GroupLayout jPanel1Layout = new javax.swing.GroupLayout(jPanel1);
173- jPanel1.setLayout(jPanel1Layout);
174- jPanel1Layout.setHorizontalGroup(
175- jPanel1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
176- .addGroup(jPanel1Layout.createSequentialGroup()
177- .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
178- .addComponent(jBtnRowCpy)
179- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
180- .addComponent(jBtnRowDel)
181- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
182- .addComponent(jBtnRowIns))
183- .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
184- );
185- jPanel1Layout.setVerticalGroup(
186- jPanel1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
187- .addGroup(jPanel1Layout.createSequentialGroup()
188- .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 140, javax.swing.GroupLayout.PREFERRED_SIZE)
189- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
190- .addGroup(jPanel1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
191- .addComponent(jBtnRowDel)
192- .addComponent(jBtnRowIns)
193- .addComponent(jBtnRowCpy)))
194- );
195-
196- jPanel2.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));
197-
198- jTxtRtn.setColumns(20);
199- jTxtRtn.setRows(5);
200- jScrollPane2.setViewportView(jTxtRtn);
201-
202- javax.swing.GroupLayout jPanel2Layout = new javax.swing.GroupLayout(jPanel2);
203- jPanel2.setLayout(jPanel2Layout);
204- jPanel2Layout.setHorizontalGroup(
205- jPanel2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
206- .addComponent(jScrollPane2, javax.swing.GroupLayout.DEFAULT_SIZE, 532, Short.MAX_VALUE)
207- );
208- jPanel2Layout.setVerticalGroup(
209- jPanel2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
210- .addComponent(jScrollPane2, javax.swing.GroupLayout.DEFAULT_SIZE, 156, Short.MAX_VALUE)
211- );
212-
 // Menu bar: a file menu (LOAD / SAVE), a tools menu (show in browser), and a
 // search menu that reacts to mouse clicks instead of holding menu items.
213- jMenu1.setText("ファイル");
214-
215- jMenuLoad.setText("LOAD");
216- jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
217- public void actionPerformed(java.awt.event.ActionEvent evt) {
218- jMenuLoadActionPerformed(evt);
219- }
220- });
221- jMenu1.add(jMenuLoad);
222-
223- jMenuSave.setText("SAVE");
224- jMenuSave.addActionListener(new java.awt.event.ActionListener() {
225- public void actionPerformed(java.awt.event.ActionEvent evt) {
226- jMenuSaveActionPerformed(evt);
227- }
228- });
229- jMenu1.add(jMenuSave);
230-
231- jMenuBar1.add(jMenu1);
232-
233- jMenu3.setText("ツール");
234-
235- jMenuItem1.setText("ブラウザで表示");
236- jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
237- public void actionPerformed(java.awt.event.ActionEvent evt) {
238- jMenuItem1ActionPerformed(evt);
239- }
240- });
241- jMenu3.add(jMenuItem1);
242-
243- jMenuBar1.add(jMenu3);
244-
245- jMenu2.setText("検索");
246- jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
247- public void mouseClicked(java.awt.event.MouseEvent evt) {
248- jMenu2MouseClicked(evt);
249- }
250- });
251- jMenuBar1.add(jMenu2);
252-
253- setJMenuBar(jMenuBar1);
254-
 // Frame layout, top to bottom: URL row (label, text field, search button),
 // the search-criteria panel, then the result panel.
255- javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
256- getContentPane().setLayout(layout);
257- layout.setHorizontalGroup(
258- layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
259- .addGroup(layout.createSequentialGroup()
260- .addComponent(jLabel1)
261- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
262- .addComponent(jTxtUrl)
263- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
264- .addComponent(jBtnSearch))
265- .addComponent(jPanel2, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
266- .addComponent(jPanel1, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
267- );
268- layout.setVerticalGroup(
269- layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
270- .addGroup(layout.createSequentialGroup()
271- .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
272- .addComponent(jLabel1)
273- .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
274- .addComponent(jBtnSearch))
275- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
276- .addComponent(jPanel1, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
277- .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
278- .addComponent(jPanel2, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
279- );
280-
281- bindingGroup.bind();
282-
283- pack();
284- }// </editor-fold>//GEN-END:initComponents
285-
286- private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed
287- int SelectedRow = jTable1.getSelectedRow();
288- SearchData sdat = new SearchData();
289-
290- if(SelectedRow >= 0) {
291- this.serachDataList.add(SelectedRow, sdat);
292- } else {
293- this.serachDataList.add(sdat);
294- }
295- }//GEN-LAST:event_jBtnRowInsActionPerformed
296-
297- private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed
298- int SelectedRow = jTable1.getSelectedRow();
299- if(!(SelectedRow < 0)) {
300- this.serachDataList.remove(SelectedRow);
301- }
302- }//GEN-LAST:event_jBtnRowDelActionPerformed
303-
304- private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed
305- jFileChooser1.setDialogTitle("読込");
306- int selected = jFileChooser1.showOpenDialog(this);
307- if (selected == JFileChooser.APPROVE_OPTION) {
308- File file = jFileChooser1.getSelectedFile();
309- serachDataList.clear();
310- sio.load(file);
311- jTxtUrl.setText(sio.geturl());
312- serachDataList.addAll(sio.getslist());
313- }
314- }//GEN-LAST:event_jMenuLoadActionPerformed
315-
316- private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed
317- jFileChooser1.setDialogTitle("保存");
318- int selected = jFileChooser1.showSaveDialog(this);
319- if (selected == JFileChooser.APPROVE_OPTION) {
320- File file = jFileChooser1.getSelectedFile();
321- sio.seturl(jTxtUrl.getText());
322- sio.setslist(slist);
323- sio.save(file);
324- }
325- }//GEN-LAST:event_jMenuSaveActionPerformed
326-
327- private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked
328- jTxtRtn.setText(null);
329- HtmlParser par = new HtmlParser(jTxtUrl.getText());
330-
331- String strdata = par.getStringPageData();
332- String strsearch = "一致する銘柄は見つかりませんでした";
333- if(check404(strdata, strsearch)) {
334- jTxtRtn.append(strsearch);
335- return;
336- }
337-
338- for (Object slist1 : slist) {
339- SearchData sdata = (SearchData)slist1;
340- String ans = sdata.getitem();
341- String rtn = par.search(sdata);
342- jTxtRtn.append(ans + "\t" + rtn + "\r\n");
343- }
344- jTxtRtn.setCaretPosition(0);
345- }//GEN-LAST:event_jMenu2MouseClicked
346-
347- private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed
348- int SelectedRow = jTable1.getSelectedRow();
349- if(SelectedRow >= 0) {
350- SearchData SelectData = (SearchData)slist.get(SelectedRow);
351- SearchData Cpydata = new SearchData(SelectData);
352- this.serachDataList.add(SelectedRow, Cpydata);
353- }
354- }//GEN-LAST:event_jBtnRowCpyActionPerformed
355-
356- private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed
357- jTxtRtn.setText(null);
358- HtmlParser par = new HtmlParser(jTxtUrl.getText());
359-
360- String strdata = par.getStringPageData();
361- String strsearch = "一致する銘柄は見つかりませんでした";
362- if(check404(strdata, strsearch)) {
363- jTxtRtn.append(strsearch);
364- return;
365- }
366-
367- for (Object slist1 : slist) {
368- SearchData sdata = (SearchData)slist1;
369- String ans = sdata.getitem();
370- String rtn = par.search(sdata);
371- jTxtRtn.append(ans + "\t" + rtn + "\r\n");
372- }
373- jTxtRtn.setCaretPosition(0);
374- }//GEN-LAST:event_jBtnSearchActionPerformed
375-
376- private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed
377- Desktop desktop = Desktop.getDesktop();
378- String uriString = jTxtUrl.getText();
379- try {
380- URI uri = new URI(uriString);
381- desktop.browse(uri);
382-
383- } catch (URISyntaxException | IOException ex) {
384- Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
385- }
386- }//GEN-LAST:event_jMenuItem1ActionPerformed
387-
388- boolean check404(String strdata, String strsearch) {
389- if(strdata.contains(strsearch)) {
390- return true;
391- }
392- return false;
393- }
394-
395- /**
396- * @param args the command line arguments
397- */
398- public static void main(String args[]) {
399- /* Set the Nimbus look and feel */
400- //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
401- /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
402- * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
403- */
404- try {
405- for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
406- if ("Nimbus".equals(info.getName())) {
407- javax.swing.UIManager.setLookAndFeel(info.getClassName());
408- break;
409- }
410- }
411- } catch (ClassNotFoundException ex) {
412- java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
413- } catch (InstantiationException ex) {
414- java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
415- } catch (IllegalAccessException ex) {
416- java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
417- } catch (javax.swing.UnsupportedLookAndFeelException ex) {
418- java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
419- }
420- //</editor-fold>
421-
422- /* Create and display the form */
423- java.awt.EventQueue.invokeLater(new Runnable() {
424- @Override
425- public void run() {
426- new HtmlSearch().setVisible(true);
427- }
428- });
429- }
430-
431- // Variables declaration - do not modify//GEN-BEGIN:variables
432- private javax.swing.JButton jBtnRowCpy;
433- private javax.swing.JButton jBtnRowDel;
434- private javax.swing.JButton jBtnRowIns;
435- private javax.swing.JButton jBtnSearch;
436- private javax.swing.JFileChooser jFileChooser1;
437- private javax.swing.JLabel jLabel1;
438- private javax.swing.JMenu jMenu1;
439- private javax.swing.JMenu jMenu2;
440- private javax.swing.JMenu jMenu3;
441- private javax.swing.JMenuBar jMenuBar1;
442- private javax.swing.JMenuItem jMenuItem1;
443- private javax.swing.JMenuItem jMenuLoad;
444- private javax.swing.JMenuItem jMenuSave;
445- private javax.swing.JPanel jPanel1;
446- private javax.swing.JPanel jPanel2;
447- private javax.swing.JScrollPane jScrollPane1;
448- private javax.swing.JScrollPane jScrollPane2;
449- private javax.swing.JTable jTable1;
450- private javax.swing.JTextArea jTxtRtn;
451- private javax.swing.JTextField jTxtUrl;
452- private org.jdesktop.beansbinding.BindingGroup bindingGroup;
453- // End of variables declaration//GEN-END:variables
454-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/AttributeData.java (revision 114)
+++ trunk/HtmlTest2/src/WebScraping/AttributeData.java (nonexistent)
@@ -1,163 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package WebScraping;
24-
25-import java.util.ArrayList;
26-import java.util.Enumeration;
27-import javax.swing.text.MutableAttributeSet;
28-import javax.swing.text.html.HTML;
29-
30-/**
31- * HTMLタグの属性情報を保持する.
32- * @author kgto
33- */
34-public class AttributeData {
35-
36- public AttributeData() {
37- AttrList = new ArrayList();
38- size = 0;
39- }
40-
41- /**
42- * 属性情報追加.
43- * @param tag
44- * @param attr
45- */
46- public void add(HTML.Tag tag, MutableAttributeSet attr) {
47-
48- int tagcount = tagcnt(tag);
49- ++tagcount;
50-
51- Enumeration e = attr.getAttributeNames();
52- while(e.hasMoreElements()) {
53- Object obj = e.nextElement();
54-
55- AttrData a = new AttrData();
56- a.tag = tag;
57- a.count = tagcount;
58- a.attrname = obj.toString();
59- a.attrvalue = attr.getAttribute(obj).toString();
60-
61- AttrList.add(a);
62- size = AttrList.size();
63- }
64-
65- }
66-
67- /**
68- * 属性情報検索.
69- * @param tag
70- * @param attrname
71- * @param attrvalue
72- * @return
73- */
74- public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
75- boolean ret = false;
76- for (Object AttrList1 : AttrList) {
77- AttrData a = (AttrData)AttrList1;
78- if(a.tag == tag) {
79- if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
80- ret = true;
81- }
82- }
83- }
84- return ret;
85- }
86-
87- public boolean searchId(HTML.Tag tag, String attrvalue) {
88- return search(tag, "id", attrvalue);
89- }
90-
91- public boolean searchClass(HTML.Tag tag, String attrvalue) {
92- return search(tag, "class", attrvalue);
93- }
94-
95- /**
96- * 属性の値を取得する.
97- * @param tag
98- * @param attrname
99- * @return
100- */
101- public ArrayList getvale(HTML.Tag tag, String attrname) {
102- ArrayList ret = new ArrayList();
103- for (Object AttrList1 : AttrList) {
104- AttrData a = (AttrData)AttrList1;
105- if(a.tag == tag) {
106- if(a.attrname.equals(attrname)) {
107- ret.add(a.attrvalue);
108- }
109- }
110- }
111- return ret;
112- }
113-
114- /**
115- * 引数で渡されたTAGの最新カウント数を返す.
116- * @param tag
117- * @return
118- */
119- private int tagcnt(HTML.Tag tag) {
120- int wkcnt = 0;
121- for (Object AttrList1 : AttrList) {
122- AttrData a = (AttrData)AttrList1;
123- if(a.tag == tag) {
124- if(wkcnt < a.count) {
125- wkcnt = a.count;
126- }
127- }
128- }
129- return wkcnt;
130- }
131-
132- // AttrList の内容を返すメソッド
133- public HTML.Tag gettag(int i) {
134- AttrData a = (AttrData)AttrList.get(i);
135- return a.tag;
136- }
137-
138- public int getcount(int i) {
139- AttrData a = (AttrData)AttrList.get(i);
140- return a.count;
141- }
142-
143- public String getattrname(int i) {
144- AttrData a = (AttrData)AttrList.get(i);
145- return a.attrname;
146- }
147-
148- public String getattrvalue(int i) {
149- AttrData a = (AttrData)AttrList.get(i);
150- return a.attrvalue;
151- }
152-
153- // フィールド変数
154- public class AttrData {
155- public HTML.Tag tag;
156- public int count;
157- public String attrname;
158- public String attrvalue;
159- }
160- public ArrayList AttrList;
161- public int size; // AttrListのサイズ
162-
163-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/DebugProcess.java (revision 114)
+++ trunk/HtmlTest2/src/WebScraping/DebugProcess.java (nonexistent)
@@ -1,264 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package WebScraping;
24-
25-import java.io.File;
26-import java.io.FileInputStream;
27-import java.io.FileNotFoundException;
28-import java.io.IOException;
29-import java.util.logging.FileHandler;
30-import java.util.logging.Formatter;
31-import java.util.logging.Handler;
32-import java.util.logging.Level;
33-import java.util.logging.LogManager;
34-import java.util.logging.LogRecord;
35-import java.util.logging.Logger;
36-import javax.swing.text.MutableAttributeSet;
37-import javax.swing.text.html.HTML;
38-
39-/**
40- * デバック情報.
41- * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。
42- * @author kgto
43- */
44-public class DebugProcess {
45- // 設定ファイル名
46- protected static final String configurationFilename = "Debug.prop";
47- // ロガー名
48- protected static final Logger logger = Logger.getLogger("WebScraping");
49- // ログ出力デフォルトレベル
50- protected static final Level loggerlevel = Level.FINEST;
51-
52-
53- /**
54- * ログ出力設定.
55- * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、
56- * ファイルハンドラの設定と出力書式の設定を行う。
57- */
58- public static void debuglog_set() {
59- try {
60- initLogConfiguration();
61-
62- if(Level.ALL.equals(logger.getLevel())) {
63- //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2));
64- logger.addHandler(new FileHandler("WebScraping%g.log", true));
65- }
66- setFomatter();
67-
68- } catch (IOException | SecurityException ex) {
69- Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
70- }
71- }
72-
73- /**
74- * ログ出力設定解除.
75- */
76- public static void debuglog_unset() {
77- }
78-
79-
80- /**
81- * デバック出力(HTML解析-タグ&属性).
82- * HTMLのタグと属性の解析状態を出力する。
83- * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br>
84- * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br>
85- * @param tag タグ
86- * @param attr 属性
87- * @param methodname このメソッドを呼び出した親メソッド名
88- * @param count HTMLタグの階層レベル
89- */
90- public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr,
91- String methodname, int count) {
92-
93- // ログ出力レベルチェック
94- if(logger.getLevel() == null) {
95- return;
96- }
97- if(logger.getLevel().intValue() > loggerlevel.intValue()) {
98- return;
99- }
100-
101- // 編集処理
102- char kbn = ' ';
103- if("handleStartTag".equals(methodname)) {
104- kbn = 'F';
105- }
106- if("handleEndTag".equals(methodname)) {
107- kbn = 'E';
108- }
109- if("handleSimpleTag".equals(methodname)) {
110- kbn = 'S';
111- }
112-
113- StringBuilder strBuf = new StringBuilder(80);
114- strBuf.append(count).append(" : ");
115- strBuf.append(kbn).append(" : ");
116- strBuf.append(tag.toString());
117- // 属性情報
118- if(attr != null) {
119- if(attr.getAttributeCount() != 0) {
120- AttributeData handleAttrData = new AttributeData();
121- handleAttrData.add(tag, attr);
122- for(int i = 0; i < handleAttrData.size; i++) {
123- strBuf.append(" [");
124- strBuf.append(handleAttrData.getattrname(i));
125- strBuf.append("]");
126- strBuf.append(handleAttrData.getcount(i));
127- strBuf.append(" = ");
128- strBuf.append(handleAttrData.getattrvalue(i));
129- }
130- }
131- }
132-
133- logger.log(loggerlevel, strBuf.toString());
134- }
135-
136- /**
137- * デバック出力(メッセージ).
138- * 引数に渡された任意のメッセージを出力する。
139- * @param str メッセージ
140- * @param methodname このメソッドを呼び出した親メソッド名
141- */
142- public static void htmlinfo(String str, String methodname) {
143- logger.log(loggerlevel, str);
144- }
145-
146- public static void htmlinfo(String str) {
147- logger.log(loggerlevel, str);
148- }
149-
150- /**
151- * デバック出力(HTML解析-本文).
152- * 本文の内容を出力する。
153- * @param data 本文(HTML内の文字列)
154- * @param methodname このメソッドを呼び出した親メソッド名
155- */
156- public static void htmlinfo(char[] data, String methodname) {
157- String dat = new String(data);
158- logger.log(loggerlevel, dat);
159- }
160-
161- public static void htmlinfo(char[] data) {
162- String dat = new String(data);
163- logger.log(loggerlevel, dat);
164- }
165-
166- /**
167- * デバック出力(検索キー).
168- * 検索キー(SearchData)の内容を出力する。
169- * @param skey
170- */
171- public static void searchDatainfo(SearchData skey) {
172-
173- StringBuilder strBuf = new StringBuilder(30);
174- strBuf.append("SearchData KEY tag[");
175- strBuf.append(skey.getHtmltag());
176- strBuf.append("] ID[");
177- strBuf.append(skey.getHtmlid());
178- strBuf.append("] CLASS[");
179- strBuf.append(skey.getHtmlclass());
180- strBuf.append("]\n");
181-
182- logger.log(loggerlevel, strBuf.toString());
183- }
184-
185- /**
186- * ログ出力設定ファイルチェック.
187- * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。
188- */
189- private static void initLogConfiguration() {
190-
191- File file = new File(configurationFilename);
192- try {
193- if(file.exists()) {
194- FileInputStream inputStream = new FileInputStream(file);
195- // 設定ファイルの読み込み
196- LogManager.getLogManager().readConfiguration(inputStream);
197- }
198-
199- } catch (FileNotFoundException ex) {
200- Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
201- } catch (IOException ex) {
202- Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
203- }
204- }
205-
206- /**
207- * ログ出力フォーマッター設定.
208- * ファイルへログ出力時の書式を設定する。
209- */
210- private static void setFomatter() {
211- Handler[] handlers = logger.getHandlers();
212- for(int i = 0 ; i < handlers.length ; i++) {
213- if(handlers[i] instanceof java.util.logging.FileHandler) {
214- handlers[i].setFormatter(new HtmlFormatter());
215- }
216- }
217- }
218-
219-}
220-
221-/**
222- * ログ出力フォーマッター.
223- * @author kgto
224- */
225-class HtmlFormatter extends Formatter {
226- /**
227- * Logの出力文字列を生成する。
228- * 出力書式:<br>
229- * YYYY-MM-DD HH:SS:MM ログレベル<メソッド名>メッセージ
230- */
231- @Override
232- public synchronized String format(final LogRecord aRecord) {
233-
234- final StringBuffer message = new StringBuffer(100);
235-
236- long millis = aRecord.getMillis();
237- String time = String.format("%tF %<tT", millis);
238-
239- message.append(time);
240- message.append(' ');
241-
242- message.append(aRecord.getLevel());
243- message.append('<');
244- String methodName = aRecord.getSourceMethodName();
245- message.append(methodName != null ? methodName : "N/A");
246- message.append('>');
247-
248- message.append(formatMessage(aRecord));
249- message.append('\n');
250-
251- // 例外エラーの場合、エラー内容とスタックトレース出力
252- Throwable throwable = aRecord.getThrown();
253- if (throwable != null) {
254- message.append(throwable.toString());
255- message.append('\n');
256- for (StackTraceElement trace : throwable.getStackTrace()) {
257- message.append('\t');
258- message.append(trace.toString());
259- message.append('\n');
260- }
261- }
262- return message.toString();
263- }
264-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/HtmlParser.java (revision 114)
+++ trunk/HtmlTest2/src/WebScraping/HtmlParser.java (nonexistent)
@@ -1,255 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package WebScraping;
24-
25-import java.io.*;
26-import java.net.*;
27-import java.util.ArrayList;
28-import java.util.logging.Level;
29-import java.util.logging.Logger;
30-import java.util.regex.Matcher;
31-import java.util.regex.Pattern;
32-import javax.swing.text.html.parser.ParserDelegator;
33-
34-/**
35- *
36- * @author kgto
37- */
38-public class HtmlParser {
39-
40- URL url;
41- String pageData;
42- ArrayList sData;
43-
44- // 作業ワーク
45- String htmltag;
46- String htmlid;
47- String htmlclass;
48-
49- public HtmlParser(URL UrlAdress) {
50- DebugProcess.debuglog_set();
51- this.url = UrlAdress;
52- getPageData();
53- }
54-
55- public HtmlParser(String UrlAdress) {
56- DebugProcess.debuglog_set();
57- try {
58- url = new URL(UrlAdress);
59- getPageData();
60-
61- } catch (MalformedURLException ex) {
62- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
63- }
64- }
65-
66- public HtmlParser() {
67- DebugProcess.debuglog_set();
68- url = null;
69- }
70-
71- public String getStringPageData() {
72- return pageData;
73- }
74-
75- public void seturl(URL UrlAdress) {
76- this.url = UrlAdress;
77- getPageData();
78- }
79-
80- public void seturl(String UrlAdress) {
81- try {
82- url = new URL(UrlAdress);
83- getPageData();
84-
85- } catch (MalformedURLException ex) {
86- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
87- }
88- }
89-
90- /**
91- * HTMLページ内検索.
92- * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、
93- * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を
94- * 行った結果を返す。<br>
95- * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br>
96- * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br>
97- * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。
98- * @param skey 検索キーデータ(SearchData)
99- * @return String 検索キーに一致するデータの文字列
100- */
101- public String search(SearchData skey) {
102-
103- // htmlページ内を検索
104- if(isHtmlkeyEq(skey) == false) {
105- searchPageData(skey);
106- }
107- /*
108- around 出現位置指定 入力有り:指定された位置の情報のみ返す。
109- 入力無し:取得した全ての情報を返す。
110- */
111- String regexp = skey.getregexp();
112- if(skey.getaround().length() > 0) {
113- int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換
114- if(wkAround < sData.size()) {
115- String str = (String)sData.get(wkAround);
116- String rtn = RegularExpression(str, regexp);
117- return rtn;
118- }
119- } else {
120- StringBuilder strbuf = new StringBuilder();
121- for (Object sData1 : sData) {
122- String str = (String)sData1;
123- String rtn = RegularExpression(str, regexp);
124- if(strbuf.length() > 0) {
125- strbuf.append("\t");
126- }
127- strbuf.append(rtn);
128- }
129- return strbuf.toString();
130- }
131- return null;
132- }
133-
134- /**
135- * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する.
136- * @param skey HTMLタグ/ID/CLASSが格納された検索キー
137- * @return boolean HTMLタグ/ID/CLASS値が一致する時、true
138- */
139- boolean isHtmlkeyEq(SearchData skey) {
140-
141- String stag = skey.getHtmltag();
142- String sid = skey.getHtmlid();
143- String sclass = skey.getHtmlclass();
144-
145- boolean rtn = true;
146-
147- // htmltag
148- if(htmltag == null) {
149- rtn = false;
150- } else {
151- if(htmltag.equals(stag) == false) {
152- rtn = false;
153- }
154- }
155-
156- // htmlid
157- if(htmlid == null) {
158- rtn = false;
159- } else {
160- if(htmlid.equals(sid) == false) {
161- rtn = false;
162- }
163- }
164-
165- // htmlclass
166- if(htmlclass == null) {
167- rtn = false;
168- } else {
169- if(htmlclass.equals(sclass) == false) {
170- rtn = false;
171- }
172- }
173-
174- if(!rtn) {
175- htmltag = stag;
176- htmlid = sid;
177- htmlclass = sclass;
178- }
179-
180- return rtn;
181- }
182-
183- /**
184- * 正規表現検索.
185- * @param strdata
186- * @param regexp
187- * @return
188- */
189- String RegularExpression(String strdata, String regexp) {
190- String expdata = null;
191-
192- //regexpのチェック
193- if(regexp.isEmpty()) {
194- expdata = strdata;
195- return expdata;
196- }
197-
198- //正規表現検索
199- Pattern ptn = Pattern.compile(regexp);
200- Matcher matchdata = ptn.matcher(strdata);
201- if (matchdata.find()) {
202- if(matchdata.groupCount() >= 1) {
203- expdata = matchdata.group(1);
204- }
205- }
206- return expdata;
207- }
208-
209- /**
210- * インターネット接続.
211- */
212- private void getPageData() {
213- try {
214- //URL url = new URL(UrlAdress);
215- HttpURLConnection con = (HttpURLConnection)url.openConnection();
216- con.setRequestMethod("GET");
217- BufferedReader reader = new BufferedReader(
218- new InputStreamReader(con.getInputStream(), "utf-8"));
219- String wkline;
220- StringBuilder sb = new StringBuilder();
221- while((wkline = reader.readLine()) != null) {
222- sb.append(wkline).append("\n");
223- }
224- pageData = sb.toString();
225-
226- con.disconnect();
227- }
228- catch(IOException ex) {
229- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
230- }
231- }
232-
233- /**
234- * HTMLパーサ.
235- * @param skey
236- */
237- private void searchPageData(SearchData skey) {
238-
239- DebugProcess.searchDatainfo(skey);
240-
241- Reader reader;
242- try {
243- reader = new BufferedReader(new StringReader(pageData));
244- HtmlParserCallback cb = new HtmlParserCallback(skey);
245- ParserDelegator pd = new ParserDelegator();
246- pd.parse(reader, cb, true);
247- reader.close();
248-
249- sData = cb.getrtnData();
250-
251- } catch (IOException ex) {
252- Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
253- }
254- }
255-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/SearchData.java (revision 114)
+++ trunk/HtmlTest2/src/WebScraping/SearchData.java (nonexistent)
@@ -1,113 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package WebScraping;
24-
25-/**
26- * 検索データ.
27- * @author kgto
28- */
29-public class SearchData {
30-
31- private String item;
32- private String htmltag;
33- private String htmlid;
34- private String htmlclass;
35- private String around;
36- private String regexp;
37-
38- public SearchData() {
39- initialize();
40- }
41-
42- public SearchData(SearchData dat) {
43- this.item = dat.getitem();
44- this.htmltag = dat.getHtmltag();
45- this.htmlid = dat.getHtmlid();
46- this.htmlclass = dat.getHtmlclass();
47- this.around = dat.getaround();
48- this.regexp = dat.getregexp();
49- }
50-
51- /**
52- * データ初期化.
53- */
54- public final void initialize() {
55- this.item = "";
56- this.htmltag = "";
57- this.htmlid = "";
58- this.htmlclass = "";
59- this.around = "";
60- this.regexp = "";
61- }
62-
63- // Setter
64- public void setitem(String item) {
65- this.item = item;
66- }
67-
68- public void setHtmltag(String htmltag) {
69- this.htmltag = htmltag;
70- }
71-
72- public void setHtmlid(String htmlid) {
73- this.htmlid = htmlid;
74- }
75-
76- public void setHtmlclass(String htmlclass) {
77- this.htmlclass = htmlclass;
78- }
79-
80- public void setaround(String around) {
81- this.around = around;
82- }
83-
84- public void setregexp(String regexp) {
85- this.regexp = regexp;
86- }
87-
88- // Getter
89- public String getitem() {
90- return item;
91- }
92-
93- public String getHtmltag() {
94- return htmltag;
95- }
96-
97- public String getHtmlid() {
98- return htmlid;
99- }
100-
101- public String getHtmlclass() {
102- return htmlclass;
103- }
104-
105- public String getaround() {
106- return around;
107- }
108-
109- public String getregexp() {
110- return regexp;
111- }
112-
113-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/HtmlParserCallback.java (revision 114)
+++ trunk/HtmlTest2/src/WebScraping/HtmlParserCallback.java (nonexistent)
@@ -1,211 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-/*
20- * $Id$
21- */
22-
23-package WebScraping;
24-
25-import java.util.ArrayList;
26-import java.util.HashMap;
27-import javax.swing.text.MutableAttributeSet;
28-import javax.swing.text.html.HTML;
29-import javax.swing.text.html.HTMLEditorKit;
30-
31-/**
32- * HTMLパーサ部品.
33- * @author kgto
34- */
35-class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36-
37- // Tag毎の階層
38- HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
39-
40- // serach key 情報
41- String keytag;
42- String keyid;
43- String keyclass;
44-
45- // serach key と一致時の情報退避
46- int bufCount = 0;
47- HTML.Tag bufTag = null;
48- // serach key と一致時の情報格納ワーク
49- StringBuilder bufText;
50-
51- // serach key と一致時のデータ一覧
52- ArrayList sData;
53-
54- // 属性データ
55- AttributeData attrdata;
56-
57- protected HtmlParserCallback(SearchData skey) {
58-
59- // キー情報展開
60- keytag = skey.getHtmltag();
61- keyid = skey.getHtmlid();
62- keyclass = skey.getHtmlclass();
63-
64- sData = new ArrayList();
65- }
66-
67- ArrayList getrtnData() {
68- return this.sData;
69- }
70-
71- @Override
72- public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
73- // Tag毎の階層を保持
74- int count = 1;
75- if(tagMap.containsKey(tag)) {
76- count = tagMap.get(tag);
77- count++;
78- }
79- tagMap.put(tag, count);
80-
81- // 属性解析
82- AttributeData handleStartattrdata = new AttributeData();
83- handleStartattrdata.add(tag, attr);
84-
85- DebugProcess.htmlinfo(tag, attr, "handleStartTag", count);
86-
87- if(bufCount == 0) {
88- if(tag.toString().equals(keytag)) {
89- //if(serachAttribute(attr)) {
90- if(serachAttribute(tag, handleStartattrdata)) {
91- bufCount = count;
92- bufTag = tag;
93- attrdata = new AttributeData();
94- bufText = new StringBuilder();
95- }
96- }
97- }
98- if(bufCount > 0) {
99- attrdata.add(tag, attr);
100- }
101- }
102-
103- @Override
104- public void handleEndTag(HTML.Tag tag, int pos){
105- // Tag毎の階層を取得
106- int count = 0;
107- if(tagMap.containsKey(tag)) {
108- count = tagMap.get(tag);
109- }
110-
111- DebugProcess.htmlinfo(tag, null, "handleEndTag", count);
112-
113- if(tag.equals(bufTag) && count <= bufCount) {
114-
115- // 溜め込んだ一致情報をリストへ格納
116- sData.add(bufText.toString());
117-
118- // 退避したserach keyとの一致情報クリア
119- bufCount = 0;
120- bufTag = null;
121- bufText = null;
122- }
123-
124- // Tag毎の階層減算
125- tagMap.put(tag, --count);
126- }
127-
128- @Override
129- public void handleText(char[] data, int pos){
130-
131- DebugProcess.htmlinfo(data, "handleText");
132-
133- String splitchar = "\t";
134- //制御文字の削除
135- // &nbsp; 0xa0
136- StringBuilder buf = new StringBuilder();
137- for(int i = 0; i < data.length; i++) {
138- if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) {
139- buf.append(data[i]);
140- }
141- }
142- if(bufCount > 0) {
143- if(bufText.length() > 0) {
144- bufText.append(splitchar);
145- }
146- bufText.append(buf.toString());
147- }
148- }
149-
150- @Override
151- public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
152- if(bufCount > 0) {
153- attrdata.add(tag, attr);
154- }
155- DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0);
156- }
157-
158- /**
159- * ページ内のID/CLASS値と検索キーを比較する.
160- * @param attr ページのMutableAttributeSet
161- * @return boolean 検索キーと一致の時、true
162- */
163- boolean serachAttribute(MutableAttributeSet attr) {
164- String currentID = (String)attr.getAttribute(HTML.Attribute.ID);
165- String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS);
166-
167- if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
168- if(keyid.equals(currentID) && keyclass.equals(currentClass)) {
169- return true;
170- }
171- }
172-
173- if(keyid.isEmpty() == false) {
174- if(keyid.equals(currentID)) {
175- return true;
176- }
177- }
178-
179- if(keyclass.isEmpty() == false) {
180- if(keyclass.equals(currentClass)) {
181- return true;
182- }
183- }
184-
185- return false;
186- }
187-
188- /**
189- * ページ内のID/CLASS値と検索キーを比較する.
190- * @param tag
191- * @param attrdata
192- * @return boolean 検索キーと一致の時、true
193- */
194- boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) {
195- // ID と CLASS の両方にキー入力有りの場合
196- if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
197- if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) {
198- return true;
199- }
200- }
201- // ID のキーチェック
202- if(keyid.isEmpty() == false) {
203- return attrdata.searchId(tag, keyid);
204- }
205- // CLASS のキーチェック
206- if(keyclass.isEmpty() == false) {
207- return attrdata.searchClass(tag, keyclass);
208- }
209- return false;
210- }
211-}
Deleted: svn:keywords
## -1 +0,0 ##
-Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/utility/SearchDataRW.java (revision 115)
@@ -0,0 +1,547 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.utility;
24+
25+import webScraping.core.SearchData;
26+import java.io.BufferedReader;
27+import java.io.BufferedWriter;
28+import java.io.File;
29+import java.io.FileInputStream;
30+import java.io.FileNotFoundException;
31+import java.io.FileOutputStream;
32+import java.io.IOException;
33+import java.io.InputStreamReader;
34+import java.io.OutputStreamWriter;
35+import java.util.ArrayList;
36+import java.util.logging.Level;
37+import java.util.logging.Logger;
38+import javax.xml.parsers.DocumentBuilder;
39+import javax.xml.parsers.DocumentBuilderFactory;
40+import javax.xml.parsers.ParserConfigurationException;
41+import javax.xml.transform.Transformer;
42+import javax.xml.transform.TransformerConfigurationException;
43+import javax.xml.transform.TransformerException;
44+import javax.xml.transform.TransformerFactory;
45+import javax.xml.transform.dom.DOMSource;
46+import javax.xml.transform.stream.StreamResult;
47+import org.w3c.dom.DOMImplementation;
48+import org.w3c.dom.Document;
49+import org.w3c.dom.Element;
50+import org.w3c.dom.Node;
51+import org.w3c.dom.NodeList;
52+import org.xml.sax.SAXException;
53+
54+/**
55+ *
56+ * @author kgto
57+ */
58+public class SearchDataRW {
59+
60+ DocumentBuilder builder;
61+ public Document document;
62+ Element root;
63+
64+ private final String splitchar = "\t";
65+
66+ private String UrlAdress;
67+ private ArrayList<SearchData> slist = new ArrayList<>();
68+
69+ public SearchDataRW() {
70+ try {
71+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
72+ builder = factory.newDocumentBuilder();
73+
74+ } catch (ParserConfigurationException ex) {
75+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
76+ }
77+ }
78+
79+ public void seturl(String UrlAdress) {
80+ this.UrlAdress = UrlAdress;
81+ }
82+
83+ public void setslist(ArrayList slist) {
84+ this.slist = slist;
85+ }
86+
87+ public String geturl() {
88+ return UrlAdress;
89+ }
90+
91+ public ArrayList getslist() {
92+ return slist;
93+ }
94+
95+ /**
96+ * 保存.
97+ * @param file
98+ */
99+ public void save(File file) {
100+ //saveCsv(file);
101+ //saveXml(file);
102+
103+ saveUrl(UrlAdress);
104+ saveSearchList(slist);
105+ write(file);
106+ }
107+
108+ /**
109+ * 読込.
110+ * @param file
111+ */
112+ public void load(File file) {
113+ //loadCsv(file);
114+ //loadXml(file);
115+
116+ read(file);
117+ loadUrl();
118+ loadSearchList();
119+ }
120+
121+ /* ---------------------------------------------------------------------- */
122+ /**
123+ * 保存(CSV形式).
124+ * @param file
125+ */
126+ public void saveCsv(File file) {
127+ BufferedWriter bufferedwriter = null;
128+ try {
129+ //空のファイルを作成
130+ file.createNewFile();
131+ FileOutputStream fileoutputstream = new FileOutputStream(file);
132+ OutputStreamWriter outputstreamwriter = new OutputStreamWriter(fileoutputstream, "UTF-8");
133+ bufferedwriter = new BufferedWriter(outputstreamwriter);
134+
135+ // URL
136+ bufferedwriter.write(UrlAdress);
137+ bufferedwriter.write("\n");
138+ // 検索情報
139+ for(Object slist1 : slist) {
140+ SearchData sdat = (SearchData)slist1;
141+ //
142+ StringBuilder str = new StringBuilder();
143+ str.append(sdat.getitem()).append(splitchar);
144+ str.append(sdat.getHtmltag()).append(splitchar);
145+ str.append(sdat.getHtmlid()).append(splitchar);
146+ str.append(sdat.getHtmlclass()).append(splitchar);
147+ str.append(sdat.getaround()).append(splitchar);
148+ str.append(sdat.getregexp()).append("\n");
149+ // 書込み
150+ bufferedwriter.write(str.toString());
151+ }
152+
153+ } catch (IOException ex) {
154+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
155+ } finally {
156+ try {
157+ if(bufferedwriter != null) {
158+ bufferedwriter.close();
159+ }
160+
161+ } catch (IOException ex) {
162+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
163+ }
164+ }
165+ }
166+
167+ /**
168+ * 読込(CSV形式).
169+ * @param file
170+ */
171+ public void loadCsv(File file) {
172+ slist = new ArrayList();
173+
174+ BufferedReader bufferedreader = null;
175+ try {
176+ FileInputStream fileinputstream = new FileInputStream(file);
177+ InputStreamReader inputstreamreader = new InputStreamReader(fileinputstream, "UTF-8");
178+ bufferedreader = new BufferedReader(inputstreamreader);
179+
180+ // URL
181+ UrlAdress = bufferedreader.readLine();
182+ // 検索情報
183+ String rec;
184+ while((rec = bufferedreader.readLine()) != null) {
185+ String[] recary = rec.split(splitchar, -1);
186+ SearchData sdat = new SearchData();
187+ sdat.setitem(recary[0]);
188+ sdat.setHtmltag(recary[1]);
189+ sdat.setHtmlid(recary[2]);
190+ sdat.setHtmlclass(recary[3]);
191+ sdat.setaround(recary[4]);
192+ sdat.setregexp(recary[5]);
193+
194+ slist.add(sdat);
195+ }
196+
197+ } catch(IOException ex) {
198+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
199+
200+ } finally {
201+ try {
202+ if(bufferedreader != null) {
203+ bufferedreader.close();
204+ }
205+
206+ } catch (IOException ex) {
207+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
208+ }
209+ }
210+ }
211+
212+ /* ---------------------------------------------------------------------- */
213+ /**
214+ * 保存(XML形式).
215+ * @param file
216+ */
217+ public void saveXml(File file) {
218+ try {
219+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
220+ DocumentBuilder wkBuilder = factory.newDocumentBuilder();
221+ DOMImplementation domImpl = wkBuilder.getDOMImplementation();
222+
223+ Document doc = domImpl.createDocument("","searchdata",null);
224+ Element wkRoot = doc.getDocumentElement();
225+
226+ // URL
227+ Element url = doc.createElement("url");
228+ url.appendChild(doc.createTextNode(UrlAdress));
229+ wkRoot.appendChild(url);
230+
231+ // 検索情報
232+ for (Object slist1 : slist) {
233+ SearchData sdat = (SearchData) slist1;
234+
235+ Element cslist = doc.createElement("searchlist");
236+ Element item = doc.createElement("item");
237+ Element htmltag = doc.createElement("htmltag");
238+ Element htmlid = doc.createElement("htmlid");
239+ Element htmlclass = doc.createElement("htmlclass");
240+ Element around = doc.createElement("around");
241+ Element regexp = doc.createElement("regexp");
242+
243+ item.appendChild(doc.createTextNode(sdat.getitem()));
244+ htmltag.appendChild(doc.createTextNode(sdat.getHtmltag()));
245+ htmlid.appendChild(doc.createTextNode(sdat.getHtmlid()));
246+ htmlclass.appendChild(doc.createTextNode(sdat.getHtmlclass()));
247+ around.appendChild(doc.createTextNode(sdat.getaround()));
248+ regexp.appendChild(doc.createTextNode(sdat.getregexp()));
249+
250+ cslist.appendChild(item);
251+ cslist.appendChild(htmltag);
252+ cslist.appendChild(htmlid);
253+ cslist.appendChild(htmlclass);
254+ cslist.appendChild(around);
255+ cslist.appendChild(regexp);
256+
257+ wkRoot.appendChild(cslist);
258+ }
259+ // 出力
260+ TransformerFactory transFactory = TransformerFactory.newInstance();
261+ Transformer transformer = transFactory.newTransformer();
262+
263+ DOMSource source = new DOMSource(doc);
264+ FileOutputStream os = new FileOutputStream(file);
265+ StreamResult result = new StreamResult(os);
266+ transformer.transform(source, result);
267+
268+ } catch (ParserConfigurationException | FileNotFoundException ex) {
269+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
270+ } catch (TransformerConfigurationException ex) {
271+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
272+ } catch (TransformerException ex) {
273+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
274+ }
275+ }
276+
277+ /**
278+ * 読込(XML形式).
279+ * @param file
280+ */
281+ public void loadXml(File file) {
282+ slist = new ArrayList();
283+
284+ try {
285+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
286+ DocumentBuilder wkBuilder = factory.newDocumentBuilder();
287+ Document doc = wkBuilder.parse(file);
288+
289+ // ルート要素の取得
290+ Element wkRoot = doc.getDocumentElement();
291+
292+ // URL
293+ NodeList url = wkRoot.getElementsByTagName("url");
294+ Node urlnode = url.item(0);
295+ UrlAdress = urlnode.getFirstChild().getNodeValue();
296+
297+ // 検索情報
298+ NodeList cslist = wkRoot.getElementsByTagName("searchlist");
299+ for(int i = 0; i < cslist.getLength(); i++) {
300+ SearchData sdat = new SearchData();
301+
302+ Node slistnode = cslist.item(i);
303+ Node child;
304+ for (child = slistnode.getFirstChild(); child != null; child = child.getNextSibling()) {
305+ if(child.getNodeType() == Node.ELEMENT_NODE) {
306+
307+ String tag = child.getNodeName();
308+ String rtn = "";
309+ if(child.getFirstChild() != null) {
310+ rtn = child.getFirstChild().getNodeValue();
311+ }
312+
313+ switch (tag) {
314+ case "item" :
315+ sdat.setitem(rtn);
316+ break;
317+ case "htmltag" :
318+ sdat.setHtmltag(rtn);
319+ break;
320+ case "htmlid" :
321+ sdat.setHtmlid(rtn);
322+ break;
323+ case "htmlclass" :
324+ sdat.setHtmlclass(rtn);
325+ break;
326+ case "around" :
327+ sdat.setaround(rtn);
328+ break;
329+ case "regexp" :
330+ sdat.setregexp(rtn);
331+ break;
332+ }
333+ }
334+ }
335+ slist.add(sdat);
336+ }
337+
338+ } catch (ParserConfigurationException | SAXException | IOException ex) {
339+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
340+ }
341+ }
342+
343+ /* ---------------------------------------------------------------------- */
344+
345+ void loadUrl() {
346+ NodeList nodelist = root.getElementsByTagName("url");
347+ Node node = nodelist.item(0);
348+ UrlAdress = node.getFirstChild().getNodeValue();
349+ }
350+
351+ public void loadSearchList() {
352+ slist.clear();
353+ SearchData.clear();
354+
355+ NodeList nodelist = root.getElementsByTagName("searchlist");
356+ for(int i = 0; i < nodelist.getLength(); i++) {
357+ Node childnode = nodelist.item(i);
358+
359+ boolean sdatflg = false;
360+ SearchData sdat = new SearchData();
361+ for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) {
362+ if(child.getNodeType() == Node.ELEMENT_NODE) {
363+ String tag = child.getNodeName();
364+ String rtn = "";
365+ if(child.getFirstChild() != null) {
366+ rtn = child.getFirstChild().getNodeValue();
367+ }
368+ switch (tag) {
369+ case "item" :
370+ sdat.setitem(rtn);
371+ sdatflg = true;
372+ break;
373+ case "htmltag" :
374+ sdat.setHtmltag(rtn);
375+ sdatflg = true;
376+ break;
377+ case "htmlid" :
378+ sdat.setHtmlid(rtn);
379+ sdatflg = true;
380+ break;
381+ case "htmlclass" :
382+ sdat.setHtmlclass(rtn);
383+ sdatflg = true;
384+ break;
385+ case "around" :
386+ sdat.setaround(rtn);
387+ sdatflg = true;
388+ break;
389+ case "regexp" :
390+ sdat.setregexp(rtn);
391+ sdatflg = true;
392+ break;
393+ }
394+ }
395+ }
396+ if(sdatflg) slist.add(sdat);
397+ if(sdatflg) SearchData.add(sdat);
398+ }
399+ }
400+
401+ public String loadMsg404() {
402+ StringBuilder strbuf = new StringBuilder();
403+ NodeList nodelist = root.getElementsByTagName("msg404");
404+ for(int i = 0; i < nodelist.getLength(); i++) {
405+ Node childnode = nodelist.item(i);
406+ String str = childnode.getFirstChild().getNodeValue();
407+ if(strbuf.length() > 0) {
408+ strbuf.append("\n");
409+ }
410+ strbuf.append(str);
411+ }
412+ return strbuf.toString();
413+ }
414+
415+ public Element loadElement(String elementTagName) {
416+ NodeList nodelist = root.getElementsByTagName(elementTagName);
417+ Element element = (Element)nodelist.item(0);
418+
419+ return element;
420+ }
421+
422+ /* ---------------------------------------------------------------------- */
423+
424+ void saveUrl(String urladdress) {
425+ checkdoc();
426+ removeElement("url"); // 既にElementが存在してた場合、一度削除
427+
428+ Element url = document.createElement("url");
429+ url.appendChild(document.createTextNode(urladdress));
430+ root.appendChild(url);
431+ }
432+
433+ void saveSearchList(ArrayList slist) {
434+ checkdoc();
435+ removeElement("searchlist"); // 既にElementが存在してた場合、一度削除
436+
437+ int count = 0;
438+ for (Object slist1 : slist) {
439+ SearchData sdat = (SearchData) slist1;
440+
441+ Element cslist = document.createElement("searchlist");
442+ cslist.setAttribute("listNo", String.valueOf(++count));
443+
444+ addChild(cslist, "item", sdat.getitem());
445+ addChild(cslist, "htmltag", sdat.getHtmltag());
446+ addChild(cslist, "htmlid", sdat.getHtmlid());
447+ addChild(cslist, "htmlclass", sdat.getHtmlclass());
448+ addChild(cslist, "around", sdat.getaround());
449+ addChild(cslist, "regexp", sdat.getregexp());
450+
451+ root.appendChild(cslist);
452+ }
453+ }
454+
455+ void saveMsg404(String msg) {
456+ checkdoc();
457+ removeElement("msg404"); // 既にElementが存在してた場合、一度削除
458+
459+ String[] msgs = msg.split("\n");
460+ int count = 0;
461+ for(String msgOne : msgs) {
462+ Element msgElement = document.createElement("msg404");
463+ msgElement.setAttribute("No", String.valueOf(++count));
464+ msgElement.appendChild(document.createTextNode(msgOne));
465+
466+ root.appendChild(msgElement);
467+ }
468+ }
469+
470+ public void saveElement(Element element) {
471+ checkdoc();
472+ removeElement(element.getTagName()); // 既にElementが存在してた場合、一度削除
473+
474+ root.appendChild(element);
475+ }
476+
477+ /* ---------------------------------------------------------------------- */
478+
479+ private void addChild(Element cslist, String keyword, String data) {
480+ if(!data.isEmpty()) {
481+ Element element = document.createElement(keyword);
482+ element.appendChild(document.createTextNode(data));
483+ cslist.appendChild(element);
484+ }
485+ }
486+
487+ private void removeElement(String elementTagName) {
488+ int nodeSize;
489+ do {
490+ NodeList nodelist = document.getElementsByTagName(elementTagName);
491+ nodeSize = nodelist.getLength();
492+ for(int i = 0; i < nodelist.getLength(); i++) {
493+ Node node = nodelist.item(i);
494+ root.removeChild(node);
495+ }
496+ } while(nodeSize > 0);
497+ }
498+
499+ /**
500+ * ドキュメントチェック.
501+ * 新規の場合やXMLファイルの読込みが行われていない状態時、新たにルートエレメントを作成する。
502+ * 既読の場合、ルートエレメントの取得を行う。
503+ */
504+ public void checkdoc() {
505+ if(document == null) {
506+ DOMImplementation domImpl = builder.getDOMImplementation();
507+ document = domImpl.createDocument("","searchdata",null);
508+ }
509+ root = document.getDocumentElement();
510+ }
511+
512+ /**
513+ * XML読込み.
514+ * @param file
515+ */
516+ public void read(File file) {
517+ try {
518+ document = builder.parse(file);
519+ root = document.getDocumentElement();
520+
521+ } catch (SAXException | IOException ex) {
522+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
523+ }
524+ }
525+
526+ /**
527+ * XML書込み.
528+ * @param file
529+ */
530+ public void write(File file) {
531+ try {
532+ TransformerFactory transFactory = TransformerFactory.newInstance();
533+ Transformer transformer = transFactory.newTransformer();
534+
535+ DOMSource source = new DOMSource(document);
536+ FileOutputStream os = new FileOutputStream(file);
537+ StreamResult result = new StreamResult(os);
538+ transformer.transform(source, result);
539+
540+ } catch (TransformerConfigurationException ex) {
541+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
542+ } catch (FileNotFoundException | TransformerException ex) {
543+ Logger.getLogger(SearchDataRW.class.getName()).log(Level.SEVERE, null, ex);
544+ }
545+ }
546+
547+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/utility/HtmlSearch.java (revision 115)
@@ -0,0 +1,566 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+package webScraping.utility;
23+
24+import webScraping.core.HtmlParser;
25+import webScraping.core.SearchData;
26+import java.awt.Desktop;
27+import java.io.File;
28+import java.io.IOException;
29+import java.net.URI;
30+import java.net.URISyntaxException;
31+import java.util.logging.Level;
32+import java.util.logging.Logger;
33+import javax.swing.JFileChooser;
34+import javax.swing.filechooser.FileFilter;
35+import javax.swing.filechooser.FileNameExtensionFilter;
36+import javax.swing.table.DefaultTableModel;
37+
38+/**
39+ * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する.
40+ * @author kgto
41+ */
42+public class HtmlSearch extends javax.swing.JFrame {
43+ private final SearchDataRW sio = new SearchDataRW();
44+
45+ SearchDataTableModel sdatatblmodel;
46+
47+ /**
48+ * Creates new form Frame1
49+ */
50+ public HtmlSearch() {
51+ sdatatblmodel = new SearchDataTableModel();
52+
53+ initComponents();
54+
55+ // カレントディレクトリ取得
56+ String dir = System.getProperty("user.dir");
57+ File file = new java.io.File(dir + "\\data");
58+ jFileChooser1.setCurrentDirectory(file);
59+
60+ FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml");
61+ FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt");
62+ jFileChooser1.addChoosableFileFilter(filter1);
63+ jFileChooser1.addChoosableFileFilter(filter2);
64+ jFileChooser1.setFileFilter(filter1);
65+
66+ }
67+
68+ /**
69+ * This method is called from within the constructor to initialize the form.
70+ * WARNING: Do NOT modify this code. The content of this method is always
71+ * regenerated by the Form Editor.
72+ */
73+ @SuppressWarnings("unchecked")
74+ // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
75+ private void initComponents() {
76+
// Instantiate every Swing component used by the form.
77+ jFileChooser1 = new javax.swing.JFileChooser();
78+ jRadioButton1 = new javax.swing.JRadioButton();
79+ jLabel1 = new javax.swing.JLabel();
80+ jTxtUrl = new javax.swing.JTextField();
81+ jBtnSearch = new javax.swing.JButton();
82+ jTabbedPane1 = new javax.swing.JTabbedPane();
83+ jPanelTab1 = new javax.swing.JPanel();
84+ jScrollPane1 = new javax.swing.JScrollPane();
85+ jTable1 = new javax.swing.JTable();
86+ jBtnRowIns = new javax.swing.JButton();
87+ jBtnRowDel = new javax.swing.JButton();
88+ jBtnRowCpy = new javax.swing.JButton();
89+ jPanelTab2 = new javax.swing.JPanel();
90+ jScrollPaneLabel = new javax.swing.JScrollPane();
91+ jTxtLabel = new javax.swing.JTextArea();
92+ jScrollPane404msg = new javax.swing.JScrollPane();
93+ jTxt404msg = new javax.swing.JTextArea();
94+ jPanelRtn = new javax.swing.JPanel();
95+ jScrollPaneRtn = new javax.swing.JScrollPane();
96+ jTxtRtn = new javax.swing.JTextArea();
97+ jMenuBar1 = new javax.swing.JMenuBar();
98+ jMenu1 = new javax.swing.JMenu();
99+ jMenuLoad = new javax.swing.JMenuItem();
100+ jMenuSave = new javax.swing.JMenuItem();
101+ jMenu3 = new javax.swing.JMenu();
102+ jMenuItem1 = new javax.swing.JMenuItem();
103+ jMenu2 = new javax.swing.JMenu();
104+
105+ jFileChooser1.setCurrentDirectory(null);
106+ jFileChooser1.setDialogTitle("");
107+
108+ jRadioButton1.setText("jRadioButton1");
109+
// Closing the window terminates the application.
110+ setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
111+ setTitle("タグ検索");
112+
113+ jLabel1.setText(" URL:");
114+
// Search button: runs the search through jBtnSearchActionPerformed.
115+ jBtnSearch.setText("検索");
116+ jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
117+ public void actionPerformed(java.awt.event.ActionEvent evt) {
118+ jBtnSearchActionPerformed(evt);
119+ }
120+ });
121+
// Tab 1: table of search keys (single-row selection, fixed column order)
// with row insert / delete / copy buttons.
122+ jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));
123+
124+ jTable1.setModel(sdatatblmodel);
125+ jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
126+ jTable1.getTableHeader().setReorderingAllowed(false);
127+ jScrollPane1.setViewportView(jTable1);
128+
129+ jBtnRowIns.setText("行挿入");
130+ jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
131+ public void actionPerformed(java.awt.event.ActionEvent evt) {
132+ jBtnRowInsActionPerformed(evt);
133+ }
134+ });
135+
136+ jBtnRowDel.setText("行削除");
137+ jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
138+ public void actionPerformed(java.awt.event.ActionEvent evt) {
139+ jBtnRowDelActionPerformed(evt);
140+ }
141+ });
142+
143+ jBtnRowCpy.setText("行コピー");
144+ jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
145+ public void actionPerformed(java.awt.event.ActionEvent evt) {
146+ jBtnRowCpyActionPerformed(evt);
147+ }
148+ });
149+
150+ javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1);
151+ jPanelTab1.setLayout(jPanelTab1Layout);
152+ jPanelTab1Layout.setHorizontalGroup(
153+ jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
154+ .addGroup(jPanelTab1Layout.createSequentialGroup()
155+ .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
156+ .addComponent(jBtnRowCpy)
157+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
158+ .addComponent(jBtnRowDel)
159+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
160+ .addComponent(jBtnRowIns))
161+ .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
162+ );
163+ jPanelTab1Layout.setVerticalGroup(
164+ jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
165+ .addGroup(jPanelTab1Layout.createSequentialGroup()
166+ .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE)
167+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
168+ .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
169+ .addComponent(jBtnRowDel)
170+ .addComponent(jBtnRowIns)
171+ .addComponent(jBtnRowCpy)))
172+ );
173+
174+ jTabbedPane1.addTab("キー設定", jPanelTab1);
175+
// Tab 2: a read-only explanatory label above an editable text area. Per the
// label text, a fetched page containing one of the listed messages is
// reported as "target data could not be obtained".
176+ jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ"));
177+
178+ jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
179+ jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER);
180+
181+ jTxtLabel.setEditable(false);
182+ jTxtLabel.setBackground(java.awt.Color.lightGray);
183+ jTxtLabel.setColumns(20);
184+ jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N
185+ jTxtLabel.setLineWrap(true);
186+ jTxtLabel.setRows(2);
187+ jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。");
188+ jTxtLabel.setAutoscrolls(false);
189+ jTxtLabel.setBorder(null);
190+ jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR));
191+ jTxtLabel.setFocusable(false);
192+ jTxtLabel.setHighlighter(null);
193+ jTxtLabel.setKeymap(null);
194+ jTxtLabel.setOpaque(false);
195+ jTxtLabel.setRequestFocusEnabled(false);
196+ jTxtLabel.setVerifyInputWhenFocusTarget(false);
197+ jScrollPaneLabel.setViewportView(jTxtLabel);
198+
// Default "no result" message, one message per line.
199+ jTxt404msg.setColumns(20);
200+ jTxt404msg.setRows(3);
201+ jTxt404msg.setText("一致する銘柄は見つかりませんでした\n");
202+ jScrollPane404msg.setViewportView(jTxt404msg);
203+
204+ javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2);
205+ jPanelTab2.setLayout(jPanelTab2Layout);
206+ jPanelTab2Layout.setHorizontalGroup(
207+ jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
208+ .addComponent(jScrollPane404msg)
209+ .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup()
210+ .addContainerGap()
211+ .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE)
212+ .addContainerGap())
213+ );
214+ jPanelTab2Layout.setVerticalGroup(
215+ jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
216+ .addGroup(jPanelTab2Layout.createSequentialGroup()
217+ .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE)
218+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
219+ .addComponent(jScrollPane404msg))
220+ );
221+
222+ jTabbedPane1.addTab("結果無し判定", jPanelTab2);
223+
// Result panel: scrollable text area that receives the search output.
224+ jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));
225+
226+ jTxtRtn.setColumns(20);
227+ jTxtRtn.setRows(5);
228+ jScrollPaneRtn.setViewportView(jTxtRtn);
229+
230+ javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn);
231+ jPanelRtn.setLayout(jPanelRtnLayout);
232+ jPanelRtnLayout.setHorizontalGroup(
233+ jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
234+ .addComponent(jScrollPaneRtn)
235+ );
236+ jPanelRtnLayout.setVerticalGroup(
237+ jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
238+ .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE)
239+ );
240+
// Menu bar: File menu (LOAD / SAVE), Tools menu (show in browser) and a
// Search menu that reacts to a mouse click on the menu title itself.
241+ jMenu1.setText("ファイル");
242+
243+ jMenuLoad.setText("LOAD");
244+ jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
245+ public void actionPerformed(java.awt.event.ActionEvent evt) {
246+ jMenuLoadActionPerformed(evt);
247+ }
248+ });
249+ jMenu1.add(jMenuLoad);
250+
251+ jMenuSave.setText("SAVE");
252+ jMenuSave.addActionListener(new java.awt.event.ActionListener() {
253+ public void actionPerformed(java.awt.event.ActionEvent evt) {
254+ jMenuSaveActionPerformed(evt);
255+ }
256+ });
257+ jMenu1.add(jMenuSave);
258+
259+ jMenuBar1.add(jMenu1);
260+
261+ jMenu3.setText("ツール");
262+
263+ jMenuItem1.setText("ブラウザで表示");
264+ jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
265+ public void actionPerformed(java.awt.event.ActionEvent evt) {
266+ jMenuItem1ActionPerformed(evt);
267+ }
268+ });
269+ jMenu3.add(jMenuItem1);
270+
271+ jMenuBar1.add(jMenu3);
272+
273+ jMenu2.setText("検索");
274+ jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
275+ public void mouseClicked(java.awt.event.MouseEvent evt) {
276+ jMenu2MouseClicked(evt);
277+ }
278+ });
279+ jMenuBar1.add(jMenu2);
280+
281+ setJMenuBar(jMenuBar1);
282+
// Top-level layout, top to bottom: URL row (label, text field, search
// button), the tabbed pane at a fixed preferred height of 250, and the
// result panel taking the remaining space.
283+ javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
284+ getContentPane().setLayout(layout);
285+ layout.setHorizontalGroup(
286+ layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
287+ .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
288+ .addGroup(layout.createSequentialGroup()
289+ .addComponent(jLabel1)
290+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
291+ .addComponent(jTxtUrl)
292+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
293+ .addComponent(jBtnSearch))
294+ .addComponent(jTabbedPane1)
295+ );
296+ layout.setVerticalGroup(
297+ layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
298+ .addGroup(layout.createSequentialGroup()
299+ .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
300+ .addComponent(jLabel1)
301+ .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
302+ .addComponent(jBtnSearch))
303+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
304+ .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE)
305+ .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
306+ .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
307+ .addContainerGap())
308+ );
309+
310+ pack();
311+ }// </editor-fold>//GEN-END:initComponents
312+
313+ private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed
314+ int SelectedRow = jTable1.getSelectedRow();
315+ SearchData sdata = new SearchData();
316+ if(SelectedRow >= 0) {
317+ sdatatblmodel.insertRow(SelectedRow, sdata);
318+ } else {
319+ sdatatblmodel.addRow(sdata);
320+ }
321+ }//GEN-LAST:event_jBtnRowInsActionPerformed
322+
323+ private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed
324+ int SelectedRow = jTable1.getSelectedRow();
325+ if(!(SelectedRow < 0)) {
326+ sdatatblmodel.removeRow(SelectedRow);
327+ }
328+ }//GEN-LAST:event_jBtnRowDelActionPerformed
329+
330+ private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed
331+ jFileChooser1.setDialogTitle("読込");
332+ int selected = jFileChooser1.showOpenDialog(this);
333+ if (selected == JFileChooser.APPROVE_OPTION) {
334+ File file = jFileChooser1.getSelectedFile();
335+ sio.load(file);
336+ jTxtUrl.setText(sio.geturl());
337+ sdatatblmodel.setRowCount(0);
338+ for(int i = 0; i < SearchData.size(); i++) {
339+ SearchData sdata = SearchData.get(i);
340+ sdatatblmodel.addRow(sdata);
341+ }
342+ }
343+ }//GEN-LAST:event_jMenuLoadActionPerformed
344+
345+ private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed
346+ jFileChooser1.setDialogTitle("保存");
347+ int selected = jFileChooser1.showSaveDialog(this);
348+ if (selected == JFileChooser.APPROVE_OPTION) {
349+ File file = jFileChooser1.getSelectedFile();
350+ sio.seturl(jTxtUrl.getText());
351+
352+ SearchData.clear();
353+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
354+ SearchData sdata = sdatatblmodel.getSearchData(row);
355+ SearchData.add(sdata);
356+ }
357+ sio.save(file);
358+ }
359+ }//GEN-LAST:event_jMenuSaveActionPerformed
360+
361+ private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed
362+ int SelectedRow = jTable1.getSelectedRow();
363+ if(SelectedRow >= 0) {
364+ SearchData sdata = sdatatblmodel.getSearchData(SelectedRow);
365+ sdatatblmodel.insertRow(SelectedRow, sdata);
366+ }
367+ }//GEN-LAST:event_jBtnRowCpyActionPerformed
368+
369+ private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed
370+ Desktop desktop = Desktop.getDesktop();
371+ String uriString = jTxtUrl.getText();
372+ try {
373+ URI uri = new URI(uriString);
374+ desktop.browse(uri);
375+
376+ } catch (URISyntaxException | IOException ex) {
377+ Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex);
378+ }
379+ }//GEN-LAST:event_jMenuItem1ActionPerformed
380+
381+ private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked
382+ Search_execution();
383+ }//GEN-LAST:event_jMenu2MouseClicked
384+
385+ private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed
386+ Search_execution();
387+ }//GEN-LAST:event_jBtnSearchActionPerformed
388+
389+ /**
390+ * 検索実行.
391+ */
392+ void Search_execution() {
393+ jTxtRtn.setText(null);
394+ HtmlParser par = new HtmlParser(jTxtUrl.getText());
395+
396+ // データ無し(404)判定
397+ String strdata = par.getStringPageData();
398+ if(strdata == null) {
399+ jTxtRtn.append("読込みページがありません");
400+ return;
401+ }
402+ String text = jTxt404msg.getText();
403+ String[] strsearch = text.split("\n");
404+ for(String strsearch1 : strsearch) {
405+ if(strdata.contains(strsearch1)) {
406+ jTxtRtn.append(strsearch1);
407+ return;
408+ }
409+ }
410+
411+ // 検索結果
412+ for(int row = 0; row < sdatatblmodel.getRowCount(); row++) {
413+ SearchData sdata = sdatatblmodel.getSearchData(row);
414+ String ans = sdata.getitem();
415+ String rtn = par.search(sdata);
416+ jTxtRtn.append(ans + "\t" + rtn + "\n");
417+ }
418+
419+ jTxtRtn.setCaretPosition(0);
420+ }
421+
422+ /**
423+ * @param args the command line arguments
424+ */
425+ public static void main(String args[]) {
426+ /* Set the Nimbus look and feel */
427+ //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
428+ /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
429+ * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
430+ */
431+ try {
432+ for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
433+ if ("Nimbus".equals(info.getName())) {
434+ javax.swing.UIManager.setLookAndFeel(info.getClassName());
435+ break;
436+ }
437+ }
438+ } catch (ClassNotFoundException
439+ | InstantiationException
440+ | IllegalAccessException
441+ | javax.swing.UnsupportedLookAndFeelException ex) {
442+ java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
443+ }
444+ //</editor-fold>
445+
446+ /* Create and display the form */
447+ java.awt.EventQueue.invokeLater(new Runnable() {
448+ @Override
449+ public void run() {
450+ new HtmlSearch().setVisible(true);
451+ }
452+ });
453+ }
454+
455+ // Variables declaration - do not modify//GEN-BEGIN:variables
456+ private javax.swing.JButton jBtnRowCpy;
457+ private javax.swing.JButton jBtnRowDel;
458+ private javax.swing.JButton jBtnRowIns;
459+ private javax.swing.JButton jBtnSearch;
460+ private javax.swing.JFileChooser jFileChooser1;
461+ private javax.swing.JLabel jLabel1;
462+ private javax.swing.JMenu jMenu1;
463+ private javax.swing.JMenu jMenu2;
464+ private javax.swing.JMenu jMenu3;
465+ private javax.swing.JMenuBar jMenuBar1;
466+ private javax.swing.JMenuItem jMenuItem1;
467+ private javax.swing.JMenuItem jMenuLoad;
468+ private javax.swing.JMenuItem jMenuSave;
469+ private javax.swing.JPanel jPanelRtn;
470+ private javax.swing.JPanel jPanelTab1;
471+ private javax.swing.JPanel jPanelTab2;
472+ private javax.swing.JRadioButton jRadioButton1;
473+ private javax.swing.JScrollPane jScrollPane1;
474+ private javax.swing.JScrollPane jScrollPane404msg;
475+ private javax.swing.JScrollPane jScrollPaneLabel;
476+ private javax.swing.JScrollPane jScrollPaneRtn;
477+ private javax.swing.JTabbedPane jTabbedPane1;
478+ private javax.swing.JTable jTable1;
479+ private javax.swing.JTextArea jTxt404msg;
480+ private javax.swing.JTextArea jTxtLabel;
481+ private javax.swing.JTextArea jTxtRtn;
482+ private javax.swing.JTextField jTxtUrl;
483+ // End of variables declaration//GEN-END:variables
484+}
485+
486+class SearchDataTableModel extends DefaultTableModel {
487+ /* ---------------------------------------------------------------------- *
488+ * データ属性
489+ * ---------------------------------------------------------------------- */
490+ public String[] columnName = {
491+ /* 0 */ "項目名",
492+ /* 1 */ "タグ",
493+ /* 2 */ "ID",
494+ /* 3 */ "クラス",
495+ /* 4 */ "位置",
496+ /* 5 */ "抽出条件"
497+ };
498+
499+ public Class[] columnClass = {
500+ /* 0 */ String.class,
501+ /* 1 */ String.class,
502+ /* 2 */ String.class,
503+ /* 3 */ String.class,
504+ /* 4 */ String.class,
505+ /* 5 */ String.class
506+ };
507+
508+ int column_item = 0;
509+ int column_htmltag = 1;
510+ int column_htmlid = 2;
511+ int column_htmlclass = 3;
512+ int column_around = 4;
513+ int column_regexp = 5;
514+
515+ /* ---------------------------------------------------------------------- *
516+ * 処理
517+ * ---------------------------------------------------------------------- */
518+ @Override
519+ public String getColumnName(int modelIndex) {
520+ return columnName[modelIndex];
521+ }
522+
523+ @Override
524+ public Class<?> getColumnClass(int modelIndex) {
525+ return columnClass[modelIndex];
526+ }
527+
528+ @Override
529+ public int getColumnCount() {
530+ return columnName.length;
531+ }
532+
533+ /* ---------------------------------------------------------------------- */
534+
535+ public SearchData getSearchData(int row) {
536+ SearchData sdata = new SearchData();
537+ sdata.setitem(String.valueOf(getValueAt(row, column_item)));
538+ sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag)));
539+ sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid)));
540+ sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass)));
541+ sdata.setaround(String.valueOf(getValueAt(row, column_around)));
542+ sdata.setregexp(String.valueOf(getValueAt(row, column_regexp)));
543+ return sdata;
544+ }
545+
546+ public void addRow(SearchData sdata) {
547+ addRow(getObjdata(sdata));
548+ }
549+
550+ public void insertRow(int row, SearchData sdata) {
551+ insertRow(row, getObjdata(sdata));
552+ }
553+
554+ private Object[] getObjdata(SearchData sdata) {
555+ Object[] obj = new Object[] {
556+ sdata.getitem(),
557+ sdata.getHtmltag(),
558+ sdata.getHtmlid(),
559+ sdata.getHtmlclass(),
560+ sdata.getaround(),
561+ sdata.getregexp()
562+ };
563+ return obj;
564+ }
565+
566+}
\ No newline at end of file
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/core/SearchData.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/core/SearchData.java (revision 115)
@@ -0,0 +1,166 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+
27+/**
28+ * タグ検索データ.
29+ * @author kgto
30+ */
31+public class SearchData {
32+ /* ---------------------------------------------------------------------- *
33+ * フィールド
34+ * ---------------------------------------------------------------------- */
35+ private String item;
36+ private String htmltag;
37+ private String htmlid;
38+ private String htmlclass;
39+ private String around;
40+ private String regexp;
41+
42+ /* ---------------------------------------------------------------------- *
43+ * static 処理
44+ * ---------------------------------------------------------------------- */
45+ private static ArrayList<SearchData> slist = new ArrayList<>();
46+
47+ public static void addSearchData(
48+ String item, String htmltag, String htmlid,
49+ String htmlclass, String around, String regexp) {
50+ SearchData sdat = new SearchData();
51+ sdat.setitem(item);
52+ sdat.setHtmltag(htmltag);
53+ sdat.setHtmlid(htmlid);
54+ sdat.setHtmlclass(htmlclass);
55+ sdat.setaround(around);
56+ sdat.setregexp(regexp);
57+
58+ slist.add(sdat);
59+ }
60+
61+ public static void add(SearchData sdat) {
62+ slist.add(sdat);
63+ }
64+
65+ public static SearchData get(int i) {
66+ return slist.get(i);
67+ }
68+
69+ public static int size() {
70+ return slist.size();
71+ }
72+
73+ public static SearchData remove(int index) {
74+ return slist.remove(index);
75+ }
76+
77+ public static void clear() {
78+ slist.clear();
79+ }
80+
81+ /* ---------------------------------------------------------------------- *
82+ * コンストラクタ
83+ * ---------------------------------------------------------------------- */
84+ public SearchData() {
85+ initialize();
86+ }
87+
88+ public SearchData(SearchData dat) {
89+ this.item = dat.getitem();
90+ this.htmltag = dat.getHtmltag();
91+ this.htmlid = dat.getHtmlid();
92+ this.htmlclass = dat.getHtmlclass();
93+ this.around = dat.getaround();
94+ this.regexp = dat.getregexp();
95+ }
96+
97+ /* ---------------------------------------------------------------------- *
98+ * Setter
99+ * ---------------------------------------------------------------------- */
100+ public void setitem(String item) {
101+ this.item = item;
102+ }
103+
104+ public void setHtmltag(String htmltag) {
105+ this.htmltag = htmltag;
106+ }
107+
108+ public void setHtmlid(String htmlid) {
109+ this.htmlid = htmlid;
110+ }
111+
112+ public void setHtmlclass(String htmlclass) {
113+ this.htmlclass = htmlclass;
114+ }
115+
116+ public void setaround(String around) {
117+ this.around = around;
118+ }
119+
120+ public void setregexp(String regexp) {
121+ this.regexp = regexp;
122+ }
123+
124+ /* ---------------------------------------------------------------------- *
125+ * Getter
126+ * ---------------------------------------------------------------------- */
127+ public String getitem() {
128+ return item;
129+ }
130+
131+ public String getHtmltag() {
132+ return htmltag;
133+ }
134+
135+ public String getHtmlid() {
136+ return htmlid;
137+ }
138+
139+ public String getHtmlclass() {
140+ return htmlclass;
141+ }
142+
143+ public String getaround() {
144+ return around;
145+ }
146+
147+ public String getregexp() {
148+ return regexp;
149+ }
150+
151+ /* ---------------------------------------------------------------------- *
152+ * メソッド
153+ * ---------------------------------------------------------------------- */
154+ /**
155+ * データ初期化.
156+ */
157+ public final void initialize() {
158+ this.item = "";
159+ this.htmltag = "";
160+ this.htmlid = "";
161+ this.htmlclass = "";
162+ this.around = "";
163+ this.regexp = "";
164+ }
165+
166+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/core/HtmlParser.java (revision 115)
@@ -0,0 +1,259 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.core;
24+
25+import java.io.*;
26+import java.net.*;
27+import java.util.ArrayList;
28+import java.util.logging.Level;
29+import java.util.logging.Logger;
30+import java.util.regex.Matcher;
31+import java.util.regex.Pattern;
32+import javax.swing.text.html.parser.ParserDelegator;
33+
34+/**
35+ *
36+ * @author kgto
37+ */
38+public class HtmlParser {
39+
40+ URL url;
41+ String pageData;
42+ ArrayList sData;
43+
44+ // 作業ワーク
45+ String htmltag;
46+ String htmlid;
47+ String htmlclass;
48+
49+ public HtmlParser(URL UrlAdress) {
50+ DebugProcess.debuglog_set();
51+ this.url = UrlAdress;
52+ getPageData();
53+ }
54+
55+ public HtmlParser(String UrlAdress) {
56+ DebugProcess.debuglog_set();
57+ try {
58+ url = new URL(UrlAdress);
59+ getPageData();
60+
61+ } catch (MalformedURLException ex) {
62+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
63+ }
64+ }
65+
66+ public HtmlParser() {
67+ DebugProcess.debuglog_set();
68+ url = null;
69+ }
70+
71+ public String getStringPageData() {
72+ return pageData;
73+ }
74+
75+ public void seturl(URL UrlAdress) {
76+ this.url = UrlAdress;
77+ getPageData();
78+ }
79+
80+ public void seturl(String UrlAdress) {
81+ try {
82+ url = new URL(UrlAdress);
83+ getPageData();
84+
85+ } catch (MalformedURLException ex) {
86+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
87+ }
88+ }
89+
90+ /**
91+ * HTMLページ内検索.
92+ * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、
93+ * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を
94+ * 行った結果を返す。<br>
95+ * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br>
96+ * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br>
97+ * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。
98+ * @param skey 検索キーデータ(SearchData)
99+ * @return String 検索キーに一致するデータの文字列
100+ */
101+ public String search(SearchData skey) {
102+
103+ // htmlページ内を検索
104+ if(isHtmlkeyEq(skey) == false) {
105+ searchPageData(skey);
106+ }
107+ /*
108+ around 出現位置指定 入力有り:指定された位置の情報のみ返す。
109+ 入力無し:取得した全ての情報を返す。
110+ */
111+ String regexp = skey.getregexp();
112+ if(skey.getaround().length() > 0) {
113+ int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換
114+ if(wkAround < sData.size()) {
115+ String str = (String)sData.get(wkAround);
116+ String rtn = RegularExpression(str, regexp);
117+ return rtn;
118+ }
119+ } else {
120+ StringBuilder strbuf = new StringBuilder();
121+ for (Object sData1 : sData) {
122+ String str = (String)sData1;
123+ String rtn = RegularExpression(str, regexp);
124+ if(strbuf.length() > 0) {
125+ strbuf.append("\t");
126+ }
127+ strbuf.append(rtn);
128+ }
129+ return strbuf.toString();
130+ }
131+ return null;
132+ }
133+
134+ /**
135+ * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する.
136+ * @param skey HTMLタグ/ID/CLASSが格納された検索キー
137+ * @return boolean HTMLタグ/ID/CLASS値が一致する時、true
138+ */
139+ boolean isHtmlkeyEq(SearchData skey) {
140+
141+ String stag = skey.getHtmltag();
142+ String sid = skey.getHtmlid();
143+ String sclass = skey.getHtmlclass();
144+
145+ boolean rtn = true;
146+
147+ // htmltag
148+ if(htmltag == null) {
149+ rtn = false;
150+ } else {
151+ if(htmltag.equals(stag) == false) {
152+ rtn = false;
153+ }
154+ }
155+
156+ // htmlid
157+ if(htmlid == null) {
158+ rtn = false;
159+ } else {
160+ if(htmlid.equals(sid) == false) {
161+ rtn = false;
162+ }
163+ }
164+
165+ // htmlclass
166+ if(htmlclass == null) {
167+ rtn = false;
168+ } else {
169+ if(htmlclass.equals(sclass) == false) {
170+ rtn = false;
171+ }
172+ }
173+
174+ if(!rtn) {
175+ htmltag = stag;
176+ htmlid = sid;
177+ htmlclass = sclass;
178+ }
179+
180+ return rtn;
181+ }
182+
183+ /**
184+ * 正規表現検索.
185+ * @param strdata
186+ * @param regexp
187+ * @return
188+ */
189+ String RegularExpression(String strdata, String regexp) {
190+ String expdata = null;
191+
192+ //regexpのチェック
193+ if(regexp.isEmpty()) {
194+ expdata = strdata;
195+ return expdata;
196+ }
197+
198+ //正規表現検索
199+ Pattern ptn = Pattern.compile(regexp);
200+ Matcher matchdata = ptn.matcher(strdata);
201+ if (matchdata.find()) {
202+ if(matchdata.groupCount() >= 1) {
203+ expdata = matchdata.group(1);
204+ }
205+ }
206+ return expdata;
207+ }
208+
209+ /**
210+ * インターネット接続.
211+ */
212+ private void getPageData() {
213+ HttpURLConnection con = null;
214+ try {
215+ con = (HttpURLConnection)url.openConnection();
216+ con.setRequestMethod("GET");
217+ BufferedReader reader = new BufferedReader(
218+ new InputStreamReader(con.getInputStream(), "utf-8"));
219+ String wkline;
220+ StringBuilder sb = new StringBuilder();
221+ while((wkline = reader.readLine()) != null) {
222+ sb.append(wkline).append("\n");
223+ }
224+ pageData = sb.toString();
225+
226+ } catch(FileNotFoundException ex) {
227+ pageData = null;
228+ } catch (IOException ex) {
229+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
230+ } finally {
231+ if(con != null) {
232+ con.disconnect();
233+ }
234+ }
235+ }
236+
237+ /**
238+ * HTMLパーサ.
239+ * @param skey
240+ */
241+ private void searchPageData(SearchData skey) {
242+
243+ DebugProcess.searchDatainfo(skey);
244+
245+ Reader reader;
246+ try {
247+ reader = new BufferedReader(new StringReader(pageData));
248+ HtmlParserCallback cb = new HtmlParserCallback(skey);
249+ ParserDelegator pd = new ParserDelegator();
250+ pd.parse(reader, cb, true);
251+ reader.close();
252+
253+ sData = cb.getrtnData();
254+
255+ } catch (IOException ex) {
256+ Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex);
257+ }
258+ }
259+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/core/AttributeData.java (revision 115)
@@ -0,0 +1,163 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+import java.util.Enumeration;
27+import javax.swing.text.MutableAttributeSet;
28+import javax.swing.text.html.HTML;
29+
30+/**
31+ * HTMLタグの属性情報を保持する.
32+ * @author kgto
33+ */
34+public class AttributeData {
35+
36+ public AttributeData() {
37+ AttrList = new ArrayList();
38+ size = 0;
39+ }
40+
41+ /**
42+ * 属性情報追加.
43+ * @param tag
44+ * @param attr
45+ */
46+ public void add(HTML.Tag tag, MutableAttributeSet attr) {
47+
48+ int tagcount = tagcnt(tag);
49+ ++tagcount;
50+
51+ Enumeration e = attr.getAttributeNames();
52+ while(e.hasMoreElements()) {
53+ Object obj = e.nextElement();
54+
55+ AttrData a = new AttrData();
56+ a.tag = tag;
57+ a.count = tagcount;
58+ a.attrname = obj.toString();
59+ a.attrvalue = attr.getAttribute(obj).toString();
60+
61+ AttrList.add(a);
62+ size = AttrList.size();
63+ }
64+
65+ }
66+
67+ /**
68+ * 属性情報検索.
69+ * @param tag
70+ * @param attrname
71+ * @param attrvalue
72+ * @return
73+ */
74+ public boolean search(HTML.Tag tag, String attrname, String attrvalue) {
75+ boolean ret = false;
76+ for (Object AttrList1 : AttrList) {
77+ AttrData a = (AttrData)AttrList1;
78+ if(a.tag == tag) {
79+ if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) {
80+ ret = true;
81+ }
82+ }
83+ }
84+ return ret;
85+ }
86+
87+ public boolean searchId(HTML.Tag tag, String attrvalue) {
88+ return search(tag, "id", attrvalue);
89+ }
90+
91+ public boolean searchClass(HTML.Tag tag, String attrvalue) {
92+ return search(tag, "class", attrvalue);
93+ }
94+
95+ /**
96+ * 属性の値を取得する.
97+ * @param tag
98+ * @param attrname
99+ * @return
100+ */
101+ public ArrayList getvale(HTML.Tag tag, String attrname) {
102+ ArrayList ret = new ArrayList();
103+ for (Object AttrList1 : AttrList) {
104+ AttrData a = (AttrData)AttrList1;
105+ if(a.tag == tag) {
106+ if(a.attrname.equals(attrname)) {
107+ ret.add(a.attrvalue);
108+ }
109+ }
110+ }
111+ return ret;
112+ }
113+
114+ /**
115+ * 引数で渡されたTAGの最新カウント数を返す.
116+ * @param tag
117+ * @return
118+ */
119+ private int tagcnt(HTML.Tag tag) {
120+ int wkcnt = 0;
121+ for (Object AttrList1 : AttrList) {
122+ AttrData a = (AttrData)AttrList1;
123+ if(a.tag == tag) {
124+ if(wkcnt < a.count) {
125+ wkcnt = a.count;
126+ }
127+ }
128+ }
129+ return wkcnt;
130+ }
131+
132+ // AttrList の内容を返すメソッド
133+ public HTML.Tag gettag(int i) {
134+ AttrData a = (AttrData)AttrList.get(i);
135+ return a.tag;
136+ }
137+
138+ public int getcount(int i) {
139+ AttrData a = (AttrData)AttrList.get(i);
140+ return a.count;
141+ }
142+
143+ public String getattrname(int i) {
144+ AttrData a = (AttrData)AttrList.get(i);
145+ return a.attrname;
146+ }
147+
148+ public String getattrvalue(int i) {
149+ AttrData a = (AttrData)AttrList.get(i);
150+ return a.attrvalue;
151+ }
152+
153+ // フィールド変数
154+ public class AttrData {
155+ public HTML.Tag tag;
156+ public int count;
157+ public String attrname;
158+ public String attrvalue;
159+ }
160+ public ArrayList AttrList;
161+ public int size; // AttrListのサイズ
162+
163+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/core/HtmlParserCallback.java (revision 115)
@@ -0,0 +1,211 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.core;
24+
25+import java.util.ArrayList;
26+import java.util.HashMap;
27+import javax.swing.text.MutableAttributeSet;
28+import javax.swing.text.html.HTML;
29+import javax.swing.text.html.HTMLEditorKit;
30+
31+/**
32+ * HTMLパーサ部品.
33+ * @author kgto
34+ */
35+class HtmlParserCallback extends HTMLEditorKit.ParserCallback {
36+
37+ // Tag毎の階層
38+ HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();
39+
40+ // serach key 情報
41+ String keytag;
42+ String keyid;
43+ String keyclass;
44+
45+ // serach key と一致時の情報退避
46+ int bufCount = 0;
47+ HTML.Tag bufTag = null;
48+ // serach key と一致時の情報格納ワーク
49+ StringBuilder bufText;
50+
51+ // serach key と一致時のデータ一覧
52+ ArrayList sData;
53+
54+ // 属性データ
55+ AttributeData attrdata;
56+
57+ protected HtmlParserCallback(SearchData skey) {
58+
59+ // キー情報展開
60+ keytag = skey.getHtmltag();
61+ keyid = skey.getHtmlid();
62+ keyclass = skey.getHtmlclass();
63+
64+ sData = new ArrayList();
65+ }
66+
67+ ArrayList getrtnData() {
68+ return this.sData;
69+ }
70+
71+ @Override
72+ public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
73+ // Tag毎の階層を保持
74+ int count = 1;
75+ if(tagMap.containsKey(tag)) {
76+ count = tagMap.get(tag);
77+ count++;
78+ }
79+ tagMap.put(tag, count);
80+
81+ // 属性解析
82+ AttributeData handleStartattrdata = new AttributeData();
83+ handleStartattrdata.add(tag, attr);
84+
85+ DebugProcess.htmlinfo(tag, attr, "handleStartTag", count);
86+
87+ if(bufCount == 0) {
88+ if(tag.toString().equals(keytag)) {
89+ //if(serachAttribute(attr)) {
90+ if(serachAttribute(tag, handleStartattrdata)) {
91+ bufCount = count;
92+ bufTag = tag;
93+ attrdata = new AttributeData();
94+ bufText = new StringBuilder();
95+ }
96+ }
97+ }
98+ if(bufCount > 0) {
99+ attrdata.add(tag, attr);
100+ }
101+ }
102+
103+ @Override
104+ public void handleEndTag(HTML.Tag tag, int pos){
105+ // Tag毎の階層を取得
106+ int count = 0;
107+ if(tagMap.containsKey(tag)) {
108+ count = tagMap.get(tag);
109+ }
110+
111+ DebugProcess.htmlinfo(tag, null, "handleEndTag", count);
112+
113+ if(tag.equals(bufTag) && count <= bufCount) {
114+
115+ // 溜め込んだ一致情報をリストへ格納
116+ sData.add(bufText.toString());
117+
118+ // 退避したserach keyとの一致情報クリア
119+ bufCount = 0;
120+ bufTag = null;
121+ bufText = null;
122+ }
123+
124+ // Tag毎の階層減算
125+ tagMap.put(tag, --count);
126+ }
127+
128+ @Override
129+ public void handleText(char[] data, int pos){
130+
131+ DebugProcess.htmlinfo(data, "handleText");
132+
133+ String splitchar = "\t";
134+ //制御文字の削除
135+ // &nbsp; 0xa0
136+ StringBuilder buf = new StringBuilder();
137+ for(int i = 0; i < data.length; i++) {
138+ if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) {
139+ buf.append(data[i]);
140+ }
141+ }
142+ if(bufCount > 0) {
143+ if(bufText.length() > 0) {
144+ bufText.append(splitchar);
145+ }
146+ bufText.append(buf.toString());
147+ }
148+ }
149+
150+ @Override
151+ public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
152+ if(bufCount > 0) {
153+ attrdata.add(tag, attr);
154+ }
155+ DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0);
156+ }
157+
158+ /**
159+ * ページ内のID/CLASS値と検索キーを比較する.
160+ * @param attr ページのMutableAttributeSet
161+ * @return boolean 検索キーと一致の時、true
162+ */
163+ boolean serachAttribute(MutableAttributeSet attr) {
164+ String currentID = (String)attr.getAttribute(HTML.Attribute.ID);
165+ String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS);
166+
167+ if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
168+ if(keyid.equals(currentID) && keyclass.equals(currentClass)) {
169+ return true;
170+ }
171+ }
172+
173+ if(keyid.isEmpty() == false) {
174+ if(keyid.equals(currentID)) {
175+ return true;
176+ }
177+ }
178+
179+ if(keyclass.isEmpty() == false) {
180+ if(keyclass.equals(currentClass)) {
181+ return true;
182+ }
183+ }
184+
185+ return false;
186+ }
187+
188+ /**
189+ * ページ内のID/CLASS値と検索キーを比較する.
190+ * @param tag
191+ * @param attrdata
192+ * @return boolean 検索キーと一致の時、true
193+ */
194+ boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) {
195+ // ID と CLASS の両方にキー入力有りの場合
196+ if(keyid.isEmpty() == false && keyclass.isEmpty() == false) {
197+ if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) {
198+ return true;
199+ }
200+ }
201+ // ID のキーチェック
202+ if(keyid.isEmpty() == false) {
203+ return attrdata.searchId(tag, keyid);
204+ }
205+ // CLASS のキーチェック
206+ if(keyclass.isEmpty() == false) {
207+ return attrdata.searchClass(tag, keyclass);
208+ }
209+ return false;
210+ }
211+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/src/WebScraping/core/DebugProcess.java (nonexistent)
+++ trunk/HtmlTest2/src/WebScraping/core/DebugProcess.java (revision 115)
@@ -0,0 +1,264 @@
1+/*
2+ * Copyright (C) 2014 kgto.
3+ *
4+ * This library is free software; you can redistribute it and/or
5+ * modify it under the terms of the GNU Lesser General Public
6+ * License as published by the Free Software Foundation; either
7+ * version 2.1 of the License, or (at your option) any later version.
8+ *
9+ * This library is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+ * Lesser General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU Lesser General Public
15+ * License along with this library; if not, write to the Free Software
16+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17+ * MA 02110-1301 USA
18+ */
19+/*
20+ * $Id$
21+ */
22+
23+package webScraping.core;
24+
25+import java.io.File;
26+import java.io.FileInputStream;
27+import java.io.FileNotFoundException;
28+import java.io.IOException;
29+import java.util.logging.FileHandler;
30+import java.util.logging.Formatter;
31+import java.util.logging.Handler;
32+import java.util.logging.Level;
33+import java.util.logging.LogManager;
34+import java.util.logging.LogRecord;
35+import java.util.logging.Logger;
36+import javax.swing.text.MutableAttributeSet;
37+import javax.swing.text.html.HTML;
38+
39+/**
40+ * デバック情報.
41+ * カレントディレクトリに設定ファイル(Debug.prop)を置くことで、デバックログの出力を制御する。
42+ * @author kgto
43+ */
44+public class DebugProcess {
45+ // 設定ファイル名
46+ protected static final String configurationFilename = "Debug.prop";
47+ // ロガー名
48+ protected static final Logger logger = Logger.getLogger("WebScraping");
49+ // ログ出力デフォルトレベル
50+ protected static final Level loggerlevel = Level.FINEST;
51+
52+
53+ /**
54+ * ログ出力設定.
55+ * ログ設定ファイルの存在をチェック、(最終的な)ログレベルにより、
56+ * ファイルハンドラの設定と出力書式の設定を行う。
57+ */
58+ public static void debuglog_set() {
59+ try {
60+ initLogConfiguration();
61+
62+ if(Level.ALL.equals(logger.getLevel())) {
63+ //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2));
64+ logger.addHandler(new FileHandler("WebScraping%g.log", true));
65+ }
66+ setFomatter();
67+
68+ } catch (IOException | SecurityException ex) {
69+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
70+ }
71+ }
72+
73+ /**
74+ * ログ出力設定解除.
75+ */
76+ public static void debuglog_unset() {
77+ }
78+
79+
80+ /**
81+ * デバック出力(HTML解析-タグ&属性).
82+ * HTMLのタグと属性の解析状態を出力する。
83+ * 書式: 9 : x : タグ名 [属性名]属性数 = 属性値<br>
84+ * 凡例: 9 = 階層レベル(count値), x = F(tagの開始)/E(tagの終了)/S(単独tag)の何れか1文字<br>
85+ * @param tag タグ
86+ * @param attr 属性
87+ * @param methodname このメソッドを呼び出した親メソッド名
88+ * @param count HTMLタグの階層レベル
89+ */
90+ public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr,
91+ String methodname, int count) {
92+
93+ // ログ出力レベルチェック
94+ if(logger.getLevel() == null) {
95+ return;
96+ }
97+ if(logger.getLevel().intValue() > loggerlevel.intValue()) {
98+ return;
99+ }
100+
101+ // 編集処理
102+ char kbn = ' ';
103+ if("handleStartTag".equals(methodname)) {
104+ kbn = 'F';
105+ }
106+ if("handleEndTag".equals(methodname)) {
107+ kbn = 'E';
108+ }
109+ if("handleSimpleTag".equals(methodname)) {
110+ kbn = 'S';
111+ }
112+
113+ StringBuilder strBuf = new StringBuilder(80);
114+ strBuf.append(count).append(" : ");
115+ strBuf.append(kbn).append(" : ");
116+ strBuf.append(tag.toString());
117+ // 属性情報
118+ if(attr != null) {
119+ if(attr.getAttributeCount() != 0) {
120+ AttributeData handleAttrData = new AttributeData();
121+ handleAttrData.add(tag, attr);
122+ for(int i = 0; i < handleAttrData.size; i++) {
123+ strBuf.append(" [");
124+ strBuf.append(handleAttrData.getattrname(i));
125+ strBuf.append("]");
126+ strBuf.append(handleAttrData.getcount(i));
127+ strBuf.append(" = ");
128+ strBuf.append(handleAttrData.getattrvalue(i));
129+ }
130+ }
131+ }
132+
133+ logger.log(loggerlevel, strBuf.toString());
134+ }
135+
136+ /**
137+ * デバック出力(メッセージ).
138+ * 引数に渡された任意のメッセージを出力する。
139+ * @param str メッセージ
140+ * @param methodname このメソッドを呼び出した親メソッド名
141+ */
142+ public static void htmlinfo(String str, String methodname) {
143+ logger.log(loggerlevel, str);
144+ }
145+
146+ public static void htmlinfo(String str) {
147+ logger.log(loggerlevel, str);
148+ }
149+
150+ /**
151+ * デバック出力(HTML解析-本文).
152+ * 本文の内容を出力する。
153+ * @param data 本文(HTML内の文字列)
154+ * @param methodname このメソッドを呼び出した親メソッド名
155+ */
156+ public static void htmlinfo(char[] data, String methodname) {
157+ String dat = new String(data);
158+ logger.log(loggerlevel, dat);
159+ }
160+
161+ public static void htmlinfo(char[] data) {
162+ String dat = new String(data);
163+ logger.log(loggerlevel, dat);
164+ }
165+
166+ /**
167+ * デバック出力(検索キー).
168+ * 検索キー(SearchData)の内容を出力する。
169+ * @param skey
170+ */
171+ public static void searchDatainfo(SearchData skey) {
172+
173+ StringBuilder strBuf = new StringBuilder(30);
174+ strBuf.append("SearchData KEY tag[");
175+ strBuf.append(skey.getHtmltag());
176+ strBuf.append("] ID[");
177+ strBuf.append(skey.getHtmlid());
178+ strBuf.append("] CLASS[");
179+ strBuf.append(skey.getHtmlclass());
180+ strBuf.append("]\n");
181+
182+ logger.log(loggerlevel, strBuf.toString());
183+ }
184+
185+ /**
186+ * ログ出力設定ファイルチェック.
187+ * 設定ファイルの存在をチェックし存在する場合、設定ファイルの内容を設定する。
188+ */
189+ private static void initLogConfiguration() {
190+
191+ File file = new File(configurationFilename);
192+ try {
193+ if(file.exists()) {
194+ FileInputStream inputStream = new FileInputStream(file);
195+ // 設定ファイルの読み込み
196+ LogManager.getLogManager().readConfiguration(inputStream);
197+ }
198+
199+ } catch (FileNotFoundException ex) {
200+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
201+ } catch (IOException ex) {
202+ Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex);
203+ }
204+ }
205+
206+ /**
207+ * ログ出力フォーマッター設定.
208+ * ファイルへログ出力時の書式を設定する。
209+ */
210+ private static void setFomatter() {
211+ Handler[] handlers = logger.getHandlers();
212+ for(int i = 0 ; i < handlers.length ; i++) {
213+ if(handlers[i] instanceof java.util.logging.FileHandler) {
214+ handlers[i].setFormatter(new HtmlFormatter());
215+ }
216+ }
217+ }
218+
219+}
220+
221+/**
222+ * ログ出力フォーマッター.
223+ * @author kgto
224+ */
225+class HtmlFormatter extends Formatter {
226+ /**
227+ * Logの出力文字列を生成する。
228+ * 出力書式:<br>
229+ * YYYY-MM-DD HH:SS:MM ログレベル<メソッド名>メッセージ
230+ */
231+ @Override
232+ public synchronized String format(final LogRecord aRecord) {
233+
234+ final StringBuffer message = new StringBuffer(100);
235+
236+ long millis = aRecord.getMillis();
237+ String time = String.format("%tF %<tT", millis);
238+
239+ message.append(time);
240+ message.append(' ');
241+
242+ message.append(aRecord.getLevel());
243+ message.append('<');
244+ String methodName = aRecord.getSourceMethodName();
245+ message.append(methodName != null ? methodName : "N/A");
246+ message.append('>');
247+
248+ message.append(formatMessage(aRecord));
249+ message.append('\n');
250+
251+ // 例外エラーの場合、エラー内容とスタックトレース出力
252+ Throwable throwable = aRecord.getThrown();
253+ if (throwable != null) {
254+ message.append(throwable.toString());
255+ message.append('\n');
256+ for (StackTraceElement trace : throwable.getStackTrace()) {
257+ message.append('\t');
258+ message.append(trace.toString());
259+ message.append('\n');
260+ }
261+ }
262+ return message.toString();
263+ }
264+}
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
--- trunk/HtmlTest2/test/WebScraping/DebugProcessT01.java (revision 114)
+++ trunk/HtmlTest2/test/WebScraping/DebugProcessT01.java (nonexistent)
@@ -1,48 +0,0 @@
1-
2-package WebScraping;
3-
4-import static WebScraping.DebugProcess.logger;
5-import java.util.logging.Formatter;
6-import java.util.logging.Handler;
7-import java.util.logging.Logger;
8-
9-/**
10- * Manual check of DebugProcess logging: calls debuglog_set(), prints the logger's name, level, parent and each handler's level and formatter, then calls htmlinfo("testhtmlinfo").
11- * @author kgto
12- */
13-
14-
15-public class DebugProcessT01 {
16-
17- public static void main(String[] args) {
18-
19- DebugProcessT01 test = new DebugProcessT01();
20- test.testdebuglog_set();
21-
22- System.out.println("LoggerName : " + logger.getName());
23- System.out.println("LoggerLevel : " + logger.getLevel());
24- System.out.println("Parent : " + logger.getParent().getName());
25-
26- Handler[] handlers = logger.getHandlers();
27- for(int i = 0 ; i < handlers.length ; i++) {
28- System.out.println(handlers[i] + "'s Level: " + handlers[i].getLevel());
29-
30- Formatter formatter = handlers[i].getFormatter();
31- System.out.println("\tFormatter: " + formatter.toString());
32- }
33-
34- test.testhtmlinfo();
35- }
36-
37- public DebugProcessT01() {
38- }
39-
40- void testdebuglog_set() {
41- DebugProcess.debuglog_set();
42- }
43-
44- void testhtmlinfo() {
45- DebugProcess.htmlinfo("testhtmlinfo");
46- }
47-
48-}
--- trunk/HtmlTest2/test/WebScraping/DebugProcessTest.java (revision 114)
+++ trunk/HtmlTest2/test/WebScraping/DebugProcessTest.java (nonexistent)
@@ -1,113 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-package WebScraping;
20-
21-import javax.swing.text.MutableAttributeSet;
22-import javax.swing.text.html.HTML;
23-import org.junit.AfterClass;
24-import org.junit.BeforeClass;
25-import org.junit.Test;
26-import static org.junit.Assert.*;
27-
28-import org.junit.AfterClass;
29-import org.junit.BeforeClass;
30-import org.junit.Test;
31-import static org.junit.Assert.*;
32-
33-/**
34- * JUnit test skeleton for DebugProcess. Each test calls one DebugProcess method and then ends with an unconditional fail(), so none of them can pass as written.
35- * @author kgto
36- */
37-
38-
39-public class DebugProcessTest {
40-
41- public DebugProcessTest() {
42- }
43-
44- @BeforeClass
45- public static void setUpClass() {
46- }
47-
48- @AfterClass
49- public static void tearDownClass() {
50- }
51-
52- /**
53- * Test of debuglog_set method, of class DebugProcess.
54- */
55- @Test
56- public void testDebuglog_set() {
57- System.out.println("debuglog_set");
58- DebugProcess.debuglog_set();
59- // TODO review the generated test code and remove the default call to fail.
60- fail("The test case is a prototype.");
61- }
62-
63- /**
64- * Test of debuglog_unset method, of class DebugProcess.
65- */
66- @Test
67- public void testDebuglog_unset() {
68- System.out.println("debuglog_unset");
69- DebugProcess.debuglog_unset();
70- // TODO review the generated test code and remove the default call to fail.
71- fail("The test case is a prototype.");
72- }
73-
74- /**
75- * Test of htmlinfo method, of class DebugProcess.
76- */
77- @Test
78- public void testHtmlinfo_4args() {
79- System.out.println("htmlinfo");
80- HTML.Tag tag = null;
81- MutableAttributeSet attr = null;
82- String methodname = "";
83- int count = 0;
84- DebugProcess.htmlinfo(tag, attr, methodname, count);
85- // TODO review the generated test code and remove the default call to fail.
86- fail("The test case is a prototype.");
87- }
88-
89- /**
90- * Test of htmlinfo method, of class DebugProcess.
91- */
92- @Test
93- public void testHtmlinfo_String() {
94- System.out.println("htmlinfo");
95- String str = "";
96- DebugProcess.htmlinfo(str);
97- // TODO review the generated test code and remove the default call to fail.
98- fail("The test case is a prototype.");
99- }
100-
101- /**
102- * Test of htmlinfo method, of class DebugProcess.
103- */
104- @Test
105- public void testHtmlinfo_charArr() {
106- System.out.println("htmlinfo");
107- char[] data = null;
108- DebugProcess.htmlinfo(data);
109- // TODO review the generated test code and remove the default call to fail.
110- fail("The test case is a prototype.");
111- }
112-
113-}
--- trunk/HtmlTest2/test/WebScraping/HtmlFormatterTest.java (revision 114)
+++ trunk/HtmlTest2/test/WebScraping/HtmlFormatterTest.java (nonexistent)
@@ -1,66 +0,0 @@
1-/*
2- * Copyright (C) 2014 kgto.
3- *
4- * This library is free software; you can redistribute it and/or
5- * modify it under the terms of the GNU Lesser General Public
6- * License as published by the Free Software Foundation; either
7- * version 2.1 of the License, or (at your option) any later version.
8- *
9- * This library is distributed in the hope that it will be useful,
10- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12- * Lesser General Public License for more details.
13- *
14- * You should have received a copy of the GNU Lesser General Public
15- * License along with this library; if not, write to the Free Software
16- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17- * MA 02110-1301 USA
18- */
19-package WebScraping;
20-
21-import java.util.logging.LogRecord;
22-import org.junit.AfterClass;
23-import org.junit.BeforeClass;
24-import org.junit.Test;
25-import static org.junit.Assert.*;
26-
27-import org.junit.AfterClass;
28-import org.junit.BeforeClass;
29-import org.junit.Test;
30-import static org.junit.Assert.*;
31-
32-/**
33- * JUnit test skeleton for HtmlFormatter. testFormat passes a null LogRecord to format() and ends with an unconditional fail(), so it cannot pass as written.
34- * @author kgto
35- */
36-
37-
38-public class HtmlFormatterTest {
39-
40- public HtmlFormatterTest() {
41- }
42-
43- @BeforeClass
44- public static void setUpClass() {
45- }
46-
47- @AfterClass
48- public static void tearDownClass() {
49- }
50-
51- /**
52- * Test of format method, of class HtmlFormatter.
53- */
54- @Test
55- public void testFormat() {
56- System.out.println("format");
57- LogRecord aRecord = null;
58- HtmlFormatter instance = new HtmlFormatter();
59- String expResult = "";
60- String result = instance.format(aRecord);
61- assertEquals(expResult, result);
62- // TODO review the generated test code and remove the default call to fail.
63- fail("The test case is a prototype.");
64- }
65-
66-}
--- trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (nonexistent)
+++ trunk/HtmlTest2/test/utility/test1/SearchDataRWT02.java (revision 115)
@@ -0,0 +1,46 @@
1+
2+package utility.test1;
3+
4+import webScraping.utility.SearchDataRW;
5+import java.io.File;
6+import webScraping.core.SearchData;
7+
8+/**
9+ * File load test: loads test1.xml through SearchDataRW.load and prints every SearchData entry as one tab-separated line on standard output.
10+ * @author kgto
11+ */
12+public class SearchDataRWT02 {
13+ SearchDataRW sio = new SearchDataRW();
14+
15+ File file = new File("test1.xml");
16+
17+ /**
18+ * @param args the command line arguments
19+ */
20+ public static void main(String[] args) {
21+ SearchDataRWT02 test01 = new SearchDataRWT02();
22+ test01.load01();
23+ }
24+
25+ void SearchDataRWT01() { // NOTE(review): ordinary empty method named after a different class, never called here; looks like a leftover, confirm before removing
26+ }
27+
28+ void load01() {
29+ char spchar = '\t';
30+
31+ sio.load(file);
32+ for(int i = 0; i < SearchData.size(); i++) {
33+ SearchData sdat = SearchData.get(i);
34+
35+ StringBuilder sbuf = new StringBuilder();
36+ sbuf.append(sdat.getitem()).append(spchar);
37+ sbuf.append(sdat.getHtmltag()).append(spchar);
38+ sbuf.append(sdat.getHtmlid()).append(spchar);
39+ sbuf.append(sdat.getHtmlclass()).append(spchar);
40+ sbuf.append(sdat.getaround()).append(spchar);
41+ sbuf.append(sdat.getregexp()).append(spchar);
42+ System.out.println(sbuf.toString());
43+ }
44+ }
45+
46+}
--- trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (nonexistent)
+++ trunk/HtmlTest2/test/utility/test1/SearchDataRWT01.java (revision 115)
@@ -0,0 +1,66 @@
1+
2+package utility.test1;
3+
4+import webScraping.utility.SearchDataRW;
5+import java.io.File;
6+import java.lang.reflect.InvocationTargetException;
7+import java.lang.reflect.Method;
8+import java.util.logging.Level;
9+import java.util.logging.Logger;
10+
11+/**
12+ * File write/read test: invokes SearchDataRW's saveMsg404 and loadMsg404 through reflection and writes SearchDataRWT01.xml in between.
13+ * @author kgto
14+ */
15+public class SearchDataRWT01 {
16+ SearchDataRW sio = new SearchDataRW();
17+
18+ File file = new File("SearchDataRWT01.xml");
19+
20+ /**
21+ * @param args the command line arguments
22+ */
23+ public static void main(String[] args) {
24+ SearchDataRWT01 test01 = new SearchDataRWT01();
25+ test01.save01();
26+ test01.load01();
27+ }
28+
29+ void SearchDataRWT01() { // NOTE(review): the void return type makes this an ordinary empty method, not a constructor; confirm which was intended
30+ }
31+
32+ void save01() {
33+ try {
34+ String str = "abc\ndef\nghi\n1111";
35+
36+ //sio.saveMsg404(str);
37+ // Reflection: look saveMsg404 up with getDeclaredMethod and make it accessible before invoking it
38+ Method method = SearchDataRW.class.getDeclaredMethod("saveMsg404", String.class);
39+ method.setAccessible(true);
40+ method.invoke(sio, str);
41+
42+ sio.write(file);
43+
44+ } catch (NoSuchMethodException | SecurityException
45+ | IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) {
46+ Logger.getLogger(SearchDataRWT01.class.getName()).log(Level.SEVERE, null, ex);
47+ }
48+ }
49+
50+ void load01() {
51+ try {
52+ Method method = SearchDataRW.class.getDeclaredMethod("loadMsg404");
53+ method.setAccessible(true);
54+ Object obj = method.invoke(sio);
55+ String str = (String)obj;
56+
57+ System.out.println("loadMsg404 = " + str);
58+
59+
60+ } catch (NoSuchMethodException | SecurityException
61+ | IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) {
62+ Logger.getLogger(SearchDataRWT01.class.getName()).log(Level.SEVERE, null, ex);
63+ }
64+ }
65+
66+}
--- trunk/HtmlTest2/test1.xml (nonexistent)
+++ trunk/HtmlTest2/test1.xml (revision 115)
@@ -0,0 +1,16 @@
1+<?xml version="1.0" encoding="UTF-8" standalone="no"?><searchdata>
2+
3+
4+
5+
6+
7+
8+
9+
10+
11+
12+
13+
14+
15+
16+<url>http://weather.yahoo.co.jp/weather/</url><searchlist listNo="1"><item>天気01</item><htmltag>li</htmltag><htmlclass>point pt1400</htmlclass></searchlist><searchlist listNo="2"><item>天気02</item><htmltag>li</htmltag><htmlclass>point pt1900</htmlclass></searchlist><searchlist listNo="3"><item>天気03</item><htmltag>li</htmltag><htmlclass>point pt3410</htmlclass></searchlist><searchlist listNo="4"><item>天気04</item><htmltag>li</htmltag><htmlclass>point pt4410</htmlclass></searchlist><searchlist listNo="5"><item>天気05</item><htmltag>li</htmltag><htmlclass>point pt5110</htmlclass></searchlist><searchlist listNo="6"><item>天気06</item><htmltag>li</htmltag><htmlclass>point pt5410</htmlclass></searchlist><searchlist listNo="7"><item>天気07</item><htmltag>li</htmltag><htmlclass>point pt5610</htmlclass></searchlist><searchlist listNo="8"><item>天気08</item><htmltag>li</htmltag><htmlclass>point pt6200</htmlclass></searchlist><searchlist listNo="9"><item>天気09</item><htmltag>li</htmltag><htmlclass>point pt6710</htmlclass></searchlist><searchlist listNo="10"><item>天気10</item><htmltag>li</htmltag><htmlclass>point pt7410</htmlclass></searchlist><searchlist listNo="11"><item>天気11</item><htmltag>li</htmltag><htmlclass>point pt8210</htmlclass></searchlist><searchlist listNo="12"><item>天気12</item><htmltag>li</htmltag><htmlclass>point pt8810</htmlclass></searchlist><searchlist listNo="13"><item>天気13</item><htmltag>li</htmltag><htmlclass>point pt9110</htmlclass></searchlist></searchdata>
\ No newline at end of file