// Blog page residue (a "responsive" layout label); not part of the Java program.
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import javax.swing.text.BadLocationException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Reads an HTML document from a URL and prints every event reported by the
 * Swing HTML parser ({@link ParserDelegator}) to {@code System.out}.
 */
public class HTMLParsing
{
    /** Charset name used by {@link #parse(String)} to decode the document. */
    private static final String DEFAULT_CHARSET = "euc-kr";

    /**
     * Parser callback that prints each event it receives.
     *
     * <p>The parser is callback-driven: as each tag, text run or comment is
     * encountered, the matching {@code handle...} method is invoked. The class
     * is static because it uses no state of the enclosing instance.
     */
    private static class CallbackHandler extends HTMLEditorKit.ParserCallback
    {
        /** Prints {@code flush}. */
        @Override
        public void flush() throws BadLocationException
        {
            System.out.println("flush");
        }

        /**
         * Prints the comment text prefixed with {@code "Cmt "}.
         *
         * @param data characters of the comment
         * @param pos  position reported by the parser (unused)
         */
        @Override
        public void handleComment(char[] data, int pos)
        {
            System.out.println("Cmt " + new String(data));
        }

        /**
         * Prints {@code "EOL "}; the end-of-line string itself is not printed.
         *
         * @param eol end-of-line string reported by the parser (unused)
         */
        @Override
        public void handleEndOfLineString(String eol)
        {
            System.out.println("EOL ");
        }

        /**
         * Prints the closing tag as {@code End </tag>}.
         *
         * @param t   tag being closed
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleEndTag(Tag t, int pos)
        {
            System.out.println("End </" + t + ">");
        }

        /**
         * Ignores parser error reports; nothing is printed.
         *
         * @param errorMsg error description (unused)
         * @param pos      position reported by the parser (unused)
         */
        @Override
        public void handleError(String errorMsg, int pos)
        {
            // Intentionally a no-op: error reports are not part of the dump.
        }

        /**
         * Prints a tag that has no separate end tag as {@code Sim <tag>},
         * followed by its attributes.
         *
         * @param t   the tag
         * @param a   attributes of the tag
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos)
        {
            // Uses an explicit "\n" (not println), exactly as the output format always has.
            System.out.print("Sim <" + t + ">\n");
            printAttributes(a);
        }

        /**
         * Prints an opening tag as {@code Str <tag>}, followed by its attributes.
         *
         * @param t   the tag
         * @param a   attributes of the tag
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleStartTag(Tag t, MutableAttributeSet a, int pos)
        {
            System.out.println("Str <" + t + ">");
            printAttributes(a);
        }

        /**
         * Prints a text run indented by two tabs.
         *
         * @param data characters of the text
         * @param pos  position reported by the parser (unused)
         */
        @Override
        public void handleText(char[] data, int pos)
        {
            System.out.println("\t\t" + new String(data));
        }

        /** Prints one tab-indented {@code name=value} line per attribute in {@code a}. */
        private static void printAttributes(MutableAttributeSet a)
        {
            for (Enumeration<?> names = a.getAttributeNames(); names.hasMoreElements();)
            {
                Object attributeName = names.nextElement();
                System.out.print("\t" + attributeName + "=");
                System.out.println(a.getAttribute(attributeName));
            }
        }
    }

    /**
     * Connects to the given URL, reads the document decoded as
     * {@code euc-kr}, and prints the parser events.
     *
     * <p>Any failure is reported with {@code printStackTrace()} and not rethrown.
     *
     * @param str URL of the HTML document
     */
    public void parse(String str)
    {
        parse(str, DEFAULT_CHARSET);
    }

    /**
     * Connects to the given URL, reads the document using the given charset,
     * and prints the parser events.
     *
     * <p>The URL may use any protocol the JVM can open, not only HTTP. The
     * reader is always closed, and an HTTP connection is always disconnected,
     * even when reading or parsing fails. Any failure (malformed URL, I/O
     * error, unsupported or {@code null} charset name) is reported with
     * {@code printStackTrace()} and not rethrown.
     *
     * @param str         URL of the HTML document
     * @param charsetName name of the charset used to decode the document
     */
    public void parse(String str, String charsetName)
    {
        URLConnection con = null;
        try
        {
            URL url = new URL(str);
            con = url.openConnection();
            try (InputStreamReader reader =
                    new InputStreamReader(con.getInputStream(), charsetName))
            {
                // true: ignore a charset declared inside the document, because
                // the stream is already being decoded with charsetName.
                new ParserDelegator().parse(reader, new CallbackHandler(), true);
            }
        }
        catch (Exception e)
        {
            // Best-effort tool: report the problem and return normally.
            e.printStackTrace();
        }
        finally
        {
            // Only HTTP connections have a disconnect(); other protocols are
            // fully released by closing the stream above.
            if (con instanceof HttpURLConnection)
            {
                ((HttpURLConnection) con).disconnect();
            }
        }
    }

    /**
     * Dumps the parser events for a fixed sample page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        HTMLParsing parser = new HTMLParsing();
        parser.parse("http://ecos.bok.or.kr/jsp/use/100keystat/100KeyStatCtl.jsp");
    }
}
import javax.swing.text.html.HTML;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.HttpURLConnection;
import java.util.Enumeration;
import javax.swing.text.BadLocationException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Reads an HTML document from a URL and prints every event reported by the
 * Swing HTML parser ({@link ParserDelegator}) to {@code System.out}.
 */
public class HTMLParsing
{
    /** Charset name used by {@link #parse(String)} to decode the document. */
    private static final String DEFAULT_CHARSET = "euc-kr";

    /**
     * Parser callback that prints each event it receives.
     *
     * <p>The parser is callback-driven: as each tag, text run or comment is
     * encountered, the matching {@code handle...} method is invoked. The class
     * is static because it uses no state of the enclosing instance.
     */
    private static class CallbackHandler extends HTMLEditorKit.ParserCallback
    {
        /** Prints {@code flush}. */
        @Override
        public void flush() throws BadLocationException
        {
            System.out.println("flush");
        }

        /**
         * Prints the comment text prefixed with {@code "Cmt "}.
         *
         * @param data characters of the comment
         * @param pos  position reported by the parser (unused)
         */
        @Override
        public void handleComment(char[] data, int pos)
        {
            System.out.println("Cmt " + new String(data));
        }

        /**
         * Prints {@code "EOL "}; the end-of-line string itself is not printed.
         *
         * @param eol end-of-line string reported by the parser (unused)
         */
        @Override
        public void handleEndOfLineString(String eol)
        {
            System.out.println("EOL ");
        }

        /**
         * Prints the closing tag as {@code End </tag>}.
         *
         * @param t   tag being closed
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleEndTag(Tag t, int pos)
        {
            System.out.println("End </" + t + ">");
        }

        /**
         * Ignores parser error reports; nothing is printed.
         *
         * @param errorMsg error description (unused)
         * @param pos      position reported by the parser (unused)
         */
        @Override
        public void handleError(String errorMsg, int pos)
        {
            // Intentionally a no-op: error reports are not part of the dump.
        }

        /**
         * Prints a tag that has no separate end tag as {@code Sim <tag>},
         * followed by its attributes.
         *
         * @param t   the tag
         * @param a   attributes of the tag
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos)
        {
            // Uses an explicit "\n" (not println), exactly as the output format always has.
            System.out.print("Sim <" + t + ">\n");
            printAttributes(a);
        }

        /**
         * Prints an opening tag as {@code Str <tag>}, followed by its attributes.
         *
         * @param t   the tag
         * @param a   attributes of the tag
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleStartTag(Tag t, MutableAttributeSet a, int pos)
        {
            System.out.println("Str <" + t + ">");
            printAttributes(a);
        }

        /**
         * Prints a text run indented by two tabs.
         *
         * @param data characters of the text
         * @param pos  position reported by the parser (unused)
         */
        @Override
        public void handleText(char[] data, int pos)
        {
            System.out.println("\t\t" + new String(data));
        }

        /** Prints one tab-indented {@code name=value} line per attribute in {@code a}. */
        private static void printAttributes(MutableAttributeSet a)
        {
            for (Enumeration<?> names = a.getAttributeNames(); names.hasMoreElements();)
            {
                Object attributeName = names.nextElement();
                System.out.print("\t" + attributeName + "=");
                System.out.println(a.getAttribute(attributeName));
            }
        }
    }

    /**
     * Connects to the given URL, reads the document decoded as
     * {@code euc-kr}, and prints the parser events.
     *
     * <p>Any failure is reported with {@code printStackTrace()} and not rethrown.
     *
     * @param str URL of the HTML document
     */
    public void parse(String str)
    {
        parse(str, DEFAULT_CHARSET);
    }

    /**
     * Connects to the given URL, reads the document using the given charset,
     * and prints the parser events.
     *
     * <p>The URL may use any protocol the JVM can open, not only HTTP. The
     * reader is always closed, and an HTTP connection is always disconnected,
     * even when reading or parsing fails. Any failure (malformed URL, I/O
     * error, unsupported or {@code null} charset name) is reported with
     * {@code printStackTrace()} and not rethrown.
     *
     * @param str         URL of the HTML document
     * @param charsetName name of the charset used to decode the document
     */
    public void parse(String str, String charsetName)
    {
        URLConnection con = null;
        try
        {
            URL url = new URL(str);
            con = url.openConnection();
            try (InputStreamReader reader =
                    new InputStreamReader(con.getInputStream(), charsetName))
            {
                // true: ignore a charset declared inside the document, because
                // the stream is already being decoded with charsetName.
                new ParserDelegator().parse(reader, new CallbackHandler(), true);
            }
        }
        catch (Exception e)
        {
            // Best-effort tool: report the problem and return normally.
            e.printStackTrace();
        }
        finally
        {
            // Only HTTP connections have a disconnect(); other protocols are
            // fully released by closing the stream above.
            if (con instanceof HttpURLConnection)
            {
                ((HttpURLConnection) con).disconnect();
            }
        }
    }

    /**
     * Dumps the parser events for a fixed sample page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        HTMLParsing parser = new HTMLParsing();
        parser.parse("http://ecos.bok.or.kr/jsp/use/100keystat/100KeyStatCtl.jsp");
    }
}