01.JAVA/Java · 2008. 11. 11. 10:13
반응형
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import javax.swing.text.BadLocationException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Fetches a document from a URL and runs it through the Swing HTML parser,
 * printing every parser event (tags, attributes, text, comments) to
 * {@code System.out}.
 */
public class HTMLParsing
{
 /** Charset name used to decode every fetched document. */
 private static final String PAGE_CHARSET = "euc-kr";

 /**
  * The parser is callback-driven: as each piece of markup is encountered,
  * the matching {@code handle*} method below is invoked. Every handler
  * simply prints what it received.
  *
  * Declared static because it never touches the enclosing instance.
  */
 private static class CallbackHandler extends HTMLEditorKit.ParserCallback
 {
  /** Prints a marker line when the parser asks for a flush. */
  @Override
  public void flush() throws BadLocationException
  {
   System.out.println("flush");
  }

  /** Prints the text of an HTML comment. */
  @Override
  public void handleComment(char[] data, int pos)
  {
   System.out.println("Cmt " + new String(data));
  }

  /** Prints a marker line; the end-of-line string itself is not shown. */
  @Override
  public void handleEndOfLineString(String eol)
  {
   System.out.println("EOL ");
  }

  /** Prints the closing tag. */
  @Override
  public void handleEndTag(Tag t, int pos)
  {
   System.out.println("End </" + t + ">");
  }

  /** Intentionally prints nothing: parser error messages are not reported. */
  @Override
  public void handleError(String errorMsg, int pos)
  {
  }

  /** Prints a tag that has no separate end tag, followed by its attributes. */
  @Override
  public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos)
  {
   System.out.print("Sim <" + t + ">\n");
   printAttributes(a);
  }

  /** Prints an opening tag, followed by its attributes. */
  @Override
  public void handleStartTag(Tag t, MutableAttributeSet a, int pos)
  {
   System.out.println("Str <" + t + ">");
   printAttributes(a);
  }

  /** Prints a run of text, indented by two tabs. */
  @Override
  public void handleText(char[] data, int pos)
  {
   System.out.println("\t\t" + new String(data));
  }

  /** Prints each attribute of {@code a} as a tab-indented {@code name=value} line. */
  private static void printAttributes(MutableAttributeSet a)
  {
   for (Enumeration<?> names = a.getAttributeNames(); names.hasMoreElements();)
   {
    Object attributeName = names.nextElement();
    System.out.print("\t" + attributeName + "=");
    System.out.println(a.getAttribute(attributeName));
   }
  }
 }

 /**
  * Connects to the given URL, reads the response through an InputStream
  * decoded with {@link #PAGE_CHARSET}, and parses it, printing every parser
  * event to {@code System.out}.
  *
  * Any exception (malformed URL, I/O failure, unsupported charset, ...) is
  * caught and its stack trace printed; nothing is propagated to the caller.
  * The input stream is always closed, and an HTTP connection is always
  * disconnected, even when opening or parsing fails.
  *
  * @param str the URL to fetch; not limited to HTTP - any scheme that
  *            {@link URL#openConnection()} supports is accepted
  */
 public void parse(String str)
 {
  URLConnection con = null;
  InputStream in = null;
  try
  {
   con = new URL(str).openConnection();
   in = con.getInputStream();
   InputStreamReader reader = new InputStreamReader(in, PAGE_CHARSET);
   // ignoreCharSet = true: keep decoding with PAGE_CHARSET even if the
   // document declares a different charset.
   new ParserDelegator().parse(reader, new CallbackHandler(), true);
  } catch (Exception e)
  {
   e.printStackTrace();
  } finally
  {
   closeQuietly(in);
   // Only HTTP(S) connections have something to disconnect.
   if (con instanceof HttpURLConnection)
   {
    ((HttpURLConnection) con).disconnect();
   }
  }
 }

 /** Closes {@code in} if it is non-null, printing (not propagating) any close failure. */
 private static void closeQuietly(InputStream in)
 {
  if (in == null)
  {
   return;
  }
  try
  {
   in.close();
  } catch (IOException e)
  {
   e.printStackTrace();
  }
 }

 /** Parses a fixed sample page and prints the resulting parser events. */
 public static void main(String[] args)
 {
  HTMLParsing parser = new HTMLParsing();
  parser.parse("http://ecos.bok.or.kr/jsp/use/100keystat/100KeyStatCtl.jsp");
 }
}
Posted by 1010