import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Fetches a page, lists its {@code <a>} and {@code <img>} tags on standard
 * output and reports how many of each were seen.
 *
 * <p>The two counters are static and are only ever incremented, so they hold
 * running totals across every parse performed in the JVM.
 */
public class HTMLParsing {
 /** Running total of {@code <a>} start tags seen so far (never reset). */
 static int ateg_count = 0;
 /** Running total of {@code <img>} tags seen so far (never reset). */
 static int img_count = 0;

 /** Charset used to decode the fetched page. */
 private static final String PAGE_CHARSET = "euc-kr";

 /** Matches an href value that contains "http://" anywhere, ignoring case. */
 private static final String HTTP_LINK_REGEX = "(?i).*http://.*";

 /** URL used by {@link #main(String[])} when no argument is supplied. */
 private static final String DEFAULT_URL = "http://www.naver.com";

 /**
  * Parser callback that counts and prints anchor and image tags.
  *
  * <p>Declared static because it only touches the static counters and does
  * not need a reference to an enclosing {@code HTMLParsing} instance.
  */
 private static class CallbackHandler extends HTMLEditorKit.ParserCallback {

  /**
   * Counts every {@code <a>} start tag and prints its href when the href
   * contains "http://".
   *
   * @param tag the tag that was opened
   * @param a   the attributes of the tag
   * @param pos position of the tag in the document
   */
  @Override
  public void handleStartTag(HTML.Tag tag, MutableAttributeSet a, int pos) {
   if (tag != HTML.Tag.A) {
    return;
   }
   ateg_count++;
   System.out.print("< a href > 태그 ");

   // Anchors such as <a name="top"> have no href; guard against the
   // resulting null instead of aborting the whole parse with an NPE.
   Object href = a.getAttribute(HTML.Attribute.HREF);
   if (href != null && href.toString().matches(HTTP_LINK_REGEX)) {
    System.out.println(href);
   } else {
    // Always finish the line so the next message starts on a fresh one.
    System.out.println();
   }
  }

  /**
   * Counts every {@code <img>} tag and prints its src attribute
   * (printed as "null" when the attribute is absent).
   *
   * @param tag the simple (empty) tag that was encountered
   * @param a   the attributes of the tag
   * @param pos position of the tag in the document
   */
  @Override
  public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet a, int pos) {
   if (tag == HTML.Tag.IMG) {
    img_count++;
    System.out.println("< img > 태그  "+
    a.getAttribute(HTML.Attribute.SRC));
   }
  }
 }

 /**
  * Downloads the document at {@code str}, feeds it to the HTML parser and
  * prints the running tag totals.
  *
  * <p>Any URL scheme whose connection can supply an input stream is
  * accepted. Failures are reported with a stack trace on standard error and
  * are not propagated to the caller.
  *
  * @param str the URL of the document to parse
  */
 public void parse(String str) {
  URLConnection con = null;
  InputStreamReader reader = null;
  try {
   con = new URL(str).openConnection();
   reader = new InputStreamReader(con.getInputStream(), PAGE_CHARSET);

   // Third argument: ignore charset declarations inside the document,
   // since the reader has already fixed the decoding.
   new ParserDelegator().parse(reader, new CallbackHandler(), true);
   System.out.println("<A href > 태그의 갯수는  " + ateg_count);
   System.out.println("<img > 태그의 갯수는 "+img_count);
  } catch (Exception e) {
   // Best-effort tool: report the problem but never throw to the caller.
   e.printStackTrace();
  } finally {
   closeQuietly(reader);
   if (con instanceof HttpURLConnection) {
    ((HttpURLConnection) con).disconnect();
   }
  }
 }

 /** Closes {@code reader} if it was opened, ignoring any failure while closing. */
 private static void closeQuietly(InputStreamReader reader) {
  if (reader == null) {
   return;
  }
  try {
   reader.close();
  } catch (IOException ignored) {
   // Nothing useful can be done about a failed close during cleanup.
  }
 }

 /**
  * Prints the HTTP status code, content type and content length reported
  * for {@code str}.
  *
  * @param str an HTTP(S) URL
  * @throws IOException if the URL is malformed or the request fails
  */
 public void content(String str) throws IOException {
  URL url = new URL(str);
  HttpURLConnection urlcon = (HttpURLConnection) url.openConnection();
  try {
   String contentType = urlcon.getContentType();
   int statecode = urlcon.getResponseCode();
   int contentLength = urlcon.getContentLength();
   System.out.println("State Code : " + statecode);
   System.out.println("Content Type : " + contentType);
   System.out.println("Content Length : " + contentLength);
  } finally {
   // Release the connection even when reading the headers fails.
   urlcon.disconnect();
  }
 }

 /**
  * Prints the response headers and the tag listing for one URL.
  *
  * @param args optional; {@code args[0]} is the URL to inspect, defaulting
  *             to {@code http://www.naver.com} when absent
  * @throws IOException if reading the response headers fails
  */
 public static void main(String[] args) throws IOException {
  HTMLParsing parser = new HTMLParsing();
  String url = args.length > 0 ? args[0] : DEFAULT_URL;

  parser.content(url);
  parser.parse(url);
 }
}


 

신고

'programing > JAVA' 카테고리의 다른 글

java & jsp 개발 팁  (0) 2009.08.19
JSP charset  (0) 2009.08.14
HTML A태그, IMG태그 추출  (1) 2009.06.22
HTML 태그제거 소스  (0) 2009.06.22
계층형 게시판  (0) 2009.06.03
File Up & Down  (0) 2009.06.02
Posted by 대절님

댓글을 달아 주세요

  1. BlogIcon longchamp 2013.04.10 18:35 신고  댓글주소  댓글쓰기 수정/삭제

    태양이 바다에 미광을 비추면,나는 너를 생각한다.



티스토리 툴바