xerces를 사용한 dom 방식의 xml parser lib

03.HTML 4.X, HTML5, XML.../XML2009. 1. 23. 14:42

xerces를 사용한 dom 방식의 xml parser lib

첨부파일이 아니라 내용에 붙였습니다

Xmls x = new Xmls(...)에서

x.domLoad(...);

....

x.domWrite(...);

이런 방식으로 사용하시면 됩니다

자세한 설명은 생략하겠습니다

xml의 구조에 대해서 조금만 아신다면 금방 파악이 되실 겁니다

상용으로 잘 사용하고 있습니다

붙이다 보니 tab이 먹지 않아서 정렬이 안되어 있는데

copy해서 editor에서 정리한 다음 보십시오

import java.io.*;
import java.util.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;

// xercesImpl.jar를 복사한다
// java_home/jre/lib/src.zip에 있음
import org.apache.xpath.*;
import org.w3c.dom.*;
import org.xml.sax.*;
/*****
XML 문서의 구조
- 최상위 요소를 root라고 한다
XML 문서는 단 하나의 root요소만을 가질 수 있다. 이를 흔히 top level이라고 한다
root요소가 2개 이상일 경우에는 에러가 발생한다
- root요소 앞에는 XML 선언을 담고 있는 프롤로그가 올 수 있다
XML 문서마다 사용된 XML 버전 정보를 프롤로그에 넣어주는것이 좋다
<?xml version="1.0" ?>과 같이 버전 정보가 포함될 수 있다
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="euc-kr"?> encoding정보도 포함될 수 있다
반드시 <?xml... 은 붙여야 한다
- 요소는 반드시 <와 >로 둘러싼 tag로 구성된다
element와 attribute로 나뉘어 진다
<tag>text</tag>
<tag attribute="text">
<tag attribute="text"></tag>
<tag attribute="text"/>
<tag></tag>
<tag/>
- 주석은 <!-- ... --> 이다

DOM (Document Object Model)
- XML문서를 메모리상에 올려놓은 다음 데이타를 찾아 들어가는 방식을 사용한다
- 큰 문서일경우 무조건 다 읽기 때문에 로드 시간이 걸리거나 메모리를 낭비하게 된다
- 문서 편집을 할 수 있다
- 데이타를 저장되어 있는 순서대로 받는다
SAX (Simple API for XML)
- 큰 문서를 효율적으로 분석하기 위해서 사용한다
- 이벤트가 발생하면 그에 해당하는 데이타를 추출한다. 속도가 DOM에 비해서 빠르다
- 읽기 전용이다. 문서를 편집할 수 없다
- 데이타를 SAX가 주는 순서대로 받는다. 파서가 문서를 탐색하는 순서에 대해서 어떠한 작업도 할 수 없다
- 원하는 요소만 골라서 읽을 수 있다
-----------------------------------------------------------------------------------------
현재 Java SAX Parser의 종류
- Xerces (Apache Foundation DOM & SAX)
SAX와 DOM 모두를 다룰 수 있게 하는 XML API를 제공하기 위한 Apache Group의 프로젝트이다
org.apache.xerces.parser를 사용한다
- Crimson (Sun Project X, JAXP Default Parser)
Sun ProjectX의 일환으로 JAXP의 default parser로 채택되어져 있으며 XMLReader 인터페이스를 구현하므로
SAX API를 이용하여 여러가지 다양한 파싱기능을 사용할 수 있다
- JAXP (Sun, Java API for XML Processing)
Xerces처럼 DOM과 SAX 둘 다를 사용할 수 있도록 지원해주는 API이다
한가지 다른점은 SAX API의 인터페이스를 직접 구현한 클래스를 제공하는것이 아니라
abstract 계층을 한단계 얹은 형태를 제공한다
- Xalan은 xslt 프로세서이다. xsl을 이용하기 위해 필요하다
*****/

/**
 * Small JAXP helper that handles one XML document either through DOM
 * (load, query/edit by XPath, write back) or through SAX (stream into a handler).
 *
 * <p>Typical DOM usage: {@code new Xmls(Xmls.DOM, false, null)}, then {@code domLoad(...)},
 * any number of {@code domGet.../domSet.../domAppend.../domDelete...} calls, and finally
 * {@code domWrite(...)}.
 *
 * <p>XPath expressions such as {@code "personnel/person/email"} are evaluated with
 * {@code org.apache.xpath.XPathAPI} using the loaded document as the context node.
 * Selected nodes are processed in the order the XPath engine returns them, which is why the
 * index-based overloads exist.
 *
 * <p>NOTE(review): {@code Codes.checkNull} is defined outside this file; the original notes
 * describe it as a "null or empty string" test - confirm.
 * NOTE(review): the parser factories are used with their default settings; if untrusted XML
 * is ever parsed here, external entity / DTD processing should be reviewed.
 */
public class Xmls {
    public static final int NONE = 0;
    public static final int DOM = 1;
    public static final int SAX = 2;

    // DOM state (only initialised when the instance is created with Type == DOM)
    private DocumentBuilderFactory dbFactory;
    private DocumentBuilder dBuilder;
    private Document document = null;
    private TransformerFactory tFactory;
    private Transformer transformer;

    // SAX state (only initialised when the instance is created with any other Type)
    private javax.xml.parsers.SAXParserFactory spFactory;
    private javax.xml.parsers.SAXParser sParser;
    private org.xml.sax.helpers.DefaultHandler defHandler;

    /**
     * Creates a DOM-mode or SAX-mode helper.
     *
     * @param Type       {@link #DOM} prepares a document builder and a transformer; every other
     *                   value (including {@link #NONE} and {@link #SAX}) prepares a SAX parser
     * @param valid      passed to {@code DocumentBuilderFactory.setValidating}; it is not applied
     *                   to the SAX parser factory
     * @param defHandler handler that receives SAX events in {@code saxLoad}; stored only in SAX
     *                   mode
     * @throws Exception if the underlying JAXP factories cannot create the parser/transformer
     */
    public Xmls(int Type, boolean valid, org.xml.sax.helpers.DefaultHandler defHandler) throws Exception {
        if (Type == DOM) {
            dbFactory = DocumentBuilderFactory.newInstance();
            dbFactory.setValidating(valid);
            dBuilder = dbFactory.newDocumentBuilder();
            // The transformer is what domWrite uses to serialise the document.
            tFactory = TransformerFactory.newInstance();
            transformer = tFactory.newTransformer();
        } else {
            this.defHandler = defHandler;
            spFactory = javax.xml.parsers.SAXParserFactory.newInstance();
            sParser = spFactory.newSAXParser();
        }
    }

    //******************** DOM methods

    /**
     * Parses the XML read from {@code in} and makes it the current document.
     *
     * @throws Exception if parsing fails
     */
    public void domLoad(InputStream in) throws Exception {
        document = dBuilder.parse(in);
    }

    /**
     * Parses the XML file at {@code xmlPath} and makes it the current document.
     *
     * @throws Exception if the file cannot be read or parsed
     */
    public void domLoad(String xmlPath) throws Exception {
        document = dBuilder.parse(new File(xmlPath));
    }

    /** Returns the current document, or {@code null} if none is loaded. */
    public Document domGetDocument() {
        return document;
    }

    /**
     * Parses an XML string and makes it the current document,
     * e.g. {@code x.domParse("<node>a</node>")}.
     *
     * @throws Exception if parsing fails
     */
    public void domParse(String xmlString) throws Exception {
        document = makeTempDoc(xmlString);
    }

    /**
     * Serialises the current document to {@code out}.
     *
     * @param encoding output encoding; when {@code Codes.checkNull} reports it as empty the
     *                 transformer's output properties are left untouched
     * @throws Exception if the transformation fails
     */
    public void domWrite(OutputStream out, String encoding) throws Exception {
        write(new StreamResult(out), encoding);
    }

    /**
     * Serialises the current document into memory and prints the result to {@code out}.
     * The writer is neither flushed nor closed here.
     *
     * @param encoding see {@link #domWrite(OutputStream, String)}
     * @throws Exception if the transformation fails
     */
    public void domWrite(PrintWriter out, String encoding) throws Exception {
        StringWriter buffer = new StringWriter();
        write(new StreamResult(buffer), encoding);
        out.print(buffer.toString());
    }

    /**
     * Serialises the current document to the file at {@code xmlPath}.
     *
     * @param encoding see {@link #domWrite(OutputStream, String)}, e.g. {@code "euc-kr"}
     * @throws Exception if the transformation fails
     */
    public void domWrite(String xmlPath, String encoding) throws Exception {
        write(new StreamResult(new File(xmlPath)), encoding);
    }

    /** Drops the reference to the current document; call when all processing is finished. */
    public void domClose() {
        this.document = null;
    }

    /**
     * Sets attribute {@code attrName} on every element selected by {@code xpath}, giving the
     * i-th element the i-th value. To create an element and then an attribute on it, call
     * {@code domAppendElement} first and this method second.
     *
     * @throws Exception if the number of selected nodes differs from {@code attrValue.length},
     *                   or if the XPath cannot be evaluated
     */
    public void domAppendAttribute(String xpath, String attrName, String[] attrValue) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        // Each node gets its own value, so the counts must line up exactly.
        if (nodes.getLength() != attrValue.length) {
            throw new Exception("node 갯수와 value 갯수가 맞지 않습니다.");
        }

        for (int i = 0; i < nodes.getLength(); i++) {
            Element target = (Element) nodes.item(i);
            target.setAttribute(attrName, attrValue[i]);
        }
    }

    /**
     * Removes attribute {@code attrName} from every element selected by {@code xpath}.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domDeleteAttribute(String xpath, String attrName) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        for (int i = 0; i < nodes.getLength(); i++) {
            Element target = (Element) nodes.item(i);
            target.removeAttribute(attrName);
        }
    }

    /**
     * Appends a new child element named {@code elementName} to every element selected by
     * {@code xpath}, e.g.
     * {@code x.domAppendElement("personnel/person", "nation", new String[] { "korea" })}.
     *
     * <p>The i-th new element receives a text child with {@code elementValue[i]} when that
     * entry exists and is not reported empty by {@code Codes.checkNull}; otherwise it is left
     * without text. The number of nodes and values may differ.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domAppendElement(String xpath, String elementName, String[] elementValue) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        for (int i = 0; i < nodes.getLength(); i++) {
            // A fresh element per iteration: a DOM node can only have one parent.
            Element created = document.createElement(elementName);
            if (i < elementValue.length && Codes.checkNull(elementValue[i]) == false) {
                created.appendChild(document.createTextNode(elementValue[i]));
            }
            Element parent = (Element) nodes.item(i);
            parent.appendChild(created);
        }
    }

    /**
     * Removes every node selected by {@code xpath} together with its subtree,
     * e.g. {@code x.domDeleteElement("personnel/person")}.
     *
     * <p>Attribute nodes are removed from their owner element. Nodes that have no parent
     * (such as the document node) are skipped.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domDeleteElement(String xpath) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
                // Attr.getParentNode() is always null; attributes hang off their owner element.
                Attr attr = (Attr) node;
                Element owner = attr.getOwnerElement();
                if (owner != null) {
                    owner.removeAttributeNode(attr);
                }
                continue;
            }
            Node parent = node.getParentNode();
            if (parent != null) {
                parent.removeChild(node);
            }
        }
    }

    /**
     * Parses {@code xmlString} and appends a deep copy of its root element to every node
     * selected by {@code xpath}, e.g.
     * {@code x.domAppendParse("personnel/person", "<node2><node3 id=\"1\">a</node3></node2>")}.
     *
     * @throws Exception if the XPath cannot be evaluated or the string cannot be parsed
     */
    public void domAppendParse(String xpath, String xmlString) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);
        Node fragmentRoot = makeTempDoc(xmlString).getDocumentElement();

        for (int i = 0; i < nodes.getLength(); i++) {
            // Import inside the loop so that every target receives its own copy.
            Node copy = document.importNode(fragmentRoot, true);
            nodes.item(i).appendChild(copy);
        }
    }

    /**
     * Appends a text node to existing nodes without touching their current children.
     * The i-th selected node receives {@code txtValue[i]} when that entry exists and is not
     * reported empty by {@code Codes.checkNull}. The number of nodes and values may differ.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domAppendTextNode(String xpath, String[] txtValue) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        for (int i = 0; i < nodes.getLength(); i++) {
            if (i < txtValue.length && Codes.checkNull(txtValue[i]) == false) {
                nodes.item(i).appendChild(document.createTextNode(txtValue[i]));
            }
        }
    }

    /**
     * Returns how many nodes (elements or attributes) {@code xpath} selects.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public int domGetNodeCount(String xpath) throws Exception {
        return XPathAPI.selectNodeList(document, xpath).getLength();
    }

    /**
     * Finds the position, within the nodes selected by {@code xpath}, of the first node whose
     * attribute {@code attrName} equals {@code attrValue}.
     *
     * @return the zero-based index of the first match, or {@code -1} when nothing matches or
     *         {@code attrValue} is reported empty by {@code Codes.checkNull}
     * @throws Exception if the XPath cannot be evaluated
     */
    public int domGetIndexAttribute(String xpath, String attrName, String attrValue) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        // An empty search value can never match, so there is nothing to scan for.
        if (Codes.checkNull(attrValue)) {
            return -1;
        }

        for (int i = 0; i < nodes.getLength(); i++) {
            NamedNodeMap attrs = nodes.item(i).getAttributes();
            if (attrs == null) {
                continue; // not an element, so it cannot carry attributes
            }
            Node attr = attrs.getNamedItem(attrName);
            if (attr != null && attrValue.equals(attr.getNodeValue())) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Returns the values of attribute {@code attrName} for all nodes selected by {@code xpath}.
     * Nodes that do not carry the attribute contribute nothing, so the list can be shorter than
     * the node count.
     *
     * @return list of {@code String} values in selection order
     * @throws Exception if the XPath cannot be evaluated
     */
    public ArrayList domGetAttribute(String xpath, String attrName) throws Exception {
        return getAttribute(xpath, attrName, -1);
    }

    /**
     * Returns the value of attribute {@code attrName} on the {@code index}-th node selected by
     * {@code xpath}. A negative index returns the first value found on any selected node.
     *
     * @throws IndexOutOfBoundsException if there is no such node or it lacks the attribute
     * @throws Exception if the XPath cannot be evaluated
     */
    public String domGetAttribute(String xpath, String attrName, int index) throws Exception {
        ArrayList found = getAttribute(xpath, attrName, index);
        if (found.isEmpty()) {
            throw new IndexOutOfBoundsException(
                "no attribute '" + attrName + "' at index " + index + " for xpath: " + xpath);
        }
        return (String) found.get(0);
    }

    /**
     * Sets the existing attribute {@code attrName} to {@code attrValue} on every node selected
     * by {@code xpath}. Nodes without the attribute are left alone.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetAttribute(String xpath, String attrName, String attrValue) throws Exception {
        setAttribute(xpath, attrName, null, new String[] { attrValue }, -1, 0);
    }

    /**
     * Replaces the attribute value {@code origValue} with {@code newValue}. Only the first node
     * whose attribute equals {@code origValue} is changed. When {@code origValue} is reported
     * empty by {@code Codes.checkNull}, every selected node's attribute is set to
     * {@code newValue}. The comparison is case-sensitive.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetAttribute(String xpath, String attrName, String origValue, String newValue) throws Exception {
        setAttribute(xpath, attrName, origValue, new String[] { newValue }, -1, 0);
    }

    /**
     * Sets the existing attribute {@code attrName} on the {@code index}-th selected node only.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetAttribute(String xpath, String attrName, String attrValue, int index) throws Exception {
        setAttribute(xpath, attrName, null, new String[] { attrValue }, index, 0);
    }

    /**
     * Sets the existing attribute {@code attrName} on the i-th selected node to
     * {@code attrValue[i]}, for as many nodes as there are values. An empty array changes
     * nothing.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetAttribute(String xpath, String attrName, String[] attrValue) throws Exception {
        setAttribute(xpath, attrName, null, attrValue, -1, attrValue.length);
    }

    /**
     * Returns the text values found under {@code xpath}, e.g. {@code "personnel/person/email"}.
     * See {@code getValue} for how each kind of node contributes.
     *
     * @return list of {@code String} values (entries may be {@code null})
     * @throws Exception if the XPath cannot be evaluated
     */
    public ArrayList domGetValue(String xpath) throws Exception {
        return getValue(xpath, -1);
    }

    /**
     * Returns the first text value of the {@code index}-th node selected by {@code xpath}.
     * A negative index returns the first value found on any selected node.
     *
     * @throws IndexOutOfBoundsException if there is no such node or it yields no value
     * @throws Exception if the XPath cannot be evaluated
     */
    public String domGetValue(String xpath, int index) throws Exception {
        ArrayList found = getValue(xpath, index);
        if (found.isEmpty()) {
            throw new IndexOutOfBoundsException(
                "no value at index " + index + " for xpath: " + xpath);
        }
        return (String) found.get(0);
    }

    /**
     * Replaces the text of every node selected by {@code xpath} with {@code value}.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetValue(String xpath, String value) throws Exception {
        setValue(xpath, new String[] { value }, -1, 0);
    }

    /**
     * Replaces the text of the {@code index}-th node selected by {@code xpath} with
     * {@code value}.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetValue(String xpath, String value, int index) throws Exception {
        setValue(xpath, new String[] { value }, index, 0);
    }

    /**
     * Replaces the text of the i-th selected node with {@code value[i]}, for as many nodes as
     * there are values. An empty array changes nothing.
     *
     * @throws Exception if the XPath cannot be evaluated
     */
    public void domSetValue(String xpath, String[] value) throws Exception {
        setValue(xpath, value, -1, value.length);
    }

    /** Shared tail of the domWrite overloads: apply the encoding, then serialise the document. */
    private void write(Result result, String encoding) throws Exception {
        setEncoding(encoding);
        transformer.transform(new DOMSource(document), result);
    }

    /**
     * Configures the transformer for {@code domWrite}, e.g. {@code setEncoding("euc-kr")}.
     * When an encoding is given, indentation is switched on as well. An empty encoding leaves
     * whatever properties were set by an earlier call in place.
     */
    private void setEncoding(String encoding) {
        if (Codes.checkNull(encoding)) {
            return;
        }

        transformer.setOutputProperty(OutputKeys.ENCODING, encoding);
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    }

    /** Parses an XML string into a standalone document using this instance's builder. */
    private Document makeTempDoc(String nodeString) throws Exception {
        return dBuilder.parse(new InputSource(new StringReader(nodeString)));
    }

    /**
     * Collects the values of attribute {@code attrName} from the selected nodes.
     *
     * @param index when {@code >= 0}, only the node at that position is inspected;
     *              otherwise all nodes are
     */
    private ArrayList getAttribute(String xpath, String attrName, int index) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);
        ArrayList values = new ArrayList();

        for (int i = 0; i < nodes.getLength(); i++) {
            if (index >= 0 && i != index) {
                continue;
            }

            NamedNodeMap attrs = nodes.item(i).getAttributes();
            if (attrs == null) {
                continue; // only elements have an attribute map
            }
            Node attr = attrs.getNamedItem(attrName);
            if (attr != null) {
                values.add(attr.getNodeValue());
            }
        }

        return values;
    }

    /**
     * Updates an existing attribute on the selected nodes. Nodes lacking the attribute are
     * skipped.
     *
     * @param attrValue when non-empty (and {@code index < 0}), only the first node whose current
     *                  attribute value equals it is changed
     * @param setValue  new value(s)
     * @param index     when {@code >= 0}, only the node at that position is changed
     * @param length    when {@code > 0}, node i gets {@code setValue[i]} and processing stops
     *                  after {@code length} nodes; when {@code 0}, every node gets
     *                  {@code setValue[0]}
     */
    private void setAttribute(String xpath, String attrName, String attrValue, String[] setValue, int index, int length) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        // An empty value array would otherwise be mistaken for "broadcast setValue[0]".
        if (setValue.length == 0) {
            return;
        }

        boolean unconditional = index > -1 || Codes.checkNull(attrValue);

        for (int i = 0; i < nodes.getLength(); i++) {
            if (length > 0 && i >= length) {
                break;
            }
            if (index >= 0 && i != index) {
                continue;
            }

            NamedNodeMap attrs = nodes.item(i).getAttributes();
            if (attrs == null) {
                continue; // only elements have an attribute map
            }
            Node attr = attrs.getNamedItem(attrName);
            if (attr == null) {
                continue;
            }

            int off = (length > 0) ? i : 0;
            if (unconditional) {
                attr.setNodeValue(setValue[off]);
            } else if (attrValue.equals(attr.getNodeValue())) {
                // Replace-by-old-value mode changes the first match only.
                attr.setNodeValue(setValue[off]);
                break;
            }
        }
    }

    /**
     * Collects text values from the selected nodes.
     *
     * <ul>
     *   <li>Node without children (attribute, text node, empty element, ...): its
     *       {@code getNodeValue()} is added, which may be {@code null}.</li>
     *   <li>Element with children: the value of each direct child of type {@code TEXT_NODE} is
     *       added, so one element can contribute several entries or none.</li>
     *   <li>Any other node with children (e.g. the document): contributes nothing.</li>
     * </ul>
     *
     * @param index when {@code >= 0}, only the node at that position is inspected
     */
    private ArrayList getValue(String xpath, int index) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);
        ArrayList values = new ArrayList();

        for (int i = 0; i < nodes.getLength(); i++) {
            if (index >= 0 && i != index) {
                continue;
            }

            Node node = nodes.item(i);
            if (!node.hasChildNodes()) {
                values.add(node.getNodeValue());
                continue;
            }
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                // A document has no meaningful single value; deliberately contribute nothing.
                continue;
            }

            NodeList children = node.getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                Node child = children.item(j);
                if (child.getNodeType() == Node.TEXT_NODE) {
                    values.add(child.getNodeValue());
                }
            }
        }

        return values;
    }

    /**
     * Replaces the text of the selected nodes. For an element, all direct {@code Text}
     * children (CDATA sections included) are removed and one new text node is appended; for
     * any other node the node value is set directly.
     *
     * @param value  new value(s)
     * @param index  when {@code >= 0}, only the node at that position is changed
     * @param length when {@code > 0}, node i gets {@code value[i]} and processing stops after
     *               {@code length} nodes; when {@code 0}, every node gets {@code value[0]}
     */
    private void setValue(String xpath, String[] value, int index, int length) throws Exception {
        NodeList nodes = XPathAPI.selectNodeList(document, xpath);

        // An empty value array would otherwise be mistaken for "broadcast value[0]".
        if (value.length == 0) {
            return;
        }

        for (int i = 0; i < nodes.getLength(); i++) {
            if (length > 0 && i >= length) {
                break;
            }
            if (index >= 0 && i != index) {
                continue;
            }

            int off = (length > 0) ? i : 0;
            Node node = nodes.item(i);

            if (node.hasChildNodes()) {
                // Old text must go first, otherwise the new value is appended to it.
                // getChildNodes() is live, so walk backwards: removing an entry then never
                // shifts a not-yet-visited sibling past the cursor.
                NodeList children = node.getChildNodes();
                for (int j = children.getLength() - 1; j >= 0; j--) {
                    Node child = children.item(j);
                    if (child instanceof Text) {
                        node.removeChild(child);
                    }
                }
            }

            if (node.getNodeType() == Node.ELEMENT_NODE) {
                node.appendChild(document.createTextNode(value[off]));
            } else {
                // Attribute, text node, CDATA section, ...: the value lives on the node itself.
                node.setNodeValue(value[off]);
            }
        }
    }

    //******************** SAX methods
    // The handler is supplied by the caller through the constructor so that the caller can
    // override its callbacks.

    /**
     * Parses {@code in} with the SAX parser, delivering events to the configured handler.
     *
     * @throws Exception if parsing fails
     */
    public void saxLoad(InputStream in) throws Exception {
        sParser.parse(in, defHandler);
    }

    /**
     * Parses the file at {@code xmlPath} with the SAX parser, delivering events to the
     * configured handler.
     *
     * @throws Exception if the file cannot be read or parsed
     */
    public void saxLoad(String xmlPath) throws Exception {
        sParser.parse(new File(xmlPath), defHandler);
    }

    /** Returns the handler given to the constructor ({@code null} in DOM mode). */
    public org.xml.sax.helpers.DefaultHandler saxGetDefHandler() {
        return defHandler;
    }
}

[출처] xerces를 사용한 dom 방식의 xml parser lib |작성자 yc7497

Posted by 1010

일	월	화	수	목	금	토
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31

개발자(開發者) a developer

xerces를 사용한 dom 방식의 xml parser lib

카테고리

공지사항

태그목록

최근에 올라온 글

최근에 달린 댓글

최근에 받은 트랙백

글 보관함

달력

링크

티스토리툴바