View Javadoc

1   /*
2    * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    */
7   
8   package org.dom4j.io;
9   
10  import java.util.ArrayList;
11  import java.util.List;
12  
13  import org.dom4j.Branch;
14  import org.dom4j.Document;
15  import org.dom4j.DocumentFactory;
16  import org.dom4j.Element;
17  import org.dom4j.Namespace;
18  import org.dom4j.QName;
19  import org.dom4j.tree.NamespaceStack;
20  
21  /***
22   * <p>
23   * <code>DOMReader</code> navigates a W3C DOM tree and creates a DOM4J tree
24   * from it.
25   * </p>
26   * 
27   * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
28   * @version $Revision: 1.17 $
29   */
30  public class DOMReader {
31      /*** <code>DocumentFactory</code> used to create new document objects */
32      private DocumentFactory factory;
33  
34      /*** stack of <code>Namespace</code> and <code>QName</code> objects */
35      private NamespaceStack namespaceStack;
36  
37      public DOMReader() {
38          this.factory = DocumentFactory.getInstance();
39          this.namespaceStack = new NamespaceStack(factory);
40      }
41  
42      public DOMReader(DocumentFactory factory) {
43          this.factory = factory;
44          this.namespaceStack = new NamespaceStack(factory);
45      }
46  
47      /***
48       * DOCUMENT ME!
49       * 
50       * @return the <code>DocumentFactory</code> used to create document
51       *         objects
52       */
53      public DocumentFactory getDocumentFactory() {
54          return factory;
55      }
56  
57      /***
58       * <p>
59       * This sets the <code>DocumentFactory</code> used to create new
60       * documents. This method allows the building of custom DOM4J tree objects
61       * to be implemented easily using a custom derivation of
62       * {@link DocumentFactory}
63       * </p>
64       * 
65       * @param docFactory
66       *            <code>DocumentFactory</code> used to create DOM4J objects
67       */
68      public void setDocumentFactory(DocumentFactory docFactory) {
69          this.factory = docFactory;
70          this.namespaceStack.setDocumentFactory(factory);
71      }
72  
73      public Document read(org.w3c.dom.Document domDocument) {
74          if (domDocument instanceof Document) {
75              return (Document) domDocument;
76          }
77  
78          Document document = createDocument();
79  
80          clearNamespaceStack();
81  
82          org.w3c.dom.NodeList nodeList = domDocument.getChildNodes();
83  
84          for (int i = 0, size = nodeList.getLength(); i < size; i++) {
85              readTree(nodeList.item(i), document);
86          }
87  
88          return document;
89      }
90  
91      // Implementation methods
92      protected void readTree(org.w3c.dom.Node node, Branch current) {
93          Element element = null;
94          Document document = null;
95  
96          if (current instanceof Element) {
97              element = (Element) current;
98          } else {
99              document = (Document) current;
100         }
101 
102         switch (node.getNodeType()) {
103             case org.w3c.dom.Node.ELEMENT_NODE:
104                 readElement(node, current);
105 
106                 break;
107 
108             case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE:
109 
110                 if (current instanceof Element) {
111                     Element currentEl = (Element) current;
112                     currentEl.addProcessingInstruction(node.getNodeName(), node
113                             .getNodeValue());
114                 } else {
115                     Document currentDoc = (Document) current;
116                     currentDoc.addProcessingInstruction(node.getNodeName(),
117                             node.getNodeValue());
118                 }
119 
120                 break;
121 
122             case org.w3c.dom.Node.COMMENT_NODE:
123 
124                 if (current instanceof Element) {
125                     ((Element) current).addComment(node.getNodeValue());
126                 } else {
127                     ((Document) current).addComment(node.getNodeValue());
128                 }
129 
130                 break;
131 
132             case org.w3c.dom.Node.DOCUMENT_TYPE_NODE:
133 
134                 org.w3c.dom.DocumentType domDocType 
135                         = (org.w3c.dom.DocumentType) node;
136                 document.addDocType(domDocType.getName(), domDocType
137                         .getPublicId(), domDocType.getSystemId());
138 
139                 break;
140 
141             case org.w3c.dom.Node.TEXT_NODE:
142                 element.addText(node.getNodeValue());
143 
144                 break;
145 
146             case org.w3c.dom.Node.CDATA_SECTION_NODE:
147                 element.addCDATA(node.getNodeValue());
148 
149                 break;
150 
151             case org.w3c.dom.Node.ENTITY_REFERENCE_NODE:
152 
153                 // is there a better way to get the value of an entity?
154                 org.w3c.dom.Node firstChild = node.getFirstChild();
155 
156                 if (firstChild != null) {
157                     element.addEntity(node.getNodeName(), firstChild
158                             .getNodeValue());
159                 } else {
160                     element.addEntity(node.getNodeName(), "");
161                 }
162 
163                 break;
164 
165             case org.w3c.dom.Node.ENTITY_NODE:
166                 element.addEntity(node.getNodeName(), node.getNodeValue());
167 
168                 break;
169 
170             default:
171                 System.out.println("WARNING: Unknown DOM node type: "
172                         + node.getNodeType());
173         }
174     }
175 
176     protected void readElement(org.w3c.dom.Node node, Branch current) {
177         int previouslyDeclaredNamespaces = namespaceStack.size();
178 
179         String namespaceUri = node.getNamespaceURI();
180         String elementPrefix = node.getPrefix();
181 
182         if (elementPrefix == null) {
183             elementPrefix = "";
184         }
185 
186         org.w3c.dom.NamedNodeMap attributeList = node.getAttributes();
187 
188         if ((attributeList != null) && (namespaceUri == null)) {
189             // test if we have an "xmlns" attribute
190             org.w3c.dom.Node attribute = attributeList.getNamedItem("xmlns");
191 
192             if (attribute != null) {
193                 namespaceUri = attribute.getNodeValue();
194                 elementPrefix = "";
195             }
196         }
197 
198         QName qName = namespaceStack.getQName(namespaceUri,
199                 node.getLocalName(), node.getNodeName());
200         Element element = current.addElement(qName);
201 
202         if (attributeList != null) {
203             int size = attributeList.getLength();
204             List attributes = new ArrayList(size);
205 
206             for (int i = 0; i < size; i++) {
207                 org.w3c.dom.Node attribute = attributeList.item(i);
208 
209                 // Define all namespaces first then process attributes later
210                 String name = attribute.getNodeName();
211 
212                 if (name.startsWith("xmlns")) {
213                     String prefix = getPrefix(name);
214                     String uri = attribute.getNodeValue();
215 
216                     Namespace namespace = namespaceStack.addNamespace(prefix,
217                             uri);
218                     element.add(namespace);
219                 } else {
220                     attributes.add(attribute);
221                 }
222             }
223 
224             // now add the attributes, the namespaces should be available
225             size = attributes.size();
226 
227             for (int i = 0; i < size; i++) {
228                 org.w3c.dom.Node attribute = (org.w3c.dom.Node) attributes
229                         .get(i);
230                 QName attributeQName = namespaceStack.getQName(attribute
231                         .getNamespaceURI(), attribute.getLocalName(), attribute
232                         .getNodeName());
233                 element.addAttribute(attributeQName, attribute.getNodeValue());
234             }
235         }
236 
237         // Recurse on child nodes
238         org.w3c.dom.NodeList children = node.getChildNodes();
239 
240         for (int i = 0, size = children.getLength(); i < size; i++) {
241             org.w3c.dom.Node child = children.item(i);
242             readTree(child, element);
243         }
244 
245         // pop namespaces from the stack
246         while (namespaceStack.size() > previouslyDeclaredNamespaces) {
247             namespaceStack.pop();
248         }
249     }
250 
251     protected Namespace getNamespace(String prefix, String uri) {
252         return getDocumentFactory().createNamespace(prefix, uri);
253     }
254 
255     protected Document createDocument() {
256         return getDocumentFactory().createDocument();
257     }
258 
259     protected void clearNamespaceStack() {
260         namespaceStack.clear();
261 
262         if (!namespaceStack.contains(Namespace.XML_NAMESPACE)) {
263             namespaceStack.push(Namespace.XML_NAMESPACE);
264         }
265     }
266 
267     private String getPrefix(String xmlnsDecl) {
268         int index = xmlnsDecl.indexOf(':', 5);
269 
270         if (index != -1) {
271             return xmlnsDecl.substring(index + 1);
272         } else {
273             return "";
274         }
275     }
276 }
277 
278 /*
279  * Redistribution and use of this software and associated documentation
280  * ("Software"), with or without modification, are permitted provided that the
281  * following conditions are met:
282  * 
283  * 1. Redistributions of source code must retain copyright statements and
284  * notices. Redistributions must also contain a copy of this document.
285  * 
286  * 2. Redistributions in binary form must reproduce the above copyright notice,
287  * this list of conditions and the following disclaimer in the documentation
288  * and/or other materials provided with the distribution.
289  * 
290  * 3. The name "DOM4J" must not be used to endorse or promote products derived
291  * from this Software without prior written permission of MetaStuff, Ltd. For
292  * written permission, please contact dom4j-info@metastuff.com.
293  * 
294  * 4. Products derived from this Software may not be called "DOM4J" nor may
295  * "DOM4J" appear in their names without prior written permission of MetaStuff,
296  * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
297  * 
298  * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
299  * 
300  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
301  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
302  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
303  * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
304  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
305  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
306  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
307  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
308  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
309  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
310  * POSSIBILITY OF SUCH DAMAGE.
311  * 
312  * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
313  */