1 /*
2 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3 *
4 * This software is open source.
5 * See the bottom of this file for the licence.
6 */
7
8 package org.dom4j.io;
9
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable;
import java.net.URL;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.ElementHandler;

import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
32
33 /**
34 * <p>
35 * <code>SAXReader</code> creates a DOM4J tree from SAX parsing events.
36 * </p>
37 *
38 * <p>
39 * The actual SAX parser that is used by this class is configurable so you can
40 * use your favourite SAX parser if you wish. DOM4J comes configured with its
41 * own SAX parser so you do not need to worry about configuring the SAX parser.
42 * </p>
43 *
44 * <p>
45 * To explicitly configure the SAX parser that is used via Java code you can use
46 * a constructor or use the {@link #setXMLReader(XMLReader)}or {@link
47 * #setXMLReaderClassName(String)} methods.
48 * </p>
49 *
50 * <p>
51 * If the parser is not specified explicitly then the standard SAX policy of
52 * using the <code>org.xml.sax.driver</code> system property is used to
53 * determine the implementation class of {@link XMLReader}.
54 * </p>
55 *
56 * <p>
57 * If the <code>org.xml.sax.driver</code> system property is not defined then
58 * JAXP is used via reflection (so that DOM4J is not explicitly dependent on the
59 * JAXP classes) to load the JAXP configured SAXParser. If there is any error
60 * creating a JAXP SAXParser an informational message is output and then the
61 * default (Aelfred) SAX parser is used instead.
62 * </p>
63 *
64 * <p>
65 * If you are trying to use JAXP to explicitly set your SAX parser and are
66 * experiencing problems, you can turn on verbose error reporting by defining
67 * the system property <code>org.dom4j.verbose</code> to be "true" which will
68 * output a more detailed description of why JAXP could not find a SAX parser
69 * </p>
70 *
71 * <p>
72 * For more information on JAXP please go to <a
73 * href="http://java.sun.com/xml/">Sun's Java & XML site </a>
74 * </p>
75 *
76 * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
77 * @version $Revision: 1.58 $
78 */
79 public class SAXReader {
80 private static final String SAX_STRING_INTERNING =
81 "http://xml.org/sax/features/string-interning";
82 private static final String SAX_NAMESPACE_PREFIXES =
83 "http://xml.org/sax/features/namespace-prefixes";
84 private static final String SAX_NAMESPACES =
85 "http://xml.org/sax/features/namespaces";
86 private static final String SAX_DECL_HANDLER =
87 "http://xml.org/sax/properties/declaration-handler";
88 private static final String SAX_LEXICAL_HANDLER =
89 "http://xml.org/sax/properties/lexical-handler";
90 private static final String SAX_LEXICALHANDLER =
91 "http://xml.org/sax/handlers/LexicalHandler";
92
93 /** <code>DocumentFactory</code> used to create new document objects */
94 private DocumentFactory factory;
95
96 /** <code>XMLReader</code> used to parse the SAX events */
97 private XMLReader xmlReader;
98
99 /** Whether validation should occur */
100 private boolean validating;
101
102 /** DispatchHandler to call when each <code>Element</code> is encountered */
103 private DispatchHandler dispatchHandler;
104
105 /** ErrorHandler class to use */
106 private ErrorHandler errorHandler;
107
108 /** The entity resolver */
109 private EntityResolver entityResolver;
110
111 /** Should element & attribute names and namespace URIs be interned? */
112 private boolean stringInternEnabled = true;
113
114 /** Should internal DTD declarations be expanded into a List in the DTD */
115 private boolean includeInternalDTDDeclarations = false;
116
117 /** Should external DTD declarations be expanded into a List in the DTD */
118 private boolean includeExternalDTDDeclarations = false;
119
120 /** Whether adjacent text nodes should be merged */
121 private boolean mergeAdjacentText = false;
122
123 /** Holds value of property stripWhitespaceText. */
124 private boolean stripWhitespaceText = false;
125
126 /** Should we ignore comments */
127 private boolean ignoreComments = false;
128
129 /** Encoding of InputSource - null means system default encoding */
130 private String encoding = null;
131
132 // private boolean includeExternalGeneralEntities = false;
133 // private boolean includeExternalParameterEntities = false;
134
135 /** The SAX filter used to filter SAX events */
136 private XMLFilter xmlFilter;
137
138 public SAXReader() {
139 }
140
141 public SAXReader(boolean validating) {
142 this.validating = validating;
143 }
144
145 public SAXReader(DocumentFactory factory) {
146 this.factory = factory;
147 }
148
149 public SAXReader(DocumentFactory factory, boolean validating) {
150 this.factory = factory;
151 this.validating = validating;
152 }
153
154 public SAXReader(XMLReader xmlReader) {
155 this.xmlReader = xmlReader;
156 }
157
158 public SAXReader(XMLReader xmlReader, boolean validating) {
159 this.xmlReader = xmlReader;
160 this.validating = validating;
161 }
162
163 public SAXReader(String xmlReaderClassName) throws SAXException {
164 if (xmlReaderClassName != null) {
165 this.xmlReader = XMLReaderFactory
166 .createXMLReader(xmlReaderClassName);
167 }
168 }
169
170 public SAXReader(String xmlReaderClassName, boolean validating)
171 throws SAXException {
172 if (xmlReaderClassName != null) {
173 this.xmlReader = XMLReaderFactory
174 .createXMLReader(xmlReaderClassName);
175 }
176
177 this.validating = validating;
178 }
179
180 /**
181 * Allows a SAX property to be set on the underlying SAX parser. This can be
182 * useful to set parser-specific properties such as the location of schema
183 * or DTD resources. Though use this method with caution as it has the
184 * possibility of breaking the standard behaviour. An alternative to calling
185 * this method is to correctly configure an XMLReader object instance and
186 * call the {@link #setXMLReader(XMLReader)}method
187 *
188 * @param name
189 * is the SAX property name
190 * @param value
191 * is the value of the SAX property
192 *
193 * @throws SAXException
194 * if the XMLReader could not be created or the property could
195 * not be changed.
196 */
197 public void setProperty(String name, Object value) throws SAXException {
198 getXMLReader().setProperty(name, value);
199 }
200
201 /**
202 * Sets a SAX feature on the underlying SAX parser. This can be useful to
203 * set parser-specific features. Though use this method with caution as it
204 * has the possibility of breaking the standard behaviour. An alternative to
205 * calling this method is to correctly configure an XMLReader object
206 * instance and call the {@link #setXMLReader(XMLReader)}method
207 *
208 * @param name
209 * is the SAX feature name
210 * @param value
211 * is the value of the SAX feature
212 *
213 * @throws SAXException
214 * if the XMLReader could not be created or the feature could
215 * not be changed.
216 */
217 public void setFeature(String name, boolean value) throws SAXException {
218 getXMLReader().setFeature(name, value);
219 }
220
221 /**
222 * <p>
223 * Reads a Document from the given <code>File</code>
224 * </p>
225 *
226 * @param file
227 * is the <code>File</code> to read from.
228 *
229 * @return the newly created Document instance
230 *
231 * @throws DocumentException
232 * if an error occurs during parsing.
233 */
234 public Document read(File file) throws DocumentException {
235 try {
236 /*
237 * We cannot convert the file to an URL because if the filename
238 * contains '#' characters, there will be problems with the URL in
239 * the InputSource (because a URL like
240 * http://myhost.com/index#anchor is treated the same as
241 * http://myhost.com/index) Thanks to Christian Oetterli
242 */
243 InputSource source = new InputSource(new FileInputStream(file));
244 if (this.encoding != null) {
245 source.setEncoding(this.encoding);
246 }
247 String path = file.getAbsolutePath();
248
249 if (path != null) {
250 // Code taken from Ant FileUtils
251 StringBuffer sb = new StringBuffer("file://");
252
253 // add an extra slash for filesystems with drive-specifiers
254 if (!path.startsWith(File.separator)) {
255 sb.append("/");
256 }
257
258 path = path.replace('\\', '/');
259 sb.append(path);
260
261 source.setSystemId(sb.toString());
262 }
263
264 return read(source);
265 } catch (FileNotFoundException e) {
266 throw new DocumentException(e.getMessage(), e);
267 }
268 }
269
270 /**
271 * <p>
272 * Reads a Document from the given <code>URL</code> using SAX
273 * </p>
274 *
275 * @param url
276 * <code>URL</code> to read from.
277 *
278 * @return the newly created Document instance
279 *
280 * @throws DocumentException
281 * if an error occurs during parsing.
282 */
283 public Document read(URL url) throws DocumentException {
284 String systemID = url.toExternalForm();
285
286 InputSource source = new InputSource(systemID);
287 if (this.encoding != null) {
288 source.setEncoding(this.encoding);
289 }
290
291 return read(source);
292 }
293
294 /**
295 * <p>
296 * Reads a Document from the given URL or filename using SAX.
297 * </p>
298 *
299 * <p>
300 * If the systemId contains a <code>':'</code> character then it is
301 * assumed to be a URL otherwise its assumed to be a file name. If you want
302 * finer grained control over this mechansim then please explicitly pass in
303 * either a {@link URL}or a {@link File}instance instead of a {@link
304 * String} to denote the source of the document.
305 * </p>
306 *
307 * @param systemId
308 * is a URL for a document or a file name.
309 *
310 * @return the newly created Document instance
311 *
312 * @throws DocumentException
313 * if an error occurs during parsing.
314 */
315 public Document read(String systemId) throws DocumentException {
316 InputSource source = new InputSource(systemId);
317 if (this.encoding != null) {
318 source.setEncoding(this.encoding);
319 }
320
321 return read(source);
322 }
323
324 /**
325 * <p>
326 * Reads a Document from the given stream using SAX
327 * </p>
328 *
329 * @param in
330 * <code>InputStream</code> to read from.
331 *
332 * @return the newly created Document instance
333 *
334 * @throws DocumentException
335 * if an error occurs during parsing.
336 */
337 public Document read(InputStream in) throws DocumentException {
338 InputSource source = new InputSource(in);
339 if (this.encoding != null) {
340 source.setEncoding(this.encoding);
341 }
342
343 return read(source);
344 }
345
346 /**
347 * <p>
348 * Reads a Document from the given <code>Reader</code> using SAX
349 * </p>
350 *
351 * @param reader
352 * is the reader for the input
353 *
354 * @return the newly created Document instance
355 *
356 * @throws DocumentException
357 * if an error occurs during parsing.
358 */
359 public Document read(Reader reader) throws DocumentException {
360 InputSource source = new InputSource(reader);
361 if (this.encoding != null) {
362 source.setEncoding(this.encoding);
363 }
364
365 return read(source);
366 }
367
368 /**
369 * <p>
370 * Reads a Document from the given stream using SAX
371 * </p>
372 *
373 * @param in
374 * <code>InputStream</code> to read from.
375 * @param systemId
376 * is the URI for the input
377 *
378 * @return the newly created Document instance
379 *
380 * @throws DocumentException
381 * if an error occurs during parsing.
382 */
383 public Document read(InputStream in, String systemId)
384 throws DocumentException {
385 InputSource source = new InputSource(in);
386 source.setSystemId(systemId);
387 if (this.encoding != null) {
388 source.setEncoding(this.encoding);
389 }
390
391 return read(source);
392 }
393
394 /**
395 * <p>
396 * Reads a Document from the given <code>Reader</code> using SAX
397 * </p>
398 *
399 * @param reader
400 * is the reader for the input
401 * @param systemId
402 * is the URI for the input
403 *
404 * @return the newly created Document instance
405 *
406 * @throws DocumentException
407 * if an error occurs during parsing.
408 */
409 public Document read(Reader reader, String systemId)
410 throws DocumentException {
411 InputSource source = new InputSource(reader);
412 source.setSystemId(systemId);
413 if (this.encoding != null) {
414 source.setEncoding(this.encoding);
415 }
416
417 return read(source);
418 }
419
420 /**
421 * <p>
422 * Reads a Document from the given <code>InputSource</code> using SAX
423 * </p>
424 *
425 * @param in
426 * <code>InputSource</code> to read from.
427 *
428 * @return the newly created Document instance
429 *
430 * @throws DocumentException
431 * if an error occurs during parsing.
432 */
433 public Document read(InputSource in) throws DocumentException {
434 try {
435 XMLReader reader = getXMLReader();
436
437 reader = installXMLFilter(reader);
438
439 EntityResolver thatEntityResolver = this.entityResolver;
440
441 if (thatEntityResolver == null) {
442 thatEntityResolver = createDefaultEntityResolver(in
443 .getSystemId());
444 this.entityResolver = thatEntityResolver;
445 }
446
447 reader.setEntityResolver(thatEntityResolver);
448
449 SAXContentHandler contentHandler = createContentHandler(reader);
450 contentHandler.setEntityResolver(thatEntityResolver);
451 contentHandler.setInputSource(in);
452
453 boolean internal = isIncludeInternalDTDDeclarations();
454 boolean external = isIncludeExternalDTDDeclarations();
455
456 contentHandler.setIncludeInternalDTDDeclarations(internal);
457 contentHandler.setIncludeExternalDTDDeclarations(external);
458 contentHandler.setMergeAdjacentText(isMergeAdjacentText());
459 contentHandler.setStripWhitespaceText(isStripWhitespaceText());
460 contentHandler.setIgnoreComments(isIgnoreComments());
461 reader.setContentHandler(contentHandler);
462
463 configureReader(reader, contentHandler);
464
465 reader.parse(in);
466
467 return contentHandler.getDocument();
468 } catch (Exception e) {
469 if (e instanceof SAXParseException) {
470 // e.printStackTrace();
471 SAXParseException parseException = (SAXParseException) e;
472 String systemId = parseException.getSystemId();
473
474 if (systemId == null) {
475 systemId = "";
476 }
477
478 String message = "Error on line "
479 + parseException.getLineNumber() + " of document "
480 + systemId + " : " + parseException.getMessage();
481
482 throw new DocumentException(message, e);
483 } else {
484 throw new DocumentException(e.getMessage(), e);
485 }
486 }
487 }
488
489 // Properties
490 // -------------------------------------------------------------------------
491
492 /**
493 * DOCUMENT ME!
494 *
495 * @return the validation mode, true if validating will be done otherwise
496 * false.
497 */
498 public boolean isValidating() {
499 return validating;
500 }
501
502 /**
503 * Sets the validation mode.
504 *
505 * @param validation
506 * indicates whether or not validation should occur.
507 */
508 public void setValidation(boolean validation) {
509 this.validating = validation;
510 }
511
512 /**
513 * DOCUMENT ME!
514 *
515 * @return whether internal DTD declarations should be expanded into the
516 * DocumentType object or not.
517 */
518 public boolean isIncludeInternalDTDDeclarations() {
519 return includeInternalDTDDeclarations;
520 }
521
522 /**
523 * Sets whether internal DTD declarations should be expanded into the
524 * DocumentType object or not.
525 *
526 * @param include
527 * whether or not DTD declarations should be expanded and
528 * included into the DocumentType object.
529 */
530 public void setIncludeInternalDTDDeclarations(boolean include) {
531 this.includeInternalDTDDeclarations = include;
532 }
533
534 /**
535 * DOCUMENT ME!
536 *
537 * @return whether external DTD declarations should be expanded into the
538 * DocumentType object or not.
539 */
540 public boolean isIncludeExternalDTDDeclarations() {
541 return includeExternalDTDDeclarations;
542 }
543
544 /**
545 * Sets whether DTD external declarations should be expanded into the
546 * DocumentType object or not.
547 *
548 * @param include
549 * whether or not DTD declarations should be expanded and
550 * included into the DocumentType object.
551 */
552 public void setIncludeExternalDTDDeclarations(boolean include) {
553 this.includeExternalDTDDeclarations = include;
554 }
555
556 /**
557 * Sets whether String interning is enabled or disabled for element &
558 * attribute names and namespace URIs. This proprety is enabled by default.
559 *
560 * @return DOCUMENT ME!
561 */
562 public boolean isStringInternEnabled() {
563 return stringInternEnabled;
564 }
565
566 /**
567 * Sets whether String interning is enabled or disabled for element &
568 * attribute names and namespace URIs
569 *
570 * @param stringInternEnabled
571 * DOCUMENT ME!
572 */
573 public void setStringInternEnabled(boolean stringInternEnabled) {
574 this.stringInternEnabled = stringInternEnabled;
575 }
576
577 /**
578 * Returns whether adjacent text nodes should be merged together.
579 *
580 * @return Value of property mergeAdjacentText.
581 */
582 public boolean isMergeAdjacentText() {
583 return mergeAdjacentText;
584 }
585
586 /**
587 * Sets whether or not adjacent text nodes should be merged together when
588 * parsing.
589 *
590 * @param mergeAdjacentText
591 * New value of property mergeAdjacentText.
592 */
593 public void setMergeAdjacentText(boolean mergeAdjacentText) {
594 this.mergeAdjacentText = mergeAdjacentText;
595 }
596
597 /**
598 * Sets whether whitespace between element start and end tags should be
599 * ignored
600 *
601 * @return Value of property stripWhitespaceText.
602 */
603 public boolean isStripWhitespaceText() {
604 return stripWhitespaceText;
605 }
606
607 /**
608 * Sets whether whitespace between element start and end tags should be
609 * ignored.
610 *
611 * @param stripWhitespaceText
612 * New value of property stripWhitespaceText.
613 */
614 public void setStripWhitespaceText(boolean stripWhitespaceText) {
615 this.stripWhitespaceText = stripWhitespaceText;
616 }
617
618 /**
619 * Returns whether we should ignore comments or not.
620 *
621 * @return boolean
622 */
623 public boolean isIgnoreComments() {
624 return ignoreComments;
625 }
626
627 /**
628 * Sets whether we should ignore comments or not.
629 *
630 * @param ignoreComments
631 * whether we should ignore comments or not.
632 */
633 public void setIgnoreComments(boolean ignoreComments) {
634 this.ignoreComments = ignoreComments;
635 }
636
637 /**
638 * DOCUMENT ME!
639 *
640 * @return the <code>DocumentFactory</code> used to create document
641 * objects
642 */
643 public DocumentFactory getDocumentFactory() {
644 if (factory == null) {
645 factory = DocumentFactory.getInstance();
646 }
647
648 return factory;
649 }
650
651 /**
652 * <p>
653 * This sets the <code>DocumentFactory</code> used to create new
654 * documents. This method allows the building of custom DOM4J tree objects
655 * to be implemented easily using a custom derivation of
656 * {@link DocumentFactory}
657 * </p>
658 *
659 * @param documentFactory
660 * <code>DocumentFactory</code> used to create DOM4J objects
661 */
662 public void setDocumentFactory(DocumentFactory documentFactory) {
663 this.factory = documentFactory;
664 }
665
666 /**
667 * DOCUMENT ME!
668 *
669 * @return the <code>ErrorHandler</code> used by SAX
670 */
671 public ErrorHandler getErrorHandler() {
672 return errorHandler;
673 }
674
675 /**
676 * Sets the <code>ErrorHandler</code> used by the SAX
677 * <code>XMLReader</code>.
678 *
679 * @param errorHandler
680 * is the <code>ErrorHandler</code> used by SAX
681 */
682 public void setErrorHandler(ErrorHandler errorHandler) {
683 this.errorHandler = errorHandler;
684 }
685
686 /**
687 * Returns the current entity resolver used to resolve entities
688 *
689 * @return DOCUMENT ME!
690 */
691 public EntityResolver getEntityResolver() {
692 return entityResolver;
693 }
694
695 /**
696 * Sets the entity resolver used to resolve entities.
697 *
698 * @param entityResolver
699 * DOCUMENT ME!
700 */
701 public void setEntityResolver(EntityResolver entityResolver) {
702 this.entityResolver = entityResolver;
703 }
704
705 /**
706 * DOCUMENT ME!
707 *
708 * @return the <code>XMLReader</code> used to parse SAX events
709 *
710 * @throws SAXException
711 * DOCUMENT ME!
712 */
713 public XMLReader getXMLReader() throws SAXException {
714 if (xmlReader == null) {
715 xmlReader = createXMLReader();
716 }
717
718 return xmlReader;
719 }
720
721 /**
722 * Sets the <code>XMLReader</code> used to parse SAX events
723 *
724 * @param reader
725 * is the <code>XMLReader</code> to parse SAX events
726 */
727 public void setXMLReader(XMLReader reader) {
728 this.xmlReader = reader;
729 }
730
731 /**
732 * Returns encoding used for InputSource (null means system default
733 * encoding)
734 *
735 * @return encoding used for InputSource
736 *
737 */
738 public String getEncoding() {
739 return encoding;
740 }
741
742 /**
743 * Sets encoding used for InputSource (null means system default encoding)
744 *
745 * @param encoding
746 * is encoding used for InputSource
747 */
748 public void setEncoding(String encoding) {
749 this.encoding = encoding;
750 }
751
752 /**
753 * Sets the class name of the <code>XMLReader</code> to be used to parse
754 * SAX events.
755 *
756 * @param xmlReaderClassName
757 * is the class name of the <code>XMLReader</code> to parse SAX
758 * events
759 *
760 * @throws SAXException
761 * DOCUMENT ME!
762 */
763 public void setXMLReaderClassName(String xmlReaderClassName)
764 throws SAXException {
765 setXMLReader(XMLReaderFactory.createXMLReader(xmlReaderClassName));
766 }
767
768 /**
769 * Adds the <code>ElementHandler</code> to be called when the specified
770 * path is encounted.
771 *
772 * @param path
773 * is the path to be handled
774 * @param handler
775 * is the <code>ElementHandler</code> to be called by the event
776 * based processor.
777 */
778 public void addHandler(String path, ElementHandler handler) {
779 getDispatchHandler().addHandler(path, handler);
780 }
781
782 /**
783 * Removes the <code>ElementHandler</code> from the event based processor,
784 * for the specified path.
785 *
786 * @param path
787 * is the path to remove the <code>ElementHandler</code> for.
788 */
789 public void removeHandler(String path) {
790 getDispatchHandler().removeHandler(path);
791 }
792
793 /**
794 * When multiple <code>ElementHandler</code> instances have been
795 * registered, this will set a default <code>ElementHandler</code> to be
796 * called for any path which does <b>NOT </b> have a handler registered.
797 *
798 * @param handler
799 * is the <code>ElementHandler</code> to be called by the event
800 * based processor.
801 */
802 public void setDefaultHandler(ElementHandler handler) {
803 getDispatchHandler().setDefaultHandler(handler);
804 }
805
806 /**
807 * This method clears out all the existing handlers and default handler
808 * setting things back as if no handler existed. Useful when reusing an
809 * object instance.
810 */
811 public void resetHandlers() {
812 getDispatchHandler().resetHandlers();
813 }
814
815 /**
816 * Returns the SAX filter being used to filter SAX events.
817 *
818 * @return the SAX filter being used or null if no SAX filter is installed
819 */
820 public XMLFilter getXMLFilter() {
821 return xmlFilter;
822 }
823
824 /**
825 * Sets the SAX filter to be used when filtering SAX events
826 *
827 * @param filter
828 * is the SAX filter to use or null to disable filtering
829 */
830 public void setXMLFilter(XMLFilter filter) {
831 this.xmlFilter = filter;
832 }
833
834 // Implementation methods
835 // -------------------------------------------------------------------------
836
837 /**
838 * Installs any XMLFilter objects required to allow the SAX event stream to
839 * be filtered and preprocessed before it gets to dom4j.
840 *
841 * @param reader
842 * DOCUMENT ME!
843 *
844 * @return the new XMLFilter if applicable or the original XMLReader if no
845 * filter is being used.
846 */
847 protected XMLReader installXMLFilter(XMLReader reader) {
848 XMLFilter filter = getXMLFilter();
849
850 if (filter != null) {
851 // find the root XMLFilter
852 XMLFilter root = filter;
853
854 while (true) {
855 XMLReader parent = root.getParent();
856
857 if (parent instanceof XMLFilter) {
858 root = (XMLFilter) parent;
859 } else {
860 break;
861 }
862 }
863
864 root.setParent(reader);
865
866 return filter;
867 }
868
869 return reader;
870 }
871
872 protected DispatchHandler getDispatchHandler() {
873 if (dispatchHandler == null) {
874 dispatchHandler = new DispatchHandler();
875 }
876
877 return dispatchHandler;
878 }
879
880 protected void setDispatchHandler(DispatchHandler dispatchHandler) {
881 this.dispatchHandler = dispatchHandler;
882 }
883
884 /**
885 * Factory Method to allow alternate methods of creating and configuring
886 * XMLReader objects
887 *
888 * @return DOCUMENT ME!
889 *
890 * @throws SAXException
891 * DOCUMENT ME!
892 */
893 protected XMLReader createXMLReader() throws SAXException {
894 return SAXHelper.createXMLReader(isValidating());
895 }
896
897 /**
898 * Configures the XMLReader before use
899 *
900 * @param reader
901 * DOCUMENT ME!
902 * @param handler
903 * DOCUMENT ME!
904 *
905 * @throws DocumentException
906 * DOCUMENT ME!
907 */
908 protected void configureReader(XMLReader reader, DefaultHandler handler)
909 throws DocumentException {
910 // configure lexical handling
911 SAXHelper.setParserProperty(reader, SAX_LEXICALHANDLER, handler);
912
913 // try alternate property just in case
914 SAXHelper.setParserProperty(reader, SAX_LEXICAL_HANDLER, handler);
915
916 // register the DeclHandler
917 if (includeInternalDTDDeclarations || includeExternalDTDDeclarations) {
918 SAXHelper.setParserProperty(reader, SAX_DECL_HANDLER, handler);
919 }
920
921 // configure namespace support
922 SAXHelper.setParserFeature(reader, SAX_NAMESPACES, true);
923
924 SAXHelper.setParserFeature(reader, SAX_NAMESPACE_PREFIXES, false);
925
926 // string interning
927 SAXHelper.setParserFeature(reader, SAX_STRING_INTERNING,
928 isStringInternEnabled());
929
930 // external entites
931 /*
932 * SAXHelper.setParserFeature( reader,
933 * "http://xml.org/sax/properties/external-general-entities",
934 * includeExternalGeneralEntities ); SAXHelper.setParserFeature( reader,
935 * "http://xml.org/sax/properties/external-parameter-entities",
936 * includeExternalParameterEntities );
937 */
938 // use Locator2 if possible
939 SAXHelper.setParserFeature(reader,
940 "http://xml.org/sax/features/use-locator2", true);
941
942 try {
943 // configure validation support
944 reader.setFeature("http://xml.org/sax/features/validation",
945 isValidating());
946
947 if (errorHandler != null) {
948 reader.setErrorHandler(errorHandler);
949 } else {
950 reader.setErrorHandler(handler);
951 }
952 } catch (Exception e) {
953 if (isValidating()) {
954 throw new DocumentException("Validation not supported for"
955 + " XMLReader: " + reader, e);
956 }
957 }
958 }
959
960 /**
961 * Factory Method to allow user derived SAXContentHandler objects to be used
962 *
963 * @param reader
964 * DOCUMENT ME!
965 *
966 * @return DOCUMENT ME!
967 */
968 protected SAXContentHandler createContentHandler(XMLReader reader) {
969 return new SAXContentHandler(getDocumentFactory(), dispatchHandler);
970 }
971
972 protected EntityResolver createDefaultEntityResolver(String systemId) {
973 String prefix = null;
974
975 if ((systemId != null) && (systemId.length() > 0)) {
976 int idx = systemId.lastIndexOf('/');
977
978 if (idx > 0) {
979 prefix = systemId.substring(0, idx + 1);
980 }
981 }
982
983 return new SAXEntityResolver(prefix);
984 }
985
986 protected static class SAXEntityResolver implements EntityResolver,
987 Serializable {
988 protected String uriPrefix;
989
990 public SAXEntityResolver(String uriPrefix) {
991 this.uriPrefix = uriPrefix;
992 }
993
994 public InputSource resolveEntity(String publicId, String systemId) {
995 // try create a relative URI reader...
996 if ((systemId != null) && (systemId.length() > 0)) {
997 if ((uriPrefix != null) && (systemId.indexOf(':') <= 0)) {
998 systemId = uriPrefix + systemId;
999 }
1000 }
1001
1002 return new InputSource(systemId);
1003 }
1004 }
1005 }
1006
1007 /*
1008 * Redistribution and use of this software and associated documentation
1009 * ("Software"), with or without modification, are permitted provided that the
1010 * following conditions are met:
1011 *
1012 * 1. Redistributions of source code must retain copyright statements and
1013 * notices. Redistributions must also contain a copy of this document.
1014 *
1015 * 2. Redistributions in binary form must reproduce the above copyright notice,
1016 * this list of conditions and the following disclaimer in the documentation
1017 * and/or other materials provided with the distribution.
1018 *
1019 * 3. The name "DOM4J" must not be used to endorse or promote products derived
1020 * from this Software without prior written permission of MetaStuff, Ltd. For
1021 * written permission, please contact dom4j-info@metastuff.com.
1022 *
1023 * 4. Products derived from this Software may not be called "DOM4J" nor may
1024 * "DOM4J" appear in their names without prior written permission of MetaStuff,
1025 * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
1026 *
1027 * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
1028 *
1029 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
1030 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1031 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1032 * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
1033 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1034 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1035 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1036 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1037 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1038 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1039 * POSSIBILITY OF SUCH DAMAGE.
1040 *
1041 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
1042 */