3 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
8 * XML Utilities package
10 * PHP versions 4 and 5
14 * Copyright (c) 2003-2008 Stephan Schmidt <schst@php.net>
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
21 * * Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * * Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * * The name of the author may not be used to endorse or promote products
27 * derived from this software without specific prior written permission.
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
30 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
31 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
32 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
33 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
34 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
35 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
36 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
37 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
38 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
39 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * @author Stephan Schmidt <schst@php.net>
44 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
45 * @license http://opensource.org/licenses/bsd-license New BSD License
46 * @version CVS: $Id: Util.php,v 1.38 2008/11/13 00:03:38 ashnazg Exp $
47 * @link http://pear.php.net/package/XML_Util
51 * error code for invalid chars in XML name
53 define('XML_UTIL_ERROR_INVALID_CHARS', 51);
56 * error code for an invalid first character in an XML name
58 define('XML_UTIL_ERROR_INVALID_START', 52);
61 * error code for non-scalar tag content
63 define('XML_UTIL_ERROR_NON_SCALAR_CONTENT', 60);
66 * error code for missing tag name
68 define('XML_UTIL_ERROR_NO_TAG_NAME', 61);
71 * replace XML entities
73 define('XML_UTIL_REPLACE_ENTITIES', 1);
76 * embed content in a CData section
78 define('XML_UTIL_CDATA_SECTION', 5);
81 * do not replace entities
83 define('XML_UTIL_ENTITIES_NONE', 0);
86 * replace all XML entities
87 * This setting will replace <, >, ", ' and &
89 define('XML_UTIL_ENTITIES_XML', 1);
92 * replace only required XML entities
93 * This setting will replace <, " and &
95 define('XML_UTIL_ENTITIES_XML_REQUIRED', 2);
98 * replace HTML entities
99 * @link http://www.php.net/htmlentities
101 define('XML_UTIL_ENTITIES_HTML', 3);
104 * Collapse all empty tags.
106 define('XML_UTIL_COLLAPSE_ALL', 1);
109 * Collapse only empty XHTML tags that have no end tag.
111 define('XML_UTIL_COLLAPSE_XHTML_ONLY', 2);
114 * utility class for working with XML documents
119 * @author Stephan Schmidt <schst@php.net>
120 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
121 * @license http://opensource.org/licenses/bsd-license New BSD License
122 * @version Release: 1.2.1
123 * @link http://pear.php.net/package/XML_Util
130 * @return string $version API version
134 function apiVersion()
/**
 * replace XML entities
 *
 * With the optional second parameter, you may select which
 * entities should be replaced:
 * - XML_UTIL_ENTITIES_XML replaces <, >, ", ' and &
 * - XML_UTIL_ENTITIES_XML_REQUIRED replaces only <, " and &
 * - XML_UTIL_ENTITIES_HTML delegates to htmlentities()
 * Any other value leaves the string untouched.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // replace XML entities:
 * $string = XML_Util::replaceEntities('This string contains < & >.');
 * </code>
 *
 * With the optional third parameter, you may pass the character encoding
 * <code>
 * require_once 'XML/Util.php';
 *
 * // replace XML entities in UTF-8:
 * $string = XML_Util::replaceEntities(
 *     'This string contains < & > as well as ä, ö, ß, à and ê',
 *     XML_UTIL_ENTITIES_HTML,
 *     'UTF-8'
 * );
 * </code>
 *
 * @param string $string          string where XML special chars
 *                                should be replaced
 * @param int    $replaceEntities setting for entities in attribute values
 *                                (one of XML_UTIL_ENTITIES_XML,
 *                                XML_UTIL_ENTITIES_XML_REQUIRED,
 *                                XML_UTIL_ENTITIES_HTML)
 * @param string $encoding        encoding value (if any)...
 *                                must be a valid encoding as determined
 *                                by the htmlentities() function; only used
 *                                for XML_UTIL_ENTITIES_HTML
 *
 * @return string string with replaced chars
 * @see reverseEntities()
 */
function replaceEntities($string, $replaceEntities = XML_UTIL_ENTITIES_XML,
    $encoding = 'ISO-8859-1')
{
    switch ($replaceEntities) {
    case XML_UTIL_ENTITIES_XML:
        // strtr() with an array never rescans its own output, so the
        // ampersands of freshly inserted entities are not escaped twice
        return strtr($string, array(
            '&'  => '&amp;',
            '>'  => '&gt;',
            '<'  => '&lt;',
            '"'  => '&quot;',
            '\'' => '&apos;'
        ));
    case XML_UTIL_ENTITIES_XML_REQUIRED:
        return strtr($string, array(
            '&' => '&amp;',
            '<' => '&lt;',
            '"' => '&quot;'
        ));
    case XML_UTIL_ENTITIES_HTML:
        return htmlentities($string, ENT_COMPAT, $encoding);
    }

    // unknown mode (e.g. XML_UTIL_ENTITIES_NONE): nothing to replace
    return $string;
}
/**
 * reverse XML entities
 *
 * With the optional second parameter, you may select which
 * entities should be reversed:
 * - XML_UTIL_ENTITIES_XML reverses the entities for <, >, ", ' and &
 * - XML_UTIL_ENTITIES_XML_REQUIRED reverses only those for <, " and &
 * - XML_UTIL_ENTITIES_HTML delegates to html_entity_decode()
 * Any other value leaves the string untouched.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // reverse XML entities:
 * $string = XML_Util::reverseEntities('This string contains &lt; &amp; &gt;.');
 * </code>
 *
 * With the optional third parameter, you may pass the character encoding
 * <code>
 * require_once 'XML/Util.php';
 *
 * // reverse XML entities in UTF-8:
 * $string = XML_Util::reverseEntities(
 *     'This string contains &lt; &amp; &gt; as well as'
 *     . ' &auml;, &ouml;, &szlig;, &agrave; and &ecirc;',
 *     XML_UTIL_ENTITIES_HTML,
 *     'UTF-8'
 * );
 * </code>
 *
 * @param string $string          string where XML special chars
 *                                should be restored
 * @param int    $replaceEntities setting for entities in attribute values
 *                                (one of XML_UTIL_ENTITIES_XML,
 *                                XML_UTIL_ENTITIES_XML_REQUIRED,
 *                                XML_UTIL_ENTITIES_HTML)
 * @param string $encoding        encoding value (if any)...
 *                                must be a valid encoding as determined
 *                                by the html_entity_decode() function; only
 *                                used for XML_UTIL_ENTITIES_HTML
 *
 * @return string string with replaced chars
 * @see replaceEntities()
 */
function reverseEntities($string, $replaceEntities = XML_UTIL_ENTITIES_XML,
    $encoding = 'ISO-8859-1')
{
    switch ($replaceEntities) {
    case XML_UTIL_ENTITIES_XML:
        // strtr() with an array never rescans its own output, so
        // '&amp;lt;' is decoded exactly once (to '&lt;')
        return strtr($string, array(
            '&amp;'  => '&',
            '&gt;'   => '>',
            '&lt;'   => '<',
            '&quot;' => '"',
            '&apos;' => '\''
        ));
    case XML_UTIL_ENTITIES_XML_REQUIRED:
        return strtr($string, array(
            '&amp;'  => '&',
            '&lt;'   => '<',
            '&quot;' => '"'
        ));
    case XML_UTIL_ENTITIES_HTML:
        return html_entity_decode($string, ENT_COMPAT, $encoding);
    }

    // unknown mode (e.g. XML_UTIL_ENTITIES_NONE): nothing to reverse
    return $string;
}
/**
 * build an xml declaration
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // get an XML declaration:
 * $xmlDecl = XML_Util::getXMLDeclaration('1.0', 'UTF-8', true);
 * </code>
 *
 * @param string $version    xml version
 * @param string $encoding   character encoding; omitted from the
 *                           declaration when null
 * @param bool   $standalone document is standalone (or not); omitted from
 *                           the declaration when null
 *
 * @return string xml declaration
 * @uses attributesToString() to serialize the attributes of the XML declaration
 */
function getXMLDeclaration($version = '1.0', $encoding = null,
    $standalone = null)
{
    $attributes = array(
        'version' => $version
    );

    // add encoding, if specified
    if ($encoding !== null) {
        $attributes['encoding'] = $encoding;
    }

    // add standalone, if specified
    if ($standalone !== null) {
        $attributes['standalone'] = $standalone ? 'yes' : 'no';
    }

    // sorting is switched off so the attributes keep their insertion
    // order: version, encoding, standalone
    return sprintf('<?xml%s?>',
        XML_Util::attributesToString($attributes, false));
}
/**
 * build a document type declaration
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // get a doctype declaration:
 * $xmlDecl = XML_Util::getDocTypeDeclaration('rootTag','myDocType.dtd');
 * </code>
 *
 * @param string $root        name of the root tag
 * @param string $uri         uri of the doctype definition
 *                            (or array with the keys 'uri' and 'id', which
 *                            produces a PUBLIC identifier)
 * @param string $internalDtd internal dtd entries
 *
 * @return string doctype declaration
 */
function getDocTypeDeclaration($root, $uri = null, $internalDtd = null)
{
    if (is_array($uri)) {
        $ref = sprintf(' PUBLIC "%s" "%s"', $uri['id'], $uri['uri']);
    } elseif (!empty($uri)) {
        $ref = sprintf(' SYSTEM "%s"', $uri);
    } else {
        // no external subset: $ref must still be defined for sprintf() below
        $ref = '';
    }

    if (empty($internalDtd)) {
        return sprintf('<!DOCTYPE %s%s>', $root, $ref);
    }

    return sprintf("<!DOCTYPE %s%s [\n%s\n]>", $root, $ref, $internalDtd);
}
/**
 * create string representation of an attribute list
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // build an attribute string
 * $att = array(
 *     'foo'  => 'bar',
 *     'argh' => 'tomato'
 * );
 *
 * $attList = XML_Util::attributesToString($att);
 * </code>
 *
 * @param array      $attributes attribute array
 * @param bool|array $sort       sort attribute list alphabetically,
 *                               may also be an assoc array containing
 *                               the keys 'sort', 'multiline', 'indent',
 *                               'linebreak' and 'entities'; keys present
 *                               in the array override the corresponding
 *                               positional parameters, and 'sort' defaults
 *                               to true when absent
 * @param bool       $multiline  use linebreaks, if more than
 *                               one attribute is given
 * @param string     $indent     string used for indentation of
 *                               multiline attributes
 * @param string     $linebreak  string used for linebreaks of
 *                               multiline attributes
 * @param int        $entities   setting for entities in attribute values
 *                               (one of XML_UTIL_ENTITIES_NONE,
 *                               XML_UTIL_ENTITIES_XML,
 *                               XML_UTIL_ENTITIES_XML_REQUIRED,
 *                               XML_UTIL_ENTITIES_HTML);
 *                               XML_UTIL_CDATA_SECTION is treated as
 *                               XML_UTIL_ENTITIES_XML
 *
 * @return string string representation of the attributes; every attribute
 *                is preceded by a separator, an empty or non-array
 *                attribute list yields ''
 * @uses replaceEntities() to replace XML entities in attribute values
 */
// NOTE(review): the listing shows a single-space default for $indent, but
// whitespace runs look collapsed elsewhere in this listing -- confirm the
// intended default width against the upstream file.
function attributesToString($attributes, $sort = true, $multiline = false,
    $indent = ' ', $linebreak = "\n", $entities = XML_UTIL_ENTITIES_XML)
{
    // the second parameter may carry all options as an associative array
    if (is_array($sort)) {
        if (isset($sort['multiline'])) {
            $multiline = $sort['multiline'];
        }
        if (isset($sort['indent'])) {
            $indent = $sort['indent'];
        }
        if (isset($sort['linebreak'])) {
            // this used to be assigned to $multiline, which both lost the
            // requested linebreak and switched multiline mode on
            $linebreak = $sort['linebreak'];
        }
        if (isset($sort['entities'])) {
            $entities = $sort['entities'];
        }
        $sort = isset($sort['sort']) ? $sort['sort'] : true;
    }

    if (!is_array($attributes) || empty($attributes)) {
        return '';
    }

    if ($sort) {
        ksort($attributes);
    }

    // A CData section cannot occur inside an attribute value, so fall back
    // to plain XML entities. Done once up front so that the single-line and
    // the multiline layout escape values the same way.
    if ($entities === XML_UTIL_CDATA_SECTION) {
        $entities = XML_UTIL_ENTITIES_XML;
    }
    $escape = ($entities != XML_UTIL_ENTITIES_NONE);

    $pairs = array();
    foreach ($attributes as $key => $value) {
        if ($escape) {
            $value = XML_Util::replaceEntities($value, $entities);
        }
        $pairs[] = $key . '="' . $value . '"';
    }

    // the first attribute always follows a single space; further ones are
    // separated by a space or, in multiline mode, by linebreak + indent
    if (!$multiline || count($pairs) == 1) {
        $glue = ' ';
    } else {
        $glue = $linebreak . $indent;
    }

    return ' ' . implode($glue, $pairs);
}
/**
 * Collapses empty tags.
 *
 * An element written as an open tag immediately followed by its matching
 * close tag (e.g. <br></br>) is rewritten to the short form (<br />).
 *
 * @param string $xml  XML
 * @param int    $mode Whether to collapse all empty tags (XML_UTIL_COLLAPSE_ALL)
 *                     or only XHTML (XML_UTIL_COLLAPSE_XHTML_ONLY) ones.
 *
 * @return string XML with the selected empty tags collapsed
 * @todo PEAR CS - unable to avoid "space after open parens" error
 */
function collapseEmptyTags($xml, $mode = XML_UTIL_COLLAPSE_ALL)
{
    if ($mode == XML_UTIL_COLLAPSE_XHTML_ONLY) {
        // only the XHTML elements that have no end tag
        $pattern =
            '/<(area|base(?:font)?|br|col|frame|hr|img|input|isindex|link|meta|'
            . 'param)([^>]*)><\/\\1>/s';
    } else {
        $pattern = '/<(\w+)([^>]*)><\/\\1>/s';
    }

    // \1 is the tag name, \2 the untouched attribute string
    return preg_replace($pattern, '<\\1\\2 />', $xml);
}
/**
 * create a tag
 *
 * This method will call XML_Util::createTagFromArray(), which
 * is more flexible.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML tag:
 * $tag = XML_Util::createTag('myNs:myTag',
 *     array('foo' => 'bar'),
 *     'This is inside the tag',
 *     'http://www.w3c.org/myNs#');
 * </code>
 *
 * @param string $qname           qualified tagname (including namespace)
 * @param array  $attributes      array containing attributes
 * @param mixed  $content         the content
 * @param string $namespaceUri    URI of the namespace
 * @param int    $replaceEntities whether to replace XML special chars in
 *                                content, embed it in a CData section
 *                                or do neither
 * @param bool   $multiline       whether to create a multiline tag where
 *                                each attribute gets written to a single line
 * @param string $indent          string used to indent attributes
 *                                (_auto indents attributes so they start
 *                                at the same column)
 * @param string $linebreak       string used for linebreaks
 * @param bool   $sortAttributes  Whether to sort the attributes or not
 *
 * @return string XML tag (or whatever createTagFromArray() returns on error)
 * @see createTagFromArray()
 * @uses createTagFromArray() to create the tag
 */
function createTag($qname, $attributes = array(), $content = null,
    $namespaceUri = null, $replaceEntities = XML_UTIL_REPLACE_ENTITIES,
    $multiline = false, $indent = '_auto', $linebreak = "\n",
    $sortAttributes = true)
{
    $tag = array(
        'qname'      => $qname,
        'attributes' => $attributes
    );

    // optional parts are only handed over when the caller supplied them
    if ($content !== null) {
        $tag['content'] = $content;
    }
    if ($namespaceUri !== null) {
        $tag['namespaceUri'] = $namespaceUri;
    }

    return XML_Util::createTagFromArray($tag, $replaceEntities, $multiline,
        $indent, $linebreak, $sortAttributes);
}
/**
 * create a tag from an array
 * this method awaits an array in the following format
 * <pre>
 * array(
 *     // qualified name of the tag
 *     'qname' => $qname
 *
 *     // namespace prefix (optional, if qname is specified or no namespace)
 *     'namespace' => $namespace
 *
 *     // local part of the tagname (optional, if qname is specified)
 *     'localPart' => $localPart,
 *
 *     // array containing all attributes (optional)
 *     'attributes' => array(),
 *
 *     // tag content (optional)
 *     'content' => $content,
 *
 *     // namespaceUri for the given namespace (optional)
 *     'namespaceUri' => $namespaceUri
 *
 *     // additional namespace declarations as prefix => uri (optional),
 *     // each one is emitted as an xmlns:prefix attribute
 *     'namespaces' => array()
 * )
 * </pre>
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * $tag = array(
 *     'qname'        => 'foo:bar',
 *     'namespaceUri' => 'http://foo.com',
 *     'attributes'   => array('key' => 'value', 'argh' => 'fruit&vegetable'),
 *     'content'      => 'I\'m inside the tag',
 * );
 * // creating a tag with qualified name and namespaceUri
 * $string = XML_Util::createTagFromArray($tag);
 * </code>
 *
 * @param array  $tag             tag definition
 * @param int    $replaceEntities whether to replace XML special chars in
 *                                content, embed it in a CData section
 *                                or do neither; the same setting is passed
 *                                on to attributesToString()
 * @param bool   $multiline       whether to create a multiline tag where each
 *                                attribute gets written to a single line
 * @param string $indent          string used to indent attributes
 *                                (_auto indents attributes so they start
 *                                at the same column)
 * @param string $linebreak       string used for linebreaks
 * @param bool   $sortAttributes  Whether to sort the attributes or not
 *
 * @return string XML tag; if the content is not scalar or neither 'qname'
 *                nor 'localPart' is given, the result of
 *                XML_Util::raiseError() is returned instead
 * @see createTag()
 * @uses attributesToString() to serialize the attributes of the tag
 * @uses splitQualifiedName() to get local part and namespace of a qualified name
 * @uses createCDataSection()
 * @uses raiseError()
 */
function createTagFromArray($tag, $replaceEntities = XML_UTIL_REPLACE_ENTITIES,
    $multiline = false, $indent = '_auto', $linebreak = "\n",
    $sortAttributes = true)
{
    if (isset($tag['content']) && !is_scalar($tag['content'])) {
        return XML_Util::raiseError('Supplied non-scalar value as tag content',
            XML_UTIL_ERROR_NON_SCALAR_CONTENT);
    }

    if (!isset($tag['qname']) && !isset($tag['localPart'])) {
        return XML_Util::raiseError('You must either supply a qualified name '
            . '(qname) or local tag name (localPart).',
            XML_UTIL_ERROR_NO_TAG_NAME);
    }

    // if no attributes have been set, use empty attributes
    if (!isset($tag['attributes']) || !is_array($tag['attributes'])) {
        $tag['attributes'] = array();
    }

    // extra namespace declarations become xmlns:prefix attributes
    if (isset($tag['namespaces'])) {
        foreach ($tag['namespaces'] as $ns => $uri) {
            $tag['attributes']['xmlns:' . $ns] = $uri;
        }
    }

    if (!isset($tag['qname'])) {
        // qualified name is not given: build it from prefix and local part
        if (isset($tag['namespace']) && !empty($tag['namespace'])) {
            $tag['qname'] = $tag['namespace'] . ':' . $tag['localPart'];
        } else {
            $tag['qname'] = $tag['localPart'];
        }
    } elseif (isset($tag['namespaceUri']) && !isset($tag['namespace'])) {
        // namespace URI is set, but no namespace: derive it from the qname
        $parts = XML_Util::splitQualifiedName($tag['qname']);

        $tag['localPart'] = $parts['localPart'];
        if (isset($parts['namespace'])) {
            $tag['namespace'] = $parts['namespace'];
        }
    }

    if (isset($tag['namespaceUri']) && !empty($tag['namespaceUri'])) {
        // is a namespace given
        if (isset($tag['namespace']) && !empty($tag['namespace'])) {
            $tag['attributes']['xmlns:' . $tag['namespace']] =
                $tag['namespaceUri'];
        } else {
            // define this Uri as the default namespace
            $tag['attributes']['xmlns'] = $tag['namespaceUri'];
        }
    }

    // in multiline mode '_auto' indents by the length of the qname plus 2
    if ($multiline === true) {
        if ($indent === '_auto') {
            $indent = str_repeat(' ', (strlen($tag['qname'])+2));
        }
    }

    // create attribute list
    $attList = XML_Util::attributesToString($tag['attributes'],
        $sortAttributes, $multiline, $indent, $linebreak, $replaceEntities);

    // no (or empty) content: emit a collapsed tag
    if (!isset($tag['content']) || (string)$tag['content'] == '') {
        return sprintf('<%s%s />', $tag['qname'], $attList);
    }

    switch ($replaceEntities) {
    case XML_UTIL_ENTITIES_NONE:
        // content is used verbatim
        break;
    case XML_UTIL_CDATA_SECTION:
        $tag['content'] = XML_Util::createCDataSection($tag['content']);
        break;
    default:
        $tag['content'] = XML_Util::replaceEntities($tag['content'],
            $replaceEntities);
        break;
    }

    return sprintf('<%s%s>%s</%s>', $tag['qname'], $attList, $tag['content'],
        $tag['qname']);
}
/**
 * create a start element
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML start element:
 * $tag = XML_Util::createStartElement('myNs:myTag',
 *     array('foo' => 'bar') ,'http://www.w3c.org/myNs#');
 * </code>
 *
 * @param string $qname          qualified tagname (including namespace)
 * @param array  $attributes     array containing attributes
 * @param string $namespaceUri   URI of the namespace
 * @param bool   $multiline      whether to create a multiline tag where each
 *                               attribute gets written to a single line
 * @param string $indent         string used to indent attributes (_auto indents
 *                               attributes so they start at the same column)
 * @param string $linebreak      string used for linebreaks
 * @param bool   $sortAttributes Whether to sort the attributes or not
 *
 * @return string XML start element
 * @see createEndElement(), createTag()
 * @uses attributesToString() to serialize the attributes
 * @uses splitQualifiedName() to find the namespace prefix of the qname
 */
function createStartElement($qname, $attributes = array(), $namespaceUri = null,
    $multiline = false, $indent = '_auto', $linebreak = "\n",
    $sortAttributes = true)
{
    // if no attributes have been set, use empty attributes
    if (!isset($attributes) || !is_array($attributes)) {
        $attributes = array();
    }

    // in multiline mode '_auto' indents by the length of the qname plus 2
    if ($multiline === true) {
        if ($indent === '_auto') {
            $indent = str_repeat(' ', (strlen($qname)+2));
        }
    }

    if ($namespaceUri != null) {
        $parts = XML_Util::splitQualifiedName($qname);

        // is a namespace given
        if (isset($parts['namespace']) && !empty($parts['namespace'])) {
            $attributes['xmlns:' . $parts['namespace']] = $namespaceUri;
        } else {
            // define this Uri as the default namespace
            $attributes['xmlns'] = $namespaceUri;
        }
    }

    // create attribute list
    $attList = XML_Util::attributesToString($attributes, $sortAttributes,
        $multiline, $indent, $linebreak);

    return sprintf('<%s%s>', $qname, $attList);
}
/**
 * create an end element
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML end element:
 * $tag = XML_Util::createEndElement('myNs:myTag');
 * </code>
 *
 * @param string $qname qualified tagname (including namespace)
 *
 * @return string XML end element
 * @see createStartElement(), createTag()
 */
function createEndElement($qname)
{
    return sprintf('</%s>', $qname);
}
/**
 * create an XML comment
 *
 * The content is inserted verbatim, it is neither escaped nor validated.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML comment:
 * $tag = XML_Util::createComment('I am a comment');
 * </code>
 *
 * @param string $content content of the comment
 *
 * @return string XML comment
 */
function createComment($content)
{
    return sprintf('<!-- %s -->', $content);
}
/**
 * create a CData section
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create a CData section
 * $tag = XML_Util::createCDataSection('I am content.');
 * </code>
 *
 * @param string $data data of the CData section
 *
 * @return string CData section with content
 */
function createCDataSection($data)
{
    // A literal "]]>" would terminate the section early, so it is split
    // across two adjacent CData sections: "]]" closes the first one and
    // ">" opens the next.
    $safe = str_replace(']]>', ']]]]><![CDATA[>', strval($data));

    return sprintf('<![CDATA[%s]]>', $safe);
}
/**
 * split qualified name and return namespace and local part
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // split qualified tag
 * $parts = XML_Util::splitQualifiedName('xslt:stylesheet');
 * </code>
 * the returned array will contain two elements:
 * <pre>
 * array(
 *     'namespace' => 'xslt',
 *     'localPart' => 'stylesheet'
 * );
 * </pre>
 *
 * The name is split at the first colon only, so nothing after a further
 * colon is dropped from the local part.
 *
 * @param string $qname     qualified tag name
 * @param string $defaultNs default namespace (optional), used when the
 *                          name contains no colon
 *
 * @return array array containing namespace and local part
 */
function splitQualifiedName($qname, $defaultNs = null)
{
    if (strpos($qname, ':') !== false) {
        // limit 2: without it, anything after a second colon was lost
        list($namespace, $localPart) = explode(':', $qname, 2);

        return array(
            'namespace' => $namespace,
            'localPart' => $localPart
        );
    }

    return array(
        'namespace' => $defaultNs,
        'localPart' => $qname
    );
}
/**
 * check, whether string is valid XML name
 *
 * <p>XML names are used for tagname, attribute names and various
 * other, lesser known entities.</p>
 * <p>An XML name may only consist of alphanumeric characters,
 * dashes, underscores and periods, and has to start with a letter
 * or an underscore. A single colon may separate a namespace prefix
 * from the local part; both follow the same rules.</p>
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // verify tag name
 * $result = XML_Util::isValidName('invalidTag?');
 * if (is_a($result, 'PEAR_Error')) {
 *    print 'Invalid XML name: ' . $result->getMessage();
 * }
 * </code>
 *
 * @param string $string string that should be checked
 *
 * @return mixed true, if string is a valid XML name, PEAR error otherwise
 * @uses raiseError()
 * @todo support for other charsets
 */
function isValidName($string)
{
    // check the first character; matching against the whole string avoids
    // the $string{0} offset syntax (removed in PHP 8) and rejects an empty
    // string without an offset notice
    if (!preg_match('/^[[:alpha:]_]/', $string)) {
        return XML_Util::raiseError('XML names may only start with letter '
            . 'or underscore', XML_UTIL_ERROR_INVALID_START);
    }

    // check for invalid chars
    // - the optional prefix accepts the same characters as the local part
    //   (the underscore was missing from its character class)
    // - \z instead of $ so a trailing newline is not silently accepted
    $pattern = '/^([[:alpha:]_]([[:alnum:]_\-\.]*)?:)?'
        . '[[:alpha:]_]([[:alnum:]\_\-\.]+)?\z/';
    if (!preg_match($pattern, $string)) {
        return XML_Util::raiseError('XML names may only contain alphanumeric '
            . 'chars, period, hyphen, colon and underscores',
            XML_UTIL_ERROR_INVALID_CHARS);
    }

    // XML name is valid
    return true;
}
/**
 * replacement for PEAR::raiseError
 *
 * Avoids the necessity to always require PEAR.php: the file is only
 * loaded at the moment an error actually has to be raised.
 *
 * @param string $msg  error message
 * @param int    $code error code
 *
 * @return mixed whatever PEAR::raiseError() returns
 * @todo PEAR CS - should this use include_once instead?
 */
function raiseError($msg, $code)
{
    require_once 'PEAR.php';

    return PEAR::raiseError($msg, $code);
}