* @license http://opensource.org/licenses/bsd-license New BSD License
* @version Release: 1.3.0
* @link http://pear.php.net/package/XML_Util
*/
class XML_Util
{
/**
 * Report the version of the XML_Util API implemented by this class.
 *
 * @return string API version identifier
 */
public static function apiVersion()
{
    $apiVersion = '1.1';

    return $apiVersion;
}
/**
 * Replace XML entities
 *
 * With the optional second parameter, you may select which
 * entities should be replaced.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // replace XML entities:
 * $string = XML_Util::replaceEntities('This string contains < & >.');
 * </code>
 *
 * With the optional third parameter, you may pass the character encoding
 * <code>
 * require_once 'XML/Util.php';
 *
 * // replace XML entities in UTF-8:
 * $string = XML_Util::replaceEntities(
 *     'This string contains < & > as well as ä, ö, ß, à and ê',
 *     XML_UTIL_ENTITIES_HTML,
 *     'UTF-8'
 * );
 * </code>
 *
 * Any other value of $replaceEntities (e.g. XML_UTIL_ENTITIES_NONE)
 * returns the string unchanged.
 *
 * @param string $string          string where XML special chars
 *                                should be replaced
 * @param int    $replaceEntities setting for entities in attribute values
 *                                (one of XML_UTIL_ENTITIES_XML,
 *                                XML_UTIL_ENTITIES_XML_REQUIRED,
 *                                XML_UTIL_ENTITIES_HTML)
 * @param string $encoding        encoding value (if any)...
 *                                must be a valid encoding as determined
 *                                by the htmlentities() function; only used
 *                                with XML_UTIL_ENTITIES_HTML
 *
 * @return string string with replaced chars
 * @see reverseEntities()
 */
public static function replaceEntities(
    $string, $replaceEntities = XML_UTIL_ENTITIES_XML, $encoding = 'ISO-8859-1'
) {
    switch ($replaceEntities) {
    case XML_UTIL_ENTITIES_XML:
        // All five predefined XML entities. strtr() with an array makes a
        // single pass, so the '&' of an inserted entity is never re-escaped.
        return strtr(
            $string,
            array(
                '&'  => '&amp;',
                '>'  => '&gt;',
                '<'  => '&lt;',
                '"'  => '&quot;',
                '\'' => '&apos;'
            )
        );
    case XML_UTIL_ENTITIES_XML_REQUIRED:
        // Only the characters that must be escaped inside a
        // double-quoted attribute value.
        return strtr(
            $string,
            array(
                '&' => '&amp;',
                '<' => '&lt;',
                '"' => '&quot;'
            )
        );
    case XML_UTIL_ENTITIES_HTML:
        return htmlentities($string, ENT_COMPAT, $encoding);
    }
    // unknown mode: leave the string untouched
    return $string;
}
/**
 * Reverse XML entities
 *
 * With the optional second parameter, you may select which
 * entities should be reversed.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // reverse XML entities:
 * $string = XML_Util::reverseEntities('This string contains &lt; &amp; &gt;.');
 * </code>
 *
 * With the optional third parameter, you may pass the character encoding
 * <code>
 * require_once 'XML/Util.php';
 *
 * // reverse XML entities in UTF-8:
 * $string = XML_Util::reverseEntities(
 *     'This string contains &lt; &amp; &gt; as well as'
 *     . ' &auml;, &ouml;, &szlig;, &agrave; and &ecirc;',
 *     XML_UTIL_ENTITIES_HTML,
 *     'UTF-8'
 * );
 * </code>
 *
 * Any other value of $replaceEntities returns the string unchanged.
 *
 * @param string $string          string where XML entities
 *                                should be turned back into characters
 * @param int    $replaceEntities setting for entities in attribute values
 *                                (one of XML_UTIL_ENTITIES_XML,
 *                                XML_UTIL_ENTITIES_XML_REQUIRED,
 *                                XML_UTIL_ENTITIES_HTML)
 * @param string $encoding        encoding value (if any)...
 *                                must be a valid encoding as determined
 *                                by the html_entity_decode() function; only
 *                                used with XML_UTIL_ENTITIES_HTML
 *
 * @return string string with replaced chars
 * @see replaceEntities()
 */
public static function reverseEntities(
    $string, $replaceEntities = XML_UTIL_ENTITIES_XML, $encoding = 'ISO-8859-1'
) {
    switch ($replaceEntities) {
    case XML_UTIL_ENTITIES_XML:
        // Single-pass replacement: '&amp;lt;' decodes to '&lt;', not '<'.
        return strtr(
            $string,
            array(
                '&amp;'  => '&',
                '&gt;'   => '>',
                '&lt;'   => '<',
                '&quot;' => '"',
                '&apos;' => '\''
            )
        );
    case XML_UTIL_ENTITIES_XML_REQUIRED:
        return strtr(
            $string,
            array(
                '&amp;'  => '&',
                '&lt;'   => '<',
                '&quot;' => '"'
            )
        );
    case XML_UTIL_ENTITIES_HTML:
        return html_entity_decode($string, ENT_COMPAT, $encoding);
    }
    // unknown mode: leave the string untouched
    return $string;
}
/**
 * Build an xml declaration
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // get an XML declaration:
 * $xmlDecl = XML_Util::getXMLDeclaration('1.0', 'UTF-8', true);
 * </code>
 *
 * @param string $version    xml version
 * @param string $encoding   character encoding (omitted when null)
 * @param bool   $standalone document is standalone (or not);
 *                           omitted when null
 *
 * @return string xml declaration
 * @uses attributesToString() to serialize the attributes of the
 *       XML declaration
 */
public static function getXMLDeclaration(
    $version = '1.0', $encoding = null, $standalone = null
) {
    $attributes = array(
        'version' => $version,
    );
    // add encoding
    if ($encoding !== null) {
        $attributes['encoding'] = $encoding;
    }
    // add standalone, if specified
    if ($standalone !== null) {
        $attributes['standalone'] = $standalone ? 'yes' : 'no';
    }
    // Attributes are passed unsorted so they stay in the order
    // version, encoding, standalone.
    return sprintf(
        '<?xml%s?>',
        XML_Util::attributesToString($attributes, false)
    );
}
/**
 * Build a document type declaration
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // get a doctype declaration:
 * $xmlDecl = XML_Util::getDocTypeDeclaration('rootTag','myDocType.dtd');
 * </code>
 *
 * @param string       $root        name of the root tag
 * @param string|array $uri         uri of the doctype definition, or an
 *                                  array with the keys 'uri' and 'id'
 *                                  (public id)
 * @param string       $internalDtd internal dtd entries
 *
 * @return string doctype declaration
 * @since 0.2
 */
public static function getDocTypeDeclaration(
    $root, $uri = null, $internalDtd = null
) {
    if (is_array($uri)) {
        // public identifier followed by the system identifier
        $ref = sprintf(' PUBLIC "%s" "%s"', $uri['id'], $uri['uri']);
    } elseif (!empty($uri)) {
        $ref = sprintf(' SYSTEM "%s"', $uri);
    } else {
        $ref = '';
    }
    if (empty($internalDtd)) {
        return sprintf('<!DOCTYPE %s%s>', $root, $ref);
    }
    // the internal subset goes between square brackets
    return sprintf("<!DOCTYPE %s%s[\n%s\n]>", $root, $ref, $internalDtd);
}
/**
 * Create string representation of an attribute list
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // build an attribute string
 * $att = array(
 *     'foo'  => 'bar',
 *     'argh' => 'tomato'
 * );
 *
 * $attList = XML_Util::attributesToString($att);
 * </code>
 *
 * @param array      $attributes attribute array
 * @param bool|array $sort       sort attribute list alphabetically,
 *                               may also be an assoc array containing
 *                               the keys 'sort', 'multiline', 'indent',
 *                               'linebreak' and 'entities'; keys present
 *                               in the array override the corresponding
 *                               positional parameters
 * @param bool       $multiline  use linebreaks, if more than
 *                               one attribute is given
 * @param string     $indent     string used for indentation of
 *                               multiline attributes
 * @param string     $linebreak  string used for linebreaks of
 *                               multiline attributes
 * @param int        $entities   setting for entities in attribute values
 *                               (one of XML_UTIL_ENTITIES_NONE,
 *                               XML_UTIL_ENTITIES_XML,
 *                               XML_UTIL_ENTITIES_XML_REQUIRED,
 *                               XML_UTIL_ENTITIES_HTML)
 *
 * @return string string representation of the attributes; every
 *                non-empty result starts with a single space
 * @uses replaceEntities() to replace XML entities in attribute values
 */
public static function attributesToString(
    $attributes, $sort = true, $multiline = false,
    $indent = ' ', $linebreak = "\n", $entities = XML_UTIL_ENTITIES_XML
) {
    /*
     * second parameter may be an array of options
     */
    if (is_array($sort)) {
        $options = $sort;
        if (isset($options['multiline'])) {
            $multiline = $options['multiline'];
        }
        if (isset($options['indent'])) {
            $indent = $options['indent'];
        }
        if (isset($options['linebreak'])) {
            // previously assigned to $multiline by mistake
            $linebreak = $options['linebreak'];
        }
        if (isset($options['entities'])) {
            $entities = $options['entities'];
        }
        $sort = isset($options['sort']) ? $options['sort'] : true;
    }

    if (!is_array($attributes) || empty($attributes)) {
        return '';
    }
    if ($sort) {
        ksort($attributes);
    }

    $escape = ($entities != XML_UTIL_ENTITIES_NONE);
    if ($entities === XML_UTIL_CDATA_SECTION) {
        // the CDATA setting is treated as plain XML entity replacement
        // for attribute values, in single-line and multiline output alike
        $entities = XML_UTIL_ENTITIES_XML;
    }

    $pairs = array();
    foreach ($attributes as $key => $value) {
        if ($escape) {
            $value = XML_Util::replaceEntities($value, $entities);
        }
        $pairs[] = $key . '="' . $value . '"';
    }

    if (!$multiline || count($pairs) == 1) {
        return ' ' . implode(' ', $pairs);
    }
    // the first attribute stays on the tag's line, the others are
    // each put on their own indented line
    return ' ' . implode($linebreak . $indent, $pairs);
}
/**
 * Collapse empty element pairs (start tag directly followed by its end tag)
 * into the short empty-element notation.
 *
 * @param string $xml  XML
 * @param int    $mode XML_UTIL_COLLAPSE_ALL collapses every empty element,
 *                     XML_UTIL_COLLAPSE_XHTML_ONLY restricts collapsing to
 *                     the fixed list of XHTML element names in the pattern
 *
 * @return string XML
 */
public static function collapseEmptyTags($xml, $mode = XML_UTIL_COLLAPSE_ALL)
{
    // default: any element name qualifies
    $pattern = '/<(\w+)([^>]*)><\/\\1>/s';

    if ($mode == XML_UTIL_COLLAPSE_XHTML_ONLY) {
        $pattern = '/<(area|base(?:font)?|br|col|frame|hr|img|input|isindex|link|meta|'
            . 'param)([^>]*)><\/\\1>/s';
    }

    return preg_replace($pattern, '<\\1\\2 />', $xml);
}
/**
 * Create a tag
 *
 * Convenience front end that packs its arguments into a tag definition
 * array and delegates to XML_Util::createTagFromArray(), which
 * is more flexible.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML tag:
 * $tag = XML_Util::createTag('myNs:myTag',
 *     array('foo' => 'bar'),
 *     'This is inside the tag',
 *     'http://www.w3c.org/myNs#');
 * </code>
 *
 * @param string $qname           qualified tagname (including namespace)
 * @param array  $attributes      array containing attributes
 * @param mixed  $content         the content (left out of the tag
 *                                definition when null)
 * @param string $namespaceUri    URI of the namespace (left out of the tag
 *                                definition when null)
 * @param int    $replaceEntities whether to replace XML special chars in
 *                                content, embed it in a CData section
 *                                or none of both
 * @param bool   $multiline       whether to create a multiline tag where
 *                                each attribute gets written to a single line
 * @param string $indent          string used to indent attributes
 *                                (_auto indents attributes so they start
 *                                at the same column)
 * @param string $linebreak       string used for linebreaks
 * @param bool   $sortAttributes  Whether to sort the attributes or not
 *
 * @return string XML tag
 * @see createTagFromArray()
 * @uses createTagFromArray() to create the tag
 */
public static function createTag(
    $qname, $attributes = array(), $content = null,
    $namespaceUri = null, $replaceEntities = XML_UTIL_REPLACE_ENTITIES,
    $multiline = false, $indent = '_auto', $linebreak = "\n",
    $sortAttributes = true
) {
    $definition = array(
        'qname'      => $qname,
        'attributes' => $attributes
    );

    // optional parts are only added when the caller supplied them
    if (!is_null($content)) {
        $definition['content'] = $content;
    }
    if (!is_null($namespaceUri)) {
        $definition['namespaceUri'] = $namespaceUri;
    }

    return XML_Util::createTagFromArray(
        $definition, $replaceEntities, $multiline,
        $indent, $linebreak, $sortAttributes
    );
}
/**
 * Create a tag from an array.
 * This method awaits an array in the following format
 * <pre>
 * array(
 *     // qualified name of the tag
 *     'qname' => $qname
 *
 *     // namespace prefix (optional, if qname is specified or no namespace)
 *     'namespace' => $namespace
 *
 *     // local part of the tagname (optional, if qname is specified)
 *     'localPart' => $localPart,
 *
 *     // array containing all attributes (optional)
 *     'attributes' => array(),
 *
 *     // tag content (optional)
 *     'content' => $content,
 *
 *     // namespaceUri for the given namespace (optional)
 *     'namespaceUri' => $namespaceUri
 *
 *     // additional namespace declarations, prefix => URI (optional)
 *     'namespaces' => array()
 * )
 * </pre>
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * $tag = array(
 *     'qname'        => 'foo:bar',
 *     'namespaceUri' => 'http://foo.com',
 *     'attributes'   => array('key' => 'value', 'argh' => 'fruit&vegetable'),
 *     'content'      => 'I\'m inside the tag',
 * );
 * // creating a tag with qualified name and namespaceUri
 * $string = XML_Util::createTagFromArray($tag);
 * </code>
 *
 * @param array  $tag             tag definition
 * @param int    $replaceEntities whether to replace XML special chars in
 *                                content, embed it in a CData section
 *                                or none of both
 * @param bool   $multiline       whether to create a multiline tag where each
 *                                attribute gets written to a single line
 * @param string $indent          string used to indent attributes
 *                                (_auto indents attributes so they start
 *                                at the same column)
 * @param string $linebreak       string used for linebreaks
 * @param bool   $sortAttributes  Whether to sort the attributes or not
 *
 * @return string|PEAR_Error XML tag, or the result of raiseError() when the
 *                           content is not scalar or no tag name is given
 *
 * @see createTag()
 * @uses attributesToString() to serialize the attributes of the tag
 * @uses splitQualifiedName() to get local part and namespace of a qualified name
 * @uses createCDataSection()
 * @uses raiseError()
 */
public static function createTagFromArray(
    $tag, $replaceEntities = XML_UTIL_REPLACE_ENTITIES,
    $multiline = false, $indent = '_auto', $linebreak = "\n",
    $sortAttributes = true
) {
    if (isset($tag['content']) && !is_scalar($tag['content'])) {
        return XML_Util::raiseError(
            'Supplied non-scalar value as tag content',
            XML_UTIL_ERROR_NON_SCALAR_CONTENT
        );
    }
    if (!isset($tag['qname']) && !isset($tag['localPart'])) {
        return XML_Util::raiseError(
            'You must either supply a qualified name '
            . '(qname) or local tag name (localPart).',
            XML_UTIL_ERROR_NO_TAG_NAME
        );
    }
    // if no attributes have been set, use empty attributes
    if (!isset($tag['attributes']) || !is_array($tag['attributes'])) {
        $tag['attributes'] = array();
    }
    // extra namespace declarations become xmlns:prefix attributes
    if (isset($tag['namespaces'])) {
        foreach ($tag['namespaces'] as $ns => $uri) {
            $tag['attributes']['xmlns:' . $ns] = $uri;
        }
    }
    if (!isset($tag['qname'])) {
        // qualified name is not given
        // check for namespace
        if (isset($tag['namespace']) && !empty($tag['namespace'])) {
            $tag['qname'] = $tag['namespace'] . ':' . $tag['localPart'];
        } else {
            $tag['qname'] = $tag['localPart'];
        }
    } elseif (isset($tag['namespaceUri']) && !isset($tag['namespace'])) {
        // namespace URI is set, but no namespace
        $parts = XML_Util::splitQualifiedName($tag['qname']);
        $tag['localPart'] = $parts['localPart'];
        if (isset($parts['namespace'])) {
            $tag['namespace'] = $parts['namespace'];
        }
    }
    if (isset($tag['namespaceUri']) && !empty($tag['namespaceUri'])) {
        // is a namespace given
        if (isset($tag['namespace']) && !empty($tag['namespace'])) {
            $tag['attributes']['xmlns:' . $tag['namespace']]
                = $tag['namespaceUri'];
        } else {
            // define this Uri as the default namespace
            $tag['attributes']['xmlns'] = $tag['namespaceUri'];
        }
    }
    // check for multiline attributes
    if ($multiline === true) {
        if ($indent === '_auto') {
            // align with the first attribute: '<' + qname + ' '
            $indent = str_repeat(' ', (strlen($tag['qname'])+2));
        }
    }
    // create attribute list
    $attList = XML_Util::attributesToString(
        $tag['attributes'],
        $sortAttributes, $multiline, $indent, $linebreak
    );
    if (!isset($tag['content']) || (string)$tag['content'] == '') {
        // no content: emit an empty-element tag
        return sprintf('<%s%s />', $tag['qname'], $attList);
    }
    switch ($replaceEntities) {
    case XML_UTIL_ENTITIES_NONE:
        break;
    case XML_UTIL_CDATA_SECTION:
        $tag['content'] = XML_Util::createCDataSection($tag['content']);
        break;
    default:
        $tag['content'] = XML_Util::replaceEntities(
            $tag['content'], $replaceEntities
        );
        break;
    }
    // start tag, content, and a properly formed end tag
    return sprintf(
        '<%s%s>%s</%s>', $tag['qname'], $attList, $tag['content'],
        $tag['qname']
    );
}
/**
 * Create a start element
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML start element:
 * $tag = XML_Util::createStartElement('myNs:myTag',
 *     array('foo' => 'bar') ,'http://www.w3c.org/myNs#');
 * </code>
 *
 * @param string $qname          qualified tagname (including namespace)
 * @param array  $attributes     array containing attributes
 * @param string $namespaceUri   URI of the namespace; when given it is added
 *                               as an xmlns attribute (prefixed when the
 *                               qname carries a namespace prefix)
 * @param bool   $multiline      whether to create a multiline tag where each
 *                               attribute gets written to a single line
 * @param string $indent         string used to indent attributes (_auto indents
 *                               attributes so they start at the same column)
 * @param string $linebreak      string used for linebreaks
 * @param bool   $sortAttributes Whether to sort the attributes or not
 *
 * @return string XML start element
 * @see createEndElement(), createTag()
 */
public static function createStartElement(
    $qname, $attributes = array(), $namespaceUri = null,
    $multiline = false, $indent = '_auto', $linebreak = "\n",
    $sortAttributes = true
) {
    // anything that is not an array counts as "no attributes"
    if (!is_array($attributes)) {
        $attributes = array();
    }

    // automatic indent lines attributes up behind '<' + qname + ' '
    if ($multiline === true && $indent === '_auto') {
        $indent = str_repeat(' ', (strlen($qname)+2));
    }

    if ($namespaceUri != null) {
        $parts = XML_Util::splitQualifiedName($qname);
        // prefixed declaration if the qname has a prefix,
        // default namespace otherwise
        $xmlnsName = empty($parts['namespace'])
            ? 'xmlns'
            : 'xmlns:' . $parts['namespace'];
        $attributes[$xmlnsName] = $namespaceUri;
    }

    $attList = XML_Util::attributesToString(
        $attributes, $sortAttributes,
        $multiline, $indent, $linebreak
    );

    return sprintf('<%s%s>', $qname, $attList);
}
/**
 * Create an end element
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML end element:
 * $tag = XML_Util::createEndElement('myNs:myTag');
 * </code>
 *
 * @param string $qname qualified tagname (including namespace)
 *
 * @return string XML end element
 * @see createStartElement(), createTag()
 */
public static function createEndElement($qname)
{
    // an end tag is the qualified name wrapped in '</' and '>'
    return sprintf('</%s>', $qname);
}
/**
 * Create an XML comment
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create an XML comment:
 * $tag = XML_Util::createComment('I am a comment');
 * </code>
 *
 * The content is inserted verbatim; it is not checked for the
 * sequence '--', which is not allowed inside XML comments.
 *
 * @param string $content content of the comment
 *
 * @return string XML comment
 */
public static function createComment($content)
{
    return sprintf('<!-- %s -->', $content);
}
/**
 * Create a CData section
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // create a CData section
 * $tag = XML_Util::createCDataSection('I am content.');
 * </code>
 *
 * @param string $data data of the CData section
 *
 * @return string CData section with content
 */
public static function createCDataSection($data)
{
    // A literal ']]>' would terminate the section early, so the section
    // is closed after ']]' and a new one is opened for the '>'.
    return sprintf(
        '<![CDATA[%s]]>',
        preg_replace('/\]\]>/', ']]]]><![CDATA[>', strval($data))
    );
}
/**
 * Split qualified name and return namespace and local part
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // split qualified tag
 * $parts = XML_Util::splitQualifiedName('xslt:stylesheet');
 * </code>
 * the returned array will contain two elements:
 * <pre>
 * array(
 *     'namespace' => 'xslt',
 *     'localPart' => 'stylesheet'
 * );
 * </pre>
 *
 * @param string $qname     qualified tag name
 * @param string $defaultNs namespace reported when the name has
 *                          no prefix (optional)
 *
 * @return array array containing namespace and local part
 */
public static function splitQualifiedName($qname, $defaultNs = null)
{
    // start from the "no prefix" answer and refine it if a colon is found
    $namespace = $defaultNs;
    $localPart = $qname;

    if (strstr($qname, ':')) {
        $pieces    = explode(':', $qname);
        $namespace = $pieces[0];
        $localPart = $pieces[1];
    }

    return array(
        'namespace' => $namespace,
        'localPart' => $localPart
    );
}
/**
 * Check, whether string is valid XML name
 *
 * XML names are used for tagname, attribute names and various
 * other, lesser known entities.
 * An XML name may only consist of alphanumeric characters,
 * dashes, underscores and periods, and has to start with a letter
 * or an underscore.
 *
 * <code>
 * require_once 'XML/Util.php';
 *
 * // verify tag name
 * $result = XML_Util::isValidName('invalidTag?');
 * if (is_a($result, 'PEAR_Error')) {
 *     print 'Invalid XML name: ' . $result->getMessage();
 * }
 * </code>
 *
 * @param string $string string that should be checked
 *
 * @return mixed true, if string is a valid XML name, PEAR error otherwise
 *
 * @todo support for other charsets
 * @todo PEAR CS - unable to avoid 85-char limit on second preg_match
 */
public static function isValidName($string)
{
    // check the first character; substr() yields an empty value for an
    // empty name, which simply fails the match instead of raising an
    // offset warning (the former {0} offset syntax no longer parses on PHP 8)
    $firstChar = substr((string)$string, 0, 1);
    if (!preg_match('/^[[:alpha:]_]\\z/', $firstChar)) {
        return XML_Util::raiseError(
            'XML names may only start with letter or underscore',
            XML_UTIL_ERROR_INVALID_START
        );
    }
    // check the whole name: optional "prefix:" followed by the local part
    $match = preg_match(
        '/^([[:alpha:]_]([[:alnum:]\-\.]*)?:)?'
        . '[[:alpha:]_]([[:alnum:]\_\-\.]+)?\\z/',
        $string
    );
    if (!$match) {
        return XML_Util::raiseError(
            'XML names may only contain alphanumeric '
            . 'chars, period, hyphen, colon and underscores',
            XML_UTIL_ERROR_INVALID_CHARS
        );
    }
    // XML name is valid
    return true;
}
/**
 * Replacement for PEAR::raiseError
 *
 * Loads PEAR.php only at the moment an error actually has to be
 * created, so that callers do not need to require it up front.
 *
 * @param string $msg  error message
 * @param int    $code error code
 *
 * @return PEAR_Error
 * @todo PEAR CS - should this use include_once instead?
 */
public static function raiseError($msg, $code)
{
    include_once 'PEAR.php';

    $error = PEAR::raiseError($msg, $code);

    return $error;
}
}
?>
# $Id: LibXML.pm 809 2009-10-04 21:17:41Z pajas $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML;
use strict;
use vars qw($VERSION $ABI_VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS
$skipDTD $skipXMLDeclaration $setTagCompression
$MatchCB $ReadCB $OpenCB $CloseCB %PARSER_FLAGS
);
use Carp;
use constant XML_XMLNS_NS => 'http://www.w3.org/2000/xmlns/';
use constant XML_XML_NS => 'http://www.w3.org/XML/1998/namespace';
use XML::LibXML::Error;
use XML::LibXML::NodeList;
use XML::LibXML::XPathContext;
use IO::Handle; # for FH reads called as methods
BEGIN {
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
$ABI_VERSION = 2;
require Exporter;
require DynaLoader;
@ISA = qw(DynaLoader Exporter);
use vars qw($__PROXY_NODE_REGISTRY $__threads_shared $__PROXY_NODE_REGISTRY_MUTEX $__loaded);
# Version hook that additionally verifies binary (ABI) compatibility
# between XML::LibXML and the package requesting the version check,
# then defers to UNIVERSAL::VERSION for the ordinary version comparison.
sub VERSION {
    my $class  = shift;
    my $caller = caller;    # package performing the check

    # ABI the caller was built for: an explicit declaration wins, an
    # XML::LibXSLT without such a declaration is an old and incompatible
    # version (ABI 1), anything else is assumed to match ours.
    my $req_abi =
        UNIVERSAL::can($caller, 'REQUIRE_XML_LIBXML_ABI_VERSION')
            ? $caller->REQUIRE_XML_LIBXML_ABI_VERSION()
      : $caller eq 'XML::LibXSLT'
            ? 1
      :       $ABI_VERSION;

    if ($req_abi != $ABI_VERSION) {
        my $ver = @_ ? ' '.$_[0] : '';
        die ("This version of $caller requires XML::LibXML$ver (ABI $req_abi), which is incompatible with currently installed XML::LibXML $VERSION (ABI $ABI_VERSION). Please upgrade $caller, XML::LibXML, or both!");
    }

    return $class->UNIVERSAL::VERSION(@_);
}
#-------------------------------------------------------------------------#
# export information #
#-------------------------------------------------------------------------#
%EXPORT_TAGS = (
all => [qw(
XML_ELEMENT_NODE
XML_ATTRIBUTE_NODE
XML_TEXT_NODE
XML_CDATA_SECTION_NODE
XML_ENTITY_REF_NODE
XML_ENTITY_NODE
XML_PI_NODE
XML_COMMENT_NODE
XML_DOCUMENT_NODE
XML_DOCUMENT_TYPE_NODE
XML_DOCUMENT_FRAG_NODE
XML_NOTATION_NODE
XML_HTML_DOCUMENT_NODE
XML_DTD_NODE
XML_ELEMENT_DECL
XML_ATTRIBUTE_DECL
XML_ENTITY_DECL
XML_NAMESPACE_DECL
XML_XINCLUDE_END
XML_XINCLUDE_START
encodeToUTF8
decodeFromUTF8
XML_XMLNS_NS
XML_XML_NS
)],
libxml => [qw(
XML_ELEMENT_NODE
XML_ATTRIBUTE_NODE
XML_TEXT_NODE
XML_CDATA_SECTION_NODE
XML_ENTITY_REF_NODE
XML_ENTITY_NODE
XML_PI_NODE
XML_COMMENT_NODE
XML_DOCUMENT_NODE
XML_DOCUMENT_TYPE_NODE
XML_DOCUMENT_FRAG_NODE
XML_NOTATION_NODE
XML_HTML_DOCUMENT_NODE
XML_DTD_NODE
XML_ELEMENT_DECL
XML_ATTRIBUTE_DECL
XML_ENTITY_DECL
XML_NAMESPACE_DECL
XML_XINCLUDE_END
XML_XINCLUDE_START
)],
encoding => [qw(
encodeToUTF8
decodeFromUTF8
)],
ns => [qw(
XML_XMLNS_NS
XML_XML_NS
)],
);
@EXPORT_OK = (
@{$EXPORT_TAGS{all}},
);
@EXPORT = (
@{$EXPORT_TAGS{all}},
);
#-------------------------------------------------------------------------#
# initialization of the global variables #
#-------------------------------------------------------------------------#
$skipDTD = 0;
$skipXMLDeclaration = 0;
$setTagCompression = 0;
$MatchCB = undef;
$ReadCB = undef;
$OpenCB = undef;
$CloseCB = undef;
# if ($threads::threads) {
# our $__THREADS_TID = 0;
# eval q{
# use threads::shared;
# our $__PROXY_NODE_REGISTRY_MUTEX :shared = 0;
# };
# die $@ if $@;
# }
#-------------------------------------------------------------------------#
# bootstrapping #
#-------------------------------------------------------------------------#
bootstrap XML::LibXML $VERSION;
undef &AUTOLOAD;
*encodeToUTF8 = \&XML::LibXML::Common::encodeToUTF8;
*decodeFromUTF8 = \&XML::LibXML::Common::decodeFromUTF8;
} # BEGIN
#-------------------------------------------------------------------------#
# libxml2 node names (see also XML::LibXML::Common #
#-------------------------------------------------------------------------#
use constant XML_ELEMENT_NODE => 1;
use constant XML_ATTRIBUTE_NODE => 2;
use constant XML_TEXT_NODE => 3;
use constant XML_CDATA_SECTION_NODE => 4;
use constant XML_ENTITY_REF_NODE => 5;
use constant XML_ENTITY_NODE => 6;
use constant XML_PI_NODE => 7;
use constant XML_COMMENT_NODE => 8;
use constant XML_DOCUMENT_NODE => 9;
use constant XML_DOCUMENT_TYPE_NODE => 10;
use constant XML_DOCUMENT_FRAG_NODE => 11;
use constant XML_NOTATION_NODE => 12;
use constant XML_HTML_DOCUMENT_NODE => 13;
use constant XML_DTD_NODE => 14;
use constant XML_ELEMENT_DECL => 15;
use constant XML_ATTRIBUTE_DECL => 16;
use constant XML_ENTITY_DECL => 17;
use constant XML_NAMESPACE_DECL => 18;
use constant XML_XINCLUDE_START => 19;
use constant XML_XINCLUDE_END => 20;
# Custom import: handles the ':threads_shared' pseudo-tag before handing
# the remaining import list to Exporter.
#
# With ':threads_shared' in the import list, thread support is initialised
# on first use; the call croaks if initialisation is not possible or if the
# module was already set up without thread support. Without the tag, and
# once $XML::LibXML::__loaded is defined, thread sharing is recorded as
# disabled unless it was decided earlier.
sub import {
my $package=shift;
if (grep /^:threads_shared$/, @_) {
require threads;
# undefined means no decision about thread support has been made yet
if (!defined($__threads_shared)) {
if (INIT_THREAD_SUPPORT()) {
eval q{
use threads::shared;
share($__PROXY_NODE_REGISTRY_MUTEX);
};
if ($@) { # something went wrong
DISABLE_THREAD_SUPPORT(); # leave the library in a usable state
die $@; # and die
}
$__PROXY_NODE_REGISTRY = XML::LibXML::HashTable->new();
$__threads_shared=1;
} else {
croak("XML::LibXML or Perl compiled without ithread support!");
}
} elsif (!$__threads_shared) {
# defined but false: an earlier import already settled on "no threads"
croak("XML::LibXML already loaded without thread support. Too late to enable thread support!");
}
} elsif (defined $XML::LibXML::__loaded) {
$__threads_shared=0 if not defined $__threads_shared;
}
# export the requested symbols to the caller, minus the thread pseudo-tags
__PACKAGE__->export_to_level(1,$package,grep !/^:threads(_shared)?$/,@_);
}
# Report whether thread sharing has been switched on: 1 if so, 0 otherwise.
sub threads_shared_enabled {
    return 1 if $__threads_shared;
    return 0;
}
# if ($threads::threads) {
# our $__PROXY_NODE_REGISTRY = XML::LibXML::HashTable->new();
# }
#-------------------------------------------------------------------------#
# test exact version (up to patch-level) #
#-------------------------------------------------------------------------#
# Load-time sanity check: warn when the leading digits of the runtime
# libxml2 version are numerically smaller than the LIBXML_VERSION value
# the module was compiled against.
{
# keep only the leading run of digits of the runtime version string
my ($runtime_version) = LIBXML_RUNTIME_VERSION() =~ /^(\d+)/;
if ( $runtime_version < LIBXML_VERSION ) {
warn "Warning: XML::LibXML compiled against libxml2 ".LIBXML_VERSION.
", but runtime libxml2 is older $runtime_version\n";
}
}
#-------------------------------------------------------------------------#
# parser flags #
#-------------------------------------------------------------------------#
# Copied directly from http://xmlsoft.org/html/libxml-parser.html#xmlParserOption
use constant {
XML_PARSE_RECOVER => 1, # recover on errors
XML_PARSE_NOENT => 2, # substitute entities
XML_PARSE_DTDLOAD => 4, # load the external subset
XML_PARSE_DTDATTR => 8, # default DTD attributes
XML_PARSE_DTDVALID => 16, # validate with the DTD
XML_PARSE_NOERROR => 32, # suppress error reports
XML_PARSE_NOWARNING => 64, # suppress warning reports
XML_PARSE_PEDANTIC => 128, # pedantic error reporting
XML_PARSE_NOBLANKS => 256, # remove blank nodes
XML_PARSE_SAX1 => 512, # use the SAX1 interface internally
XML_PARSE_XINCLUDE => 1024, # Implement XInclude substitition
XML_PARSE_NONET => 2048, # Forbid network access
XML_PARSE_NODICT => 4096, # Do not reuse the context dictionnary
XML_PARSE_NSCLEAN => 8192, # remove redundant namespaces declarations
XML_PARSE_NOCDATA => 16384, # merge CDATA as text nodes
XML_PARSE_NOXINCNODE => 32768, # do not generate XINCLUDE START/END nodes
XML_PARSE_COMPACT => 65536, # compact small text nodes; no modification of the tree allowed afterwards
# (will possibly crash if you try to modify the tree)
XML_PARSE_OLD10 => 131072, # parse using XML-1.0 before update 5
XML_PARSE_NOBASEFIX => 262144, # do not fixup XINCLUDE xml#base uris
XML_PARSE_HUGE => 524288, # relax any hardcoded limit from the parser
XML_PARSE_OLDSAX => 1048576, # parse using SAX2 interface from before 2.7.0
};
use constant XML_LIBXML_PARSE_DEFAULTS => ( XML_PARSE_NODICT | XML_PARSE_HUGE | XML_PARSE_DTDLOAD | XML_PARSE_NOENT );
# this hash is made global so that applications can add names for new
# libxml2 parser flags as temporary workaround
%PARSER_FLAGS = (
recover => XML_PARSE_RECOVER,
expand_entities => XML_PARSE_NOENT,
load_ext_dtd => XML_PARSE_DTDLOAD,
complete_attributes => XML_PARSE_DTDATTR,
validation => XML_PARSE_DTDVALID,
suppress_errors => XML_PARSE_NOERROR,
suppress_warnings => XML_PARSE_NOWARNING,
pedantic_parser => XML_PARSE_PEDANTIC,
no_blanks => XML_PARSE_NOBLANKS,
expand_xinclude => XML_PARSE_XINCLUDE,
xinclude => XML_PARSE_XINCLUDE,
no_network => XML_PARSE_NONET,
clean_namespaces => XML_PARSE_NSCLEAN,
no_cdata => XML_PARSE_NOCDATA,
no_xinclude_nodes => XML_PARSE_NOXINCNODE,
old10 => XML_PARSE_OLD10,
no_base_fix => XML_PARSE_NOBASEFIX,
huge => XML_PARSE_HUGE,
oldsax => XML_PARSE_OLDSAX,
);
my %OUR_FLAGS = (
recover => 'XML_LIBXML_RECOVER',
line_numbers => 'XML_LIBXML_LINENUMBERS',
URI => 'XML_LIBXML_BASE_URI',
base_uri => 'XML_LIBXML_BASE_URI',
gdome => 'XML_LIBXML_GDOME',
ext_ent_handler => 'ext_ent_handler',
);
# Compute a parser option bitmask from a hash of named options.
#
# Called on an object, the object's current XML_LIBXML_PARSER_OPTIONS
# (or 0) is the starting point; called on a class, the module defaults are.
# Every key known to %PARSER_FLAGS switches its bit on or off according to
# the truth of its value; 'set_parser_flags' / 'unset_parser_flags' take a
# raw bitmask to OR in / mask out. Other keys are ignored.
sub _parser_options {
    my ($self, $opts) = @_;

    # currently dictionaries break XML::LibXML memory management
    my $flags = ref($self)
        ? ($self->{XML_LIBXML_PARSER_OPTIONS} || 0)
        : XML_LIBXML_PARSE_DEFAULTS;    # safety precaution

    while (my ($name, $setting) = each %$opts) {
        my $bit = $PARSER_FLAGS{ $name };
        if (defined $bit) {
            $flags = $setting ? ($flags | $bit) : ($flags & ~$bit);
        }
        elsif ($name eq 'set_parser_flags') {
            # this can be used to pass flags XML::LibXML does not yet know about
            $flags |= $setting;
        }
        elsif ($name eq 'unset_parser_flags') {
            $flags &= ~$setting;
        }
    }

    return $flags;
}
my %compatibility_flags = (
XML_LIBXML_VALIDATION => 'validation',
XML_LIBXML_EXPAND_ENTITIES => 'expand_entities',
XML_LIBXML_PEDANTIC => 'pedantic_parser',
XML_LIBXML_NONET => 'no_network',
XML_LIBXML_EXT_DTD => 'load_ext_dtd',
XML_LIBXML_COMPLETE_ATTR => 'complete_attributes',
XML_LIBXML_EXPAND_XINCLUDE => 'expand_xinclude',
XML_LIBXML_NSCLEAN => 'clean_namespaces',
XML_LIBXML_KEEP_BLANKS => 'keep_blanks',
XML_LIBXML_LINENUMBERS => 'line_numbers',
);
#-------------------------------------------------------------------------#
# parser constructor #
#-------------------------------------------------------------------------#
# Parser constructor.
#
# Accepts either a single hash reference of options or the old interface,
# a flat key/value list whose XML_LIBXML_* keys are translated through
# %compatibility_flags. Options listed in %OUR_FLAGS are stored in their
# own slots of the object, known parser flags are folded into the
# XML_LIBXML_PARSER_OPTIONS bitmask, and anything left over is stored in
# the object under its own name. Without arguments the module's default
# parser options are used.
sub new {
    my $class = shift;
    my $self = bless {
    }, $class;
    if (@_) {
        my %opts = ();
        if (ref($_[0]) eq 'HASH') {
            %opts = %{$_[0]};
        } else {
            # old interface
            my %args = @_;
            %opts=(
                map {
                    (($compatibility_flags{ $_ }||$_) => $args{ $_ })
                } keys %args
            );
        }
        # parser flags
        $opts{no_blanks} = !$opts{keep_blanks} if exists($opts{keep_blanks}) and !exists($opts{no_blanks});

        for (keys %OUR_FLAGS) {
            # Several option names share one slot (URI and base_uri). Only
            # options that were actually passed may write to it; otherwise
            # an absent alias could overwrite the supplied value with undef,
            # depending on hash iteration order.
            next unless exists $opts{$_};
            $self->{$OUR_FLAGS{$_}} = delete $opts{$_};
        }
        $class->load_catalog(delete($opts{catalog})) if $opts{catalog};

        $self->{XML_LIBXML_PARSER_OPTIONS} = XML::LibXML->_parser_options(\%opts);

        # store remaining unknown options directly in $self
        for (keys %opts) {
            $self->{$_}=$opts{$_} unless exists $PARSER_FLAGS{$_};
        }
    } else {
        $self->{XML_LIBXML_PARSER_OPTIONS} = XML_LIBXML_PARSE_DEFAULTS;
    }
    if ( defined $self->{Handler} ) {
        $self->set_handler( $self->{Handler} );
    }

    $self->{_State_} = 0;
    return $self;
}
# Create a new parser of the same class carrying over this parser's
# recover, line number, base URI and gdome settings as well as its
# complete parser option bitmask.
sub _clone {
    my ($self)=@_;
    my $new = ref($self)->new({
        recover => $self->{XML_LIBXML_RECOVER},
        # key was misspelled 'line_nubers', so the setting never reached
        # the clone's XML_LIBXML_LINENUMBERS slot
        line_numbers => $self->{XML_LIBXML_LINENUMBERS},
        base_uri => $self->{XML_LIBXML_BASE_URI},
        gdome => $self->{XML_LIBXML_GDOME},
        # raw bitmask; OR-ed into the new parser's options
        set_parser_flags => $self->{XML_LIBXML_PARSER_OPTIONS},
    });
    return $new;
}
#-------------------------------------------------------------------------#
# Threads support methods #
#-------------------------------------------------------------------------#
# threads doc says CLONE's API may change in future, which would break
# an XS method prototype
# Thread-clone hook: forwards to the XS-level _CLONE with the first
# argument, but only when thread sharing is enabled.
sub CLONE {
    XML::LibXML::_CLONE( $_[0] ) if $XML::LibXML::__threads_shared;
}
# Returns 0 when thread sharing is enabled and 1 otherwise.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Always dies: tells the calling package that it relies on the
# XML::LibXML 1.66 API, which this version does not provide.
sub __proxy_registry {
    my $class = caller;    # package that made the call
    die "This version of $class uses API of XML::LibXML 1.66 which is not compatible with XML::LibXML $VERSION. Please upgrade $class!\n";
}
#-------------------------------------------------------------------------#
# DOM Level 2 document constructor #
#-------------------------------------------------------------------------#
# Create a new document.
#
# Legacy form: no arguments, or a first argument that looks like a
# version number ("1.0") - all arguments go to XML::LibXML::Document->new.
# DOM form: createDocument(namespaceURI, qualifiedName, doctype?) - builds
# a document with the given root element and, if supplied, sets the
# doctype as external subset.
sub createDocument {
    my ($self, @args) = @_;

    # for backward compatibility
    return XML::LibXML::Document->new(@args)
        if !@args or $args[0] =~ m/^\d\.\d$/;

    # DOM API: createDocument(namespaceURI, qualifiedName, doctype?)
    my ($ns_uri, $qname, @rest) = @args;
    my $doc  = XML::LibXML::Document->new;
    my $root = $doc->createElementNS($ns_uri, $qname);
    $doc->setDocumentElement($root);
    $doc->setExternalSubset($rest[0]) if @rest;
    return $doc;
}
#-------------------------------------------------------------------------#
# callback functions #
#-------------------------------------------------------------------------#
# Get or set the input callback stack stored on the parser. A defined
# argument replaces the stored value; the current value is returned.
sub input_callbacks {
    my ($self, $icbclass) = @_;
    $self->{XML_LIBXML_CALLBACK_STACK} = $icbclass if defined $icbclass;
    return $self->{XML_LIBXML_CALLBACK_STACK};
}
# Get or set the match callback. Called on a parser object it works on
# that object (and discards the object's callback stack when setting);
# called on the class it works on the global $MatchCB.
sub match_callback {
    my $self = shift;

    # class-level call: global callback
    unless ( ref $self ) {
        $MatchCB = shift if scalar @_;
        return $MatchCB;
    }

    # object-level call
    if ( scalar @_ ) {
        $self->{XML_LIBXML_MATCH_CB} = shift;
        $self->{XML_LIBXML_CALLBACK_STACK} = undef;
    }
    return $self->{XML_LIBXML_MATCH_CB};
}
# Get or set the read callback. Called on a parser object it works on
# that object (and discards the object's callback stack when setting);
# called on the class it works on the global $ReadCB.
sub read_callback {
    my $self = shift;

    # class-level call: global callback
    unless ( ref $self ) {
        $ReadCB = shift if scalar @_;
        return $ReadCB;
    }

    # object-level call
    if ( scalar @_ ) {
        $self->{XML_LIBXML_READ_CB} = shift;
        $self->{XML_LIBXML_CALLBACK_STACK} = undef;
    }
    return $self->{XML_LIBXML_READ_CB};
}
# Get or set the close callback. Called on a parser object it works on
# that object (and discards the object's callback stack when setting);
# called on the class it works on the global $CloseCB.
sub close_callback {
    my $self = shift;

    # class-level call: global callback
    unless ( ref $self ) {
        $CloseCB = shift if scalar @_;
        return $CloseCB;
    }

    # object-level call
    if ( scalar @_ ) {
        $self->{XML_LIBXML_CLOSE_CB} = shift;
        $self->{XML_LIBXML_CALLBACK_STACK} = undef;
    }
    return $self->{XML_LIBXML_CLOSE_CB};
}
# Get or set the open callback. Called on a parser object it works on
# that object (and discards the object's callback stack when setting);
# called on the class it works on the global $OpenCB.
sub open_callback {
    my $self = shift;

    # class-level call: global callback
    unless ( ref $self ) {
        $OpenCB = shift if scalar @_;
        return $OpenCB;
    }

    # object-level call
    if ( scalar @_ ) {
        $self->{XML_LIBXML_OPEN_CB} = shift;
        $self->{XML_LIBXML_CALLBACK_STACK} = undef;
    }
    return $self->{XML_LIBXML_OPEN_CB};
}
# Get or set all four I/O callbacks at once, in the order
# (match, open, read, close). On a parser object the callbacks live in the
# object and setting them discards its callback stack; on the class the
# global callbacks are used. Without arguments the current four are returned.
sub callbacks {
    my $self = shift;
    if ( ref $self ) {
        my @slots = qw(XML_LIBXML_MATCH_CB XML_LIBXML_OPEN_CB XML_LIBXML_READ_CB XML_LIBXML_CLOSE_CB);
        return @{$self}{@slots} unless @_;

        my ($match, $open, $read, $close) = @_;
        @{$self}{@slots} = ($match, $open, $read, $close);
        $self->{XML_LIBXML_CALLBACK_STACK} = undef;
    }
    else {
        return ( $MatchCB, $OpenCB, $ReadCB, $CloseCB ) unless @_;

        ( $MatchCB, $OpenCB, $ReadCB, $CloseCB ) = @_;
    }
}
#-------------------------------------------------------------------------#
# internal member variable manipulation #
#-------------------------------------------------------------------------#
# Read or modify bits in the parser's XML_LIBXML_PARSER_OPTIONS bit mask.
#
#   $self->__parser_option($mask)          returns 1 if any bit of $mask is set, else 0
#   $self->__parser_option($mask, $value)  sets the bits when $value is true,
#                                          clears them otherwise; returns 1 or 0
#
# Whether a third argument was passed (even undef) decides between the
# getter and the setter form.
sub __parser_option {
    my ( $self, $opt ) = @_;

    if ( @_ <= 2 ) {
        # getter
        my $active = $self->{XML_LIBXML_PARSER_OPTIONS} & $opt;
        return $active ? 1 : 0;
    }

    if ( $_[2] ) {
        $self->{XML_LIBXML_PARSER_OPTIONS} |= $opt;
        return 1;
    }

    $self->{XML_LIBXML_PARSER_OPTIONS} &= ~$opt;
    return 0;
}
# Return 1 if $name has a true entry in %PARSER_FLAGS or %OUR_FLAGS,
# 0 otherwise.
sub option_exists {
    my ( $self, $name ) = @_;

    return 1 if $PARSER_FLAGS{$name};
    return 1 if $OUR_FLAGS{$name};
    return 0;
}
# Return the current value of the named parser option.
#
# Names found in %OUR_FLAGS map to a key of the parser hash whose value is
# returned directly; names found in %PARSER_FLAGS map to a bit mask that is
# queried through __parser_option. Unknown names produce a warning and undef.
sub get_option {
    my ( $self, $name ) = @_;

    if ( my $slot = $OUR_FLAGS{$name} ) {
        return $self->{$slot};
    }
    if ( my $mask = $PARSER_FLAGS{$name} ) {
        return $self->__parser_option($mask);
    }

    warn "XML::LibXML::get_option: unknown parser option $name\n";
    return undef;
}
# Set the named parser option to $value.
#
# Names found in %OUR_FLAGS map to a key of the parser hash, which is
# assigned directly (the assigned value is returned); names found in
# %PARSER_FLAGS map to a bit mask that is set or cleared through
# __parser_option (1 or 0 is returned). Unknown names produce a warning and
# undef.
sub set_option {
    my ( $self, $name, $value ) = @_;

    my $flag = $OUR_FLAGS{$name};
    return ( $self->{$flag} = $value ) if $flag;

    $flag = $PARSER_FLAGS{$name};
    return $self->__parser_option( $flag, $value ) if $flag;

    # report the function that was actually called (was "get_option")
    warn "XML::LibXML::set_option: unknown parser option $name\n";
    return undef;
}
# Set several parser options at once.
#
# Accepts either a single hash reference or a flat list of name => value
# pairs; every pair is handed to set_option. Croaks when given an odd
# number of list elements. Returns nothing.
sub set_options {
    my $self = shift;

    my $opts;
    if ( @_ == 1 and ref( $_[0] ) eq 'HASH' ) {
        $opts = $_[0];
    }
    else {
        croak("Odd number of elements passed to set_options") if @_ % 2;
        $opts = {@_};
    }

    for my $name ( keys %$opts ) {
        $self->set_option( $name => $opts->{$name} );
    }
    return;
}
# Get or (when an argument is given) set the XML_PARSE_DTDVALID parser flag.
# Returns the 1/0 result of __parser_option.
sub validation {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_DTDVALID, @value );
}
# Get or set the recover mode.
# When an argument is given, its raw value is remembered in
# XML_LIBXML_RECOVER and the XML_PARSE_RECOVER flag is set or cleared
# according to its truthiness. Returns the remembered raw value.
sub recover {
    my ( $self, @value ) = @_;

    if (@value) {
        $self->{XML_LIBXML_RECOVER} = $value[0];
        $self->__parser_option( XML_PARSE_RECOVER, @value );
    }
    return $self->{XML_LIBXML_RECOVER};
}
# Get or set "silent" recovery, which is represented as recover mode 2.
# A defined argument of 1 is stored as recover(2); any other defined
# argument is passed to recover() unchanged. Returns 1 when the current
# recover mode equals 2, otherwise 0.
sub recover_silently {
    my ( $self, $arg ) = @_;

    if ( defined $arg ) {
        $self->recover( $arg == 1 ? 2 : $arg );
    }

    my $mode = $self->recover() || 0;
    return $mode == 2 ? 1 : 0;
}
# Get or set entity expansion (XML_PARSE_NOENT).
# Switching it on with a true argument also switches on XML_PARSE_DTDLOAD;
# a false argument clears XML_PARSE_NOENT only. Without an argument the
# current XML_PARSE_NOENT state is returned.
sub expand_entities {
    my ( $self, @value ) = @_;

    my $enabling = ( scalar(@value) and $value[0] );
    if ($enabling) {
        return $self->__parser_option( XML_PARSE_NOENT | XML_PARSE_DTDLOAD, 1 );
    }
    return $self->__parser_option( XML_PARSE_NOENT, @value );
}
# Get or set whether blank nodes are kept.
# The underlying flag is XML_PARSE_NOBLANKS, i.e. the opposite meaning, so
# both the argument and the returned value are negated.
sub keep_blanks {
    my ( $self, @value ) = @_;

    # translate "keep blanks" into "no blanks"; stay empty for a pure getter call
    my @no_blanks = @value ? ( $value[0] ? 0 : 1 ) : ();

    return $self->__parser_option( XML_PARSE_NOBLANKS, @no_blanks ) ? 0 : 1;
}
# Get or (when an argument is given) set the XML_PARSE_PEDANTIC parser flag.
# Returns the 1/0 result of __parser_option.
sub pedantic_parser {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_PEDANTIC, @value );
}
# Get or set the XML_LIBXML_LINENUMBERS setting stored on the parser object.
# Any argument (including undef) is stored; the current value is returned.
sub line_numbers {
    my ( $self, @value ) = @_;

    if (@value) {
        $self->{XML_LIBXML_LINENUMBERS} = $value[0];
    }
    return $self->{XML_LIBXML_LINENUMBERS};
}
# Get or (when an argument is given) set the XML_PARSE_NONET parser flag.
# Returns the 1/0 result of __parser_option.
sub no_network {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_NONET, @value );
}
# Get or (when an argument is given) set the XML_PARSE_DTDLOAD parser flag.
# Returns the 1/0 result of __parser_option.
sub load_ext_dtd {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_DTDLOAD, @value );
}
# Get or (when an argument is given) set the XML_PARSE_DTDATTR parser flag.
# Returns the 1/0 result of __parser_option.
sub complete_attributes {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_DTDATTR, @value );
}
# Get or (when an argument is given) set the XML_PARSE_XINCLUDE parser flag.
# Returns the 1/0 result of __parser_option.
sub expand_xinclude {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_XINCLUDE, @value );
}
# Get or set the XML_LIBXML_BASE_URI setting stored on the parser object.
# Any argument (including undef) is stored; the current value is returned.
sub base_uri {
    my ( $self, @value ) = @_;

    if (@value) {
        $self->{XML_LIBXML_BASE_URI} = $value[0];
    }
    return $self->{XML_LIBXML_BASE_URI};
}
# Get or set the XML_LIBXML_GDOME setting stored on the parser object.
# Any argument (including undef) is stored; the current value is returned.
sub gdome_dom {
    my ( $self, @value ) = @_;

    if (@value) {
        $self->{XML_LIBXML_GDOME} = $value[0];
    }
    return $self->{XML_LIBXML_GDOME};
}
# Get or (when an argument is given) set the XML_PARSE_NSCLEAN parser flag.
# Returns the 1/0 result of __parser_option.
sub clean_namespaces {
    my ( $self, @value ) = @_;
    return $self->__parser_option( XML_PARSE_NSCLEAN, @value );
}
#-------------------------------------------------------------------------#
# set the optional SAX(2) handler #
#-------------------------------------------------------------------------#
# Install or remove the optional SAX handler.
#
# A defined argument is stored as HANDLER and the SAX state is initialised
# to { State => 0 }; an undefined (or missing) argument removes both the
# HANDLER and the SAX entries. The SAX element stack is reset to an empty
# list in either case.
sub set_handler {
    my ( $self, $handler ) = @_;

    $self->{SAX_ELSTACK} = [];

    if ( defined $handler ) {
        $self->{HANDLER} = $handler;
        $self->{SAX}     = { State => 0 };
    }
    else {
        # switch SAX handling off
        delete $self->{HANDLER};
        delete $self->{SAX};
    }
}
#-------------------------------------------------------------------------#
# helper functions #
#-------------------------------------------------------------------------#
# Post-process a freshly parsed document.
#
# Sets the document's base URI when $uri is defined and, if the parser has
# expand_xinclude switched on, runs processXIncludes on it. The _State_ flag
# is raised for the duration of XInclude processing. If that processing
# fails, the callbacks are cleaned up and the error is re-thrown with croak.
# Returns the (possibly expanded) document.
sub _auto_expand {
    my ( $self, $result, $uri ) = @_;

    $result->setBaseURI($uri) if defined $uri;

    return $result unless $self->expand_xinclude;

    $self->{_State_} = 1;
    eval { $self->processXIncludes($result); };
    my $failure = $@;
    $self->{_State_} = 0;

    if ($failure) {
        $self->_cleanup_callbacks();
        $result = undef;
        croak $failure;
    }
    return $result;
}
# Prepare the input callback stack for a parse run.
#
# Creates a new XML::LibXML::InputCallback stack on the parser if none is
# stored, registers the parser's match/open/read/close callbacks on it when
# all four are defined, and finally calls init_callbacks on the stack.
sub _init_callbacks {
    my $self = shift;

    my $stack = $self->{XML_LIBXML_CALLBACK_STACK};
    if ( !defined $stack ) {
        $stack = $self->{XML_LIBXML_CALLBACK_STACK}
            = XML::LibXML::InputCallback->new();
    }

    my $match = $self->match_callback();
    my $open  = $self->open_callback();
    my $read  = $self->read_callback();
    my $close = $self->close_callback();

    # a partial set of callbacks is ignored
    my @quad = ( $match, $open, $read, $close );
    unless ( grep { !defined $_ } @quad ) {
        $stack->register_callbacks( [@quad] );
    }

    $stack->init_callbacks();
}
# Tear down the input callback stack after a parse run: calls
# cleanup_callbacks on the stored stack and then unregisters the entry
# identified by the parser's current match callback.
sub _cleanup_callbacks {
    my $self = shift;

    my $stack = $self->{XML_LIBXML_CALLBACK_STACK};
    $stack->cleanup_callbacks();

    my $match = $self->match_callback();
    $stack->unregister_callbacks( [$match] );
}
# Thin wrapper around Perl's builtin read(): reads up to $_[2] units from
# the file handle $_[0] into the buffer $_[1] and returns read()'s result.
# The arguments are deliberately used through @_ (not copied) so that the
# caller's buffer variable itself is filled.
sub __read {
read($_[0], $_[1], $_[2]);
}
# Forward data to the write() method of the output target in $_[0].
# If the target is a reference, write() receives both $_[1] and $_[2];
# otherwise (e.g. a class name) only $_[1] is passed on.
# Arguments are used through @_ so they are handed over uncopied.
sub __write {
    return ref( $_[0] )
        ? $_[0]->write( $_[1], $_[2] )
        : $_[0]->write( $_[1] );
}
# Convenience entry point: parse XML from a location, a string or an IO
# handle, selected by the `location`, `string` or `IO` argument (checked in
# that order).
#
# Arguments may be given as name => value pairs and/or hash references,
# which are flattened into one argument hash. An optional `URI` argument is
# stringified and used as base URI for the string and IO variants.
# Called on an existing parser, a clone of it is used with its parser
# options recomputed from the arguments; called on the class, a new parser
# is constructed from the arguments.
# Croaks if none of the three sources is given. Returns the parsed document.
sub load_xml {
    my $class_or_self = shift;
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;

    my $URI = delete $args{URI};
    $URI = "$URI" if defined $URI;    # stringify in case it is an URI object

    my $parser;
    if ( ref $class_or_self ) {
        $parser = $class_or_self->_clone();
        $parser->{XML_LIBXML_PARSER_OPTIONS} = $parser->_parser_options( \%args );
    }
    else {
        $parser = $class_or_self->new( \%args );
    }

    my $dom
        = defined $args{location} ? $parser->parse_file("$args{location}")
        : defined $args{string}   ? $parser->parse_string( $args{string}, $URI )
        : defined $args{IO}       ? $parser->parse_fh( $args{IO}, $URI )
        :   croak("XML::LibXML->load: specify location, string, or IO");

    return $dom;
}
# Convenience entry point: parse HTML from a location, a string or an IO
# handle, selected by the `location`, `string` or `IO` argument (checked in
# that order).
#
# Arguments may be given as name => value pairs and/or hash references,
# which are flattened into one argument hash; that hash (minus `URI`, which
# is removed from it) is passed on to the parse_html_* method as its
# options. Called on an existing parser a clone of it is used, otherwise a
# new default parser is created.
# Croaks if none of the three sources is given. Returns the parsed document.
sub load_html {
    my $class_or_self = shift;
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;

    my $URI = delete $args{URI};
    $URI = "$URI" if defined $URI;    # stringify in case it is an URI object

    my $parser
        = ref($class_or_self)
        ? $class_or_self->_clone()
        : $class_or_self->new();

    my $dom
        = defined $args{location} ? $parser->parse_html_file( "$args{location}", \%args )
        : defined $args{string}   ? $parser->parse_html_string( $args{string}, \%args )
        : defined $args{IO}       ? $parser->parse_html_fh( $args{IO}, \%args )
        :   croak("XML::LibXML->load: specify location, string, or IO");

    return $dom;
}
#-------------------------------------------------------------------------#
# parsing functions #
#-------------------------------------------------------------------------#
# All parsing functions handle both normal (DOM) and SAX parsing.
# Note that SAX parsing is handled incompletely here! Use XML::LibXML::SAX for
# complete parsing sequences.
#-------------------------------------------------------------------------#
# Parse an XML document held in a string.
#
# Must be called on a parser object. Croaks when called as a class method,
# when a parse is already in progress on this parser (_State_ set), or when
# the string is undefined or empty.
#
# If a SAX handler is configured ($self->{SAX}), only the string itself is
# handed to _parse_sax_string. Otherwise all arguments go to _parse_string
# and the result is post-processed by _auto_expand using the parser's base
# URI. Parse errors are chomped (unless they are objects) and re-thrown with
# croak after the callbacks have been cleaned up.
sub parse_string {
    my $self = shift;

    croak("parse_string is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};
    croak("Empty String") unless ( defined $_[0] and length $_[0] );

    $self->{_State_} = 1;
    $self->_init_callbacks();

    my $sax_mode = defined $self->{SAX};
    my $document;

    if ($sax_mode) {
        my $xml = shift;
        $self->{SAX_ELSTACK} = [];
        eval { $document = $self->_parse_sax_string($xml); };
    }
    else {
        eval { $document = $self->_parse_string(@_); };
    }
    my $failure = $@;
    $self->{_State_} = 0;

    if ($failure) {
        chomp $failure unless ref $failure;
        $self->_cleanup_callbacks();
        croak $failure;
    }

    $document = $self->_auto_expand( $document, $self->{XML_LIBXML_BASE_URI} )
        unless $sax_mode;

    $self->_cleanup_callbacks();
    return $document;
}
# Parse an XML document from a file handle.
#
# Must be called on a parser object. Croaks when called as a class method
# or when a parse is already in progress on this parser (_State_ set).
#
# If a SAX handler is configured ($self->{SAX}), the arguments go to
# _parse_sax_fh and no result is captured. Otherwise they go to _parse_fh
# and the result is post-processed by _auto_expand using the parser's base
# URI. Parse errors are chomped (unless they are objects) and re-thrown with
# croak after the callbacks have been cleaned up.
sub parse_fh {
    my $self = shift;

    croak("parse_fh is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};

    $self->{_State_} = 1;
    $self->_init_callbacks();

    my $sax_mode = defined $self->{SAX};
    my $document;

    if ($sax_mode) {
        $self->{SAX_ELSTACK} = [];
        eval { $self->_parse_sax_fh(@_); };
    }
    else {
        eval { $document = $self->_parse_fh(@_); };
    }
    my $failure = $@;
    $self->{_State_} = 0;

    if ($failure) {
        chomp $failure unless ref $failure;
        $self->_cleanup_callbacks();
        croak $failure;
    }

    $document = $self->_auto_expand( $document, $self->{XML_LIBXML_BASE_URI} )
        unless $sax_mode;

    $self->_cleanup_callbacks();
    return $document;
}
# Parse an XML document from a file.
#
# Must be called on a parser object. Croaks when called as a class method
# or when a parse is already in progress on this parser (_State_ set).
#
# If a SAX handler is configured ($self->{SAX}), the arguments go to
# _parse_sax_file and no result is captured. Otherwise they go to
# _parse_file and the result is post-processed by _auto_expand (without a
# base URI argument). Parse errors are chomped (unless they are objects) and
# re-thrown with croak after the callbacks have been cleaned up.
sub parse_file {
    my $self = shift;

    croak("parse_file is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};

    $self->{_State_} = 1;
    $self->_init_callbacks();

    my $sax_mode = defined $self->{SAX};
    my $document;

    if ($sax_mode) {
        $self->{SAX_ELSTACK} = [];
        eval { $self->_parse_sax_file(@_); };
    }
    else {
        eval { $document = $self->_parse_file(@_); };
    }
    my $failure = $@;
    $self->{_State_} = 0;

    if ($failure) {
        chomp $failure unless ref $failure;
        $self->_cleanup_callbacks();
        croak $failure;
    }

    $document = $self->_auto_expand($document) unless $sax_mode;

    $self->_cleanup_callbacks();
    return $document;
}
# Parse a well-balanced chunk of XML.
#
# Takes at most two parameters:
#   1: the chunk
#   2: the encoding of the string
#
# Must be called on a parser object. Croaks when called as a class method,
# when a parse is already in progress on this parser (_State_ set), or when
# the chunk is undefined or empty.
#
# If a SAX handler is configured ($self->{SAX}), the chunk is fed to
# _parse_sax_xml_chunk and, unless the parser is marked IS_FILTER, the
# handler's end_document() result becomes the return value. Otherwise the
# result of _parse_xml_chunk is returned. Parse errors are chomped (unless
# they are objects) and re-thrown with croak.
sub parse_xml_chunk {
    my $self = shift;

    croak("parse_xml_chunk is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};
    unless ( defined $_[0] and length $_[0] ) {
        croak("Empty String");
    }

    my $result;
    $self->{_State_} = 1;
    $self->_init_callbacks();

    if ( defined $self->{SAX} ) {
        eval {
            $self->_parse_sax_xml_chunk(@_);

            # this is required for XML::GenericChunk.
            # in the normal case IS_FILTER is not set, and thus the parsing
            # is terminated here. in case of a SAX filter the parsing is not
            # finished at this point, therefore we must not end the document
            unless ( $self->{IS_FILTER} ) {
                $result = $self->{HANDLER}->end_document();
            }
        };
    }
    else {
        eval { $result = $self->_parse_xml_chunk(@_); };
    }

    # Save the error BEFORE cleaning up: any eval that succeeds inside
    # _cleanup_callbacks() resets $@ and would silently swallow the parse
    # error.
    my $err = $@;
    $self->{_State_} = 0;
    $self->_cleanup_callbacks();

    if ($err) {
        chomp $err unless ref $err;
        croak $err;
    }
    return $result;
}
# Parse a well-balanced chunk via parse_xml_chunk, with the input callbacks
# initialised before and cleaned up after the call. Errors are chomped
# (unless they are objects) and re-thrown with croak. Returns the result of
# parse_xml_chunk.
sub parse_balanced_chunk {
    my $self = shift;

    $self->_init_callbacks();

    my $fragment = eval { $self->parse_xml_chunk(@_) };
    my $failure  = $@;

    $self->_cleanup_callbacks();

    if ($failure) {
        chomp $failure unless ref $failure;
        croak $failure;
    }
    return $fragment;
}
# java style
# Run XInclude processing on $doc (Java-style method name).
#
# $opts is turned into parser options via _parser_options. The input
# callbacks are initialised and cleaned up here only when no parse is
# currently in progress (_State_ is not 1); during a running parse the
# caller owns them. Errors are chomped (unless they are objects) and
# re-thrown with croak. Returns the result of _processXIncludes.
sub processXIncludes {
    my ( $self, $doc, $opts ) = @_;

    my $options = $self->_parser_options($opts);

    $self->_init_callbacks() if $self->{_State_} != 1;

    my $rv;
    eval { $rv = $self->_processXIncludes( $doc || " ", $options ); };
    my $failure = $@;

    $self->_cleanup_callbacks() if $self->{_State_} != 1;

    if ($failure) {
        chomp $failure unless ref $failure;
        croak $failure;
    }
    return $rv;
}
# perl style
# Run XInclude processing on $doc (Perl-style method name).
#
# $opts is turned into parser options via _parser_options. The input
# callbacks are always initialised before and cleaned up after the call.
# Errors are chomped (unless they are objects) and re-thrown with croak.
# Returns the result of _processXIncludes.
sub process_xincludes {
    my $self    = shift;
    my $doc     = shift;
    my $opts    = shift;
    my $options = $self->_parser_options($opts);

    my $rv;
    $self->_init_callbacks();
    eval { $rv = $self->_processXIncludes( $doc || " ", $options ); };
    my $err = $@;
    $self->_cleanup_callbacks();

    if ($err) {
        chomp $err unless ref $err;

        # Throw the saved copy: $@ may have been reset by the cleanup above,
        # and only $err has had its trailing newline removed.
        croak $err;
    }
    return $rv;
}
#-------------------------------------------------------------------------#
# HTML parsing functions #
#-------------------------------------------------------------------------#
# Translate per-call HTML parsing options into the argument triple
# (URI, encoding, flags) expected by the low-level HTML parse functions.
#
# $opts is an optional hash reference; anything that is not a reference is
# treated as "no options". For recover, pedantic_parser, no_blanks and
# no_network an entry that exists in $opts wins; otherwise the parser
# object's own setting is used. The remaining flags are taken from $opts
# only.
#
# Fix: the no_network fallback used to be negated (copied from the
# no_blanks/keep_blanks line, where the negation is correct), so a parser
# with no_network switched ON allowed network access for HTML and a parser
# with it switched OFF forbade it. The parser setting is now used as-is.
sub _html_options {
    my ( $self, $opts ) = @_;
    $opts = {} unless ref $opts;

    my $recover
        = exists $opts->{recover} ? $opts->{recover} : $self->recover;
    my $pedantic
        = exists $opts->{pedantic_parser} ? $opts->{pedantic_parser} : $self->pedantic_parser;

    # the parser stores the positive "keep_blanks", the flag means "no blanks"
    my $no_blanks
        = exists $opts->{no_blanks} ? $opts->{no_blanks} : !$self->keep_blanks;

    # the parser setting already has the same polarity as the flag
    my $no_network
        = exists $opts->{no_network} ? $opts->{no_network} : $self->no_network;

    my $flags = 0;
    $flags |= 1       if $recover;
    $flags |= 32      if $opts->{suppress_errors};
    $flags |= 64      if $opts->{suppress_warnings};
    $flags |= 128     if $pedantic;
    $flags |= 256     if $no_blanks;
    $flags |= 2048    if $no_network;
    $flags |= 16384   if $opts->{no_cdata};
    $flags |= 65536   if $opts->{compact};    # compact small text nodes; no modification
                                              # of the tree allowed afterwards
                                              # (WILL possibly CRASH IF YOU try to MODIFY THE TREE)
    $flags |= 524288  if $opts->{huge};       # relax any hardcoded limit from the parser
    $flags |= 1048576 if $opts->{oldsax};     # parse using SAX2 interface from before 2.7.0

    return ( $opts->{URI}, $opts->{encoding}, $flags );
}
# Parse an HTML document held in a string.
#
# Must be called on a parser object. Croaks when called as a class method,
# when a parse is already in progress on this parser (_State_ set), or when
# the string is undefined or empty. $opts is translated by _html_options and
# appended to the arguments of _parse_html_string. Parse errors are chomped
# (unless they are objects) and re-thrown with croak after the callbacks
# have been cleaned up.
sub parse_html_string {
    my ( $self, $str, $opts ) = @_;

    croak("parse_html_string is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};
    croak("Empty String") unless ( defined $str and length $str );

    $self->{_State_} = 1;
    $self->_init_callbacks();

    my $document;
    eval {
        $document = $self->_parse_html_string( $str, $self->_html_options($opts) );
    };
    my $failure = $@;
    $self->{_State_} = 0;

    chomp $failure if $failure and not ref $failure;
    $self->_cleanup_callbacks();
    croak $failure if $failure;

    return $document;
}
# Parse an HTML document from a file.
#
# Must be called on a parser object. Croaks when called as a class method
# or when a parse is already in progress on this parser (_State_ set).
# $opts is translated by _html_options and appended to the arguments of
# _parse_html_file. Parse errors are chomped (unless they are objects) and
# re-thrown with croak after the callbacks have been cleaned up.
sub parse_html_file {
    my ( $self, $file, $opts ) = @_;

    croak("parse_html_file is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};

    $self->{_State_} = 1;
    $self->_init_callbacks();

    my $document;
    eval {
        $document = $self->_parse_html_file( $file, $self->_html_options($opts) );
    };
    my $failure = $@;
    $self->{_State_} = 0;

    chomp $failure if $failure and not ref $failure;
    $self->_cleanup_callbacks();
    croak $failure if $failure;

    return $document;
}
# Parse an HTML document from a file handle.
#
# Must be called on a parser object. Croaks when called as a class method
# or when a parse is already in progress on this parser (_State_ set).
# $opts is translated by _html_options and appended to the arguments of
# _parse_html_fh. Parse errors are chomped (unless they are objects) and
# re-thrown with croak after the callbacks have been cleaned up.
sub parse_html_fh {
    my ( $self, $fh, $opts ) = @_;

    croak("parse_html_fh is not a class method! Create a parser object with XML::LibXML->new first!")
        unless ref $self;
    croak("parse already in progress") if $self->{_State_};

    $self->{_State_} = 1;
    $self->_init_callbacks();

    my $document;
    eval {
        $document = $self->_parse_html_fh( $fh, $self->_html_options($opts) );
    };
    my $failure = $@;
    $self->{_State_} = 0;

    chomp $failure if $failure and not ref $failure;
    $self->_cleanup_callbacks();
    croak $failure if $failure;

    return $document;
}
#-------------------------------------------------------------------------#
# push parser interface #
#-------------------------------------------------------------------------#
# Start a new push-parser context, discarding any existing one.
# _start_push receives 1 when a SAX handler is configured ($self->{SAX}),
# otherwise 0; its result is stored as CONTEXT.
sub init_push {
    my $self = shift;

    delete $self->{CONTEXT} if defined $self->{CONTEXT};

    my $with_sax = defined $self->{SAX} ? 1 : 0;
    $self->{CONTEXT} = $self->_start_push($with_sax);
}
# Feed one or more data chunks to the push parser.
# A push context is created on demand. The input callbacks are initialised
# before and cleaned up after the chunks are pushed. Errors are chomped
# (unless they are objects) and re-thrown with croak.
sub push {
    my $self = shift;

    $self->_init_callbacks();

    $self->init_push() unless defined $self->{CONTEXT};

    eval {
        $self->_push( $self->{CONTEXT}, $_ ) for @_;
    };
    my $failure = $@;

    $self->_cleanup_callbacks();

    if ($failure) {
        chomp $failure unless ref $failure;
        croak $failure;
    }
}
# this function should be promoted!
# the reason is because libxml2 uses xmlParseChunk() for this purpose!
# Push a single chunk into the push parser and optionally finish parsing.
# A push context is created on demand; undefined or empty chunks are not
# pushed. When $terminate is true, the result of finish_push() is returned.
sub parse_chunk {
    my ( $self, $chunk, $terminate ) = @_;

    $self->init_push() unless defined $self->{CONTEXT};

    $self->_push( $self->{CONTEXT}, $chunk )
        if defined $chunk and length $chunk;

    if ($terminate) {
        return $self->finish_push();
    }
}
# Finish a push-parser run and return its result.
#
# Returns undef when no push context exists. With a SAX handler configured
# the context is closed with _end_sax_push and the handler's
# end_document({}) result is returned; otherwise the result of
# _end_push(context, $restore) is returned ($restore defaults to 0).
# The context is always removed afterwards. Errors are chomped (unless they
# are objects) and re-thrown with croak.
sub finish_push {
    my $self    = shift;
    my $restore = shift || 0;

    return undef unless defined $self->{CONTEXT};

    my $document;
    if ( defined $self->{SAX} ) {
        eval {
            $self->_end_sax_push( $self->{CONTEXT} );
            $document = $self->{HANDLER}->end_document( {} );
        };
    }
    else {
        eval { $document = $self->_end_push( $self->{CONTEXT}, $restore ); };
    }
    my $failure = $@;

    delete $self->{CONTEXT};

    if ($failure) {
        chomp $failure unless ref $failure;
        croak($failure);
    }
    return $document;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Node Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Node;
# Thread-cloning hook: returns 0 when $XML::LibXML::__threads_shared is
# true, otherwise 1.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Return 1 if the node can() perform the method named $feature, else 0.
sub isSupported {
    my ( $self, $feature ) = @_;

    return 1 if $self->can($feature);
    return 0;
}
# Alias for childNodes().
sub getChildNodes {
    my ($self) = @_;
    return $self->childNodes();
}
# Return the node's children as obtained from _childNodes(0):
# a plain list in list context, an XML::LibXML::NodeList otherwise.
sub childNodes {
    my ($self) = @_;

    my @kids = $self->_childNodes(0);
    return @kids if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@kids, 1 );
}
# Return the node's children as obtained from _childNodes(1):
# a plain list in list context, an XML::LibXML::NodeList otherwise.
sub nonBlankChildNodes {
    my ($self) = @_;

    my @kids = $self->_childNodes(1);
    return @kids if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@kids, 1 );
}
# Return the node's attributes as obtained from _attributes():
# a plain list in list context, an XML::LibXML::NamedNodeMap otherwise.
sub attributes {
    my ($self) = @_;

    my @attrs = $self->_attributes();
    return @attrs if wantarray;
    return XML::LibXML::NamedNodeMap->new(@attrs);
}
# Evaluate $xpath through _findnodes and return the matching nodes:
# a plain list in list context, an XML::LibXML::NodeList otherwise.
sub findnodes {
    my ( $node, $xpath ) = @_;

    my @found = $node->_findnodes($xpath);
    return wantarray
        ? @found
        : XML::LibXML::NodeList->new_from_ref( \@found, 1 );
}
# Evaluate $xpath via _find($xpath, 1) and return the second element of
# its result list (the first element is ignored).
sub exists {
    my ( $node, $xpath ) = @_;

    my @answer = $node->_find( $xpath, 1 );
    return $answer[1];
}
# Evaluate $xpath with find() and return the literal value of the result
# (result->to_literal->value).
sub findvalue {
    my ( $node, $xpath ) = @_;

    my $found = $node->find($xpath);
    return $found->to_literal->value;
}
# Evaluate $xpath via _find($xpath, 1). _find yields a class name followed
# by constructor arguments; the result object is built with
# $class->new(@args). Returns undef when no class name comes back.
sub findbool {
    my ( $node, $xpath ) = @_;

    my ( $class, @ctor_args ) = $node->_find( $xpath, 1 );
    return $class ? $class->new(@ctor_args) : undef;
}
# Evaluate $xpath via _find($xpath, 0). _find yields a class name followed
# by constructor arguments; the result object is built with
# $class->new(@args). Returns undef when no class name comes back.
sub find {
    my ( $node, $xpath ) = @_;

    my ( $class, @ctor_args ) = $node->_find( $xpath, 0 );
    return $class ? $class->new(@ctor_args) : undef;
}
# Move this node into document $doc by calling $doc->adoptNode on it.
# Returns whatever adoptNode returns.
sub setOwnerDocument {
    my $self = shift;
    my $doc  = shift;
    return $doc->adoptNode($self);
}
# Serialize the node via _toStringC14N with the "exclusive" argument set
# to 0 and no inclusive prefix list.
#
#   $comments  passed as given, or 0 when false
#   $xpath     optional, passed through
#   $xpc       optional, passed through
sub toStringC14N {
    my ( $self, $comments, $xpath, $xpc ) = @_;

    my $exclusive   = 0;
    my $prefix_list = undef;

    return $self->_toStringC14N( $comments || 0, $xpath, $exclusive, $prefix_list, $xpc );
}
# Serialize the node via _toStringC14N with the "exclusive" argument set
# to 1.
#
# Arguments: ($comments, $xpath, $xpc, $inc_prefix_list). The XPath context
# may be omitted: if the third argument is not an
# XML::LibXML::XPathContext and no fourth argument is given, the third
# argument is taken to be the inclusive prefix list. Croaks if the third
# argument is not a context while a (true) fourth argument is present, or
# if the prefix list is defined but not an ARRAY.
sub toStringEC14N {
    my ( $self, $comments, $xpath, $xpc, $inc_prefix_list ) = @_;

    if ( !UNIVERSAL::isa( $xpc, 'XML::LibXML::XPathContext' ) ) {
        croak("toStringEC14N: 3rd argument is not an XML::LibXML::XPathContext")
            if $inc_prefix_list;

        # no context given: shift the arguments by one position
        ( $inc_prefix_list, $xpc ) = ( $xpc, undef );
    }

    croak("toStringEC14N: inclusive_prefix_list must be undefined or ARRAY")
        if defined($inc_prefix_list)
        and !UNIVERSAL::isa( $inc_prefix_list, 'ARRAY' );

    return $self->_toStringC14N( $comments || 0, $xpath, 1, $inc_prefix_list, $xpc );
}
*serialize_c14n = \&toStringC14N;
*serialize_exc_c14n = \&toStringEC14N;
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Document Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Document;
use vars qw(@ISA);
@ISA = ('XML::LibXML::Node');
# Return the document's encoding, or 'UTF-8' when the document reports
# none (undefined or empty).
sub actualEncoding {
    my ($doc) = @_;

    my $declared = $doc->encoding;
    return 'UTF-8' unless defined $declared and length $declared;
    return $declared;
}
# Install $element as the document element. An existing document element
# is removed from the document first.
sub setDocumentElement {
    my ( $doc, $element ) = @_;

    my $previous = $doc->documentElement;
    $doc->removeChild($previous) if defined $previous;

    return $doc->_setDocumentElement($element);
}
# Serialize the document.
#
# Normally the work is done by _toString($flag), with $flag defaulting to 0.
# When $XML::LibXML::skipXMLDeclaration is 1, the output is instead built by
# concatenating toString() of every child node, which leaves out the XML
# declaration; DTD nodes are left out as well if $XML::LibXML::skipDTD is
# true.
sub toString {
    my ( $self, $flag ) = @_;

    my $skip_declaration = ( defined $XML::LibXML::skipXMLDeclaration
            and $XML::LibXML::skipXMLDeclaration == 1 );

    unless ($skip_declaration) {
        $flag = 0 unless defined $flag;
        my $serialized = $self->_toString($flag);
        return $serialized;
    }

    my $serialized = "";
    for my $child ( $self->childNodes ) {
        next
            if $child->nodeType == XML::LibXML::XML_DTD_NODE()
            and $XML::LibXML::skipDTD;
        $serialized .= $child->toString;
    }
    return $serialized;
}
# Alias for toString(); all arguments are passed through.
sub serialize {
    my ( $self, @args ) = @_;
    return $self->toString(@args);
}
#-------------------------------------------------------------------------#
# bad style xinclude processing #
#-------------------------------------------------------------------------#
# Run XInclude processing on this document using a freshly created
# XML::LibXML parser; $opts is handed to processXIncludes unchanged.
sub process_xinclude {
    my ( $self, $opts ) = @_;

    my $parser = XML::LibXML->new;
    return $parser->processXIncludes( $self, $opts );
}
# Create a processing instruction from ($target, $data) and add it to the
# document: before the document element if there is one, otherwise appended
# to the document.
sub insertProcessingInstruction {
    my ( $self, $target, $data ) = @_;

    my $pi   = $self->createPI( $target, $data );
    my $root = $self->documentElement;

    # inserting before the root is actually not correct, but it is
    # presumably what the user intends
    return defined $root
        ? $self->insertBefore( $pi, $root )
        : $self->appendChild($pi);
}
# Short alias for insertProcessingInstruction(); all arguments are passed
# through.
sub insertPI {
    my ( $self, @args ) = @_;
    return $self->insertProcessingInstruction(@args);
}
#-------------------------------------------------------------------------#
# DOM L3 Document functions.
# added after robins implicit feature request
#-------------------------------------------------------------------------#
*getElementsByTagName = \&XML::LibXML::Element::getElementsByTagName;
*getElementsByTagNameNS = \&XML::LibXML::Element::getElementsByTagNameNS;
*getElementsByLocalName = \&XML::LibXML::Element::getElementsByLocalName;
1;
#-------------------------------------------------------------------------#
# XML::LibXML::DocumentFragment Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::DocumentFragment;
use vars qw(@ISA);
@ISA = ('XML::LibXML::Node');

# Serialize the fragment by concatenating toString(@args) of each child
# node. A fragment without children serializes to the empty string.
sub toString {
    my $self = shift;

    return "" unless $self->hasChildNodes();

    # scalar() keeps each child's toString in scalar context
    return join "", map { scalar $_->toString(@_) } $self->childNodes();
}

# serialize() is the same function as toString()
*serialize = \&toString;
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Element Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Element;
use vars qw(@ISA);
@ISA = ('XML::LibXML::Node');
use XML::LibXML qw(:ns :libxml);
use Carp;
# Declare a namespace on the element via _setNamespace(@args).
#
# Returns 0 if _setNamespace fails. On success returns 1, and when the
# third argument (the "activate" flag) is omitted or equals 1 the node name
# recorded before the call is re-applied with setNodeName.
sub setNamespace {
    my $self = shift;

    my $name_before = $self->nodeName;

    return 0 unless $self->_setNamespace(@_);

    my $activate = ( scalar @_ < 3 || $_[2] == 1 );
    $self->setNodeName($name_before) if $activate;

    return 1;
}
# Return the value of the named attribute.
# Names of the form "xmlns" or "xmlns:prefix" are answered from the
# element's namespace declarations (_getNamespaceDeclURI with the prefix,
# which is the empty string for plain "xmlns"); everything else goes to
# _getAttribute.
sub getAttribute {
    my $self = shift;
    my $name = $_[0];

    unless ( $name =~ /^xmlns(?::|$)/ ) {
        return $self->_getAttribute(@_);
    }

    # user wants to get a namespace ...
    ( my $prefix = $name ) =~ s/^xmlns:?//;
    return $self->_getNamespaceDeclURI($prefix);
}
# Set an attribute on the element.
#
# Ordinary names are handled by _setAttribute. Names of the form "xmlns" or
# "xmlns:prefix" are treated as namespace declarations: first an existing
# declaration for that prefix is updated with setNamespaceDeclURI; if that
# fails, a new declaration is added with setNamespace. The new namespace is
# activated on the element itself only when the element's node name carries
# the same prefix, which is helpful in cases like:
#
#   | $e = XML::LibXML::Element->new('foo:bar');
#   | $e->setAttribute('xmlns:foo','http://yoyodine')
#
# (This is fine but not exactly DOM conformant behavior: according to DOM
# we should probably declare an attribute which looks like an XML namespace
# declaration but isn't.)
sub setAttribute {
    my ( $self, $name, $value ) = @_;

    if ( $name !~ /^xmlns(?::|$)/ ) {
        $self->_setAttribute( $name, $value );
    }
    else {
        ( my $nsprefix = $name ) =~ s/^xmlns:?//;
        my $nn = $self->nodeName;

        # 1 when the element itself uses this prefix, 0 to just declare it
        my $activate = ( $nn =~ /^\Q${nsprefix}\E:/ ) ? 1 : 0;

        $self->setNamespaceDeclURI( $nsprefix, $value )
            || $self->setNamespace( $value, $nsprefix, $activate );
    }
}
# Return the value of the attribute ($nsURI, $name).
# Croaks on an undefined or empty name. Requests in the XML_XMLNS_NS
# namespace are answered from the element's namespace declarations, with the
# name "xmlns" standing for the default namespace (undef prefix); all other
# requests go to _getAttributeNS.
sub getAttributeNS {
    my $self = shift;
    my ( $nsURI, $name ) = @_;

    croak("invalid attribute name")
        unless defined($name) and $name ne q{};

    my $wants_declaration = ( defined($nsURI) and $nsURI eq XML_XMLNS_NS );

    if ($wants_declaration) {
        return $self->_getNamespaceDeclURI( $name eq 'xmlns' ? undef : $name );
    }
    return $self->_getAttributeNS(@_);
}
# Set the attribute $qname in namespace $nsURI to $value.
#
# Croaks on a missing name and on the namespace violations checked below.
# Attributes in the XML_XMLNS_NS namespace are namespace declarations and
# are delegated to setAttribute(); everything else ends in _setAttributeNS.
sub setAttributeNS {
    my ( $self, $nsURI, $qname, $value ) = @_;

    croak("bad name") unless defined $qname and length $qname;

    my $has_ns = ( defined($nsURI) and length($nsURI) );

    if ( defined($nsURI) and $nsURI eq XML_XMLNS_NS ) {
        croak("NAMESPACE ERROR: Namespace declartions must have the prefix 'xmlns'")
            if $qname !~ /^xmlns(?::|$)/;
        $self->setAttribute( $qname, $value );    # handles namespace declarations
        return;
    }

    # a prefixed name needs a namespace
    croak("NAMESPACE ERROR: Attribute without a prefix cannot be in a namespace")
        if $qname =~ /:/ and not $has_ns;

    croak("NAMESPACE ERROR: 'xmlns' prefix and qualified-name are reserved for the namespace ".XML_XMLNS_NS)
        if $qname =~ /^xmlns(?:$|:)/;

    croak("NAMESPACE ERROR: 'xml' prefix is reserved for the namespace ".XML_XML_NS)
        if $qname =~ /^xml:/ and not( defined $nsURI and $nsURI eq XML_XML_NS );

    $self->_setAttributeNS( $nsURI, $qname, $value );
}
# Return all descendant elements whose name() equals $name ('*' matches
# every element), found via an XPath query on _findnodes.
# List context yields a plain list, otherwise an XML::LibXML::NodeList.
# Note: $name is interpolated into the XPath expression unescaped.
sub getElementsByTagName {
    my ( $node, $name ) = @_;

    my $xpath = "descendant::*";
    $xpath .= "[name()='$name']" unless $name eq '*';

    my @nodes = $node->_findnodes($xpath);
    return @nodes if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@nodes, 1 );
}
# Return all descendant elements matching local name $name and namespace
# URI $nsURI, found via an XPath query on _findnodes. Either argument may
# be '*' to match anything.
# List context yields a plain list, otherwise an XML::LibXML::NodeList.
# Note: both values are interpolated into the XPath expression unescaped.
sub getElementsByTagNameNS {
    my ( $node, $nsURI, $name ) = @_;

    # one predicate term per non-wildcard argument, local name first
    my @terms = grep { defined } (
        ( $name  eq '*' ? undef : "local-name()='$name'" ),
        ( $nsURI eq '*' ? undef : "namespace-uri()='$nsURI'" ),
    );

    my $xpath = "descendant::*";
    $xpath .= '[' . join( ' and ', @terms ) . ']' if @terms;

    my @nodes = $node->_findnodes($xpath);
    return @nodes if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@nodes, 1 );
}
# Return all descendant elements whose local-name() equals $name ('*'
# matches every element), found via an XPath query on _findnodes.
# List context yields a plain list, otherwise an XML::LibXML::NodeList.
# Note: $name is interpolated into the XPath expression unescaped.
sub getElementsByLocalName {
    my ( $node, $name ) = @_;

    my $xpath
        = $name eq '*'
        ? "descendant::*"
        : "descendant::*[local-name()='$name']";

    my @nodes = $node->_findnodes($xpath);
    return @nodes if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@nodes, 1 );
}
# Return the direct children whose nodeName equals $name; '*' selects all
# children of type XML_ELEMENT_NODE instead.
# List context yields a plain list, otherwise an XML::LibXML::NodeList.
sub getChildrenByTagName {
    my ( $node, $name ) = @_;

    my $wanted
        = $name eq '*'
        ? sub { $_[0]->nodeType == XML_ELEMENT_NODE() }
        : sub { $_[0]->nodeName eq $name };

    my @nodes = grep { $wanted->($_) } $node->childNodes();

    return @nodes if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@nodes, 1 );
}
# Return the direct children with local name $name in any namespace, as
# found by _getChildrenByTagNameNS('*', $name).
# List context yields a plain list, otherwise an XML::LibXML::NodeList.
sub getChildrenByLocalName {
    my ( $node, $name ) = @_;

    my @kids = $node->_getChildrenByTagNameNS( '*', $name );

    return @kids if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@kids, 1 );
}
# Return the direct children matching ($nsURI, $name), as found by
# _getChildrenByTagNameNS.
# List context yields a plain list, otherwise an XML::LibXML::NodeList.
sub getChildrenByTagNameNS {
    my ( $node, $nsURI, $name ) = @_;

    my @kids = $node->_getChildrenByTagNameNS( $nsURI, $name );

    return @kids if wantarray;
    return XML::LibXML::NodeList->new_from_ref( \@kids, 1 );
}
# Parse $chunk as a well-balanced XML chunk with a fresh XML::LibXML parser
# (parse_xml_chunk) and append the resulting fragment to this element.
sub appendWellBalancedChunk {
    my ( $self, $chunk ) = @_;

    my $fragment = XML::LibXML->new()->parse_xml_chunk($chunk);
    $self->appendChild($fragment);
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Text Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Text;
use vars qw(@ISA);
@ISA = ('XML::LibXML::Node');
# Text nodes carry no attributes: always returns undef.
sub attributes {
    return undef;
}
# Remove the literal string $string from the node's text.
# Only the first occurrence is removed unless $all is true, in which case
# every occurrence is removed. The result is written back with setData.
#
# Fix: the previous hand-written escape list did not cover all regex
# metacharacters (e.g. '.', '|', '/'), so such strings were matched as
# patterns rather than literally. \Q...\E quotes everything.
sub deleteDataString {
    my $node   = shift;
    my $string = shift;
    my $all    = shift;

    my $data = $node->nodeValue();

    # The (?:...) wrapper keeps the pattern non-empty even for an empty
    # $string; a truly empty pattern would make Perl reuse the last
    # successfully matched regex instead.
    if ($all) {
        $data =~ s/(?:\Q$string\E)//g;
    }
    else {
        $data =~ s/(?:\Q$string\E)//;
    }
    $node->setData($data);
}
# Replace the literal string $left in the node's text with $right.
# Only the first occurrence is replaced unless $all is true, in which case
# every occurrence is replaced. The result is written back with setData.
#
# Fix: the previous hand-written escape list did not cover all regex
# metacharacters (e.g. '.', '|', '/'), so such strings were matched as
# patterns rather than literally. \Q...\E quotes everything, ensuring we
# exchange strings and not expressions.
sub replaceDataString {
    my ( $node, $left, $right, $all ) = @_;

    my $datastr = $node->nodeValue();

    # The (?:...) wrapper keeps the pattern non-empty even for an empty
    # $left; a truly empty pattern would make Perl reuse the last
    # successfully matched regex instead.
    if ($all) {
        $datastr =~ s/(?:\Q$left\E)/$right/g;
    }
    else {
        $datastr =~ s/(?:\Q$left\E)/$right/;
    }
    $node->setData($datastr);
}
# Apply a regular-expression substitution to the node's text.
#
#   $leftre   pattern source (required; returns early when undefined)
#   $rightre  replacement source (a false value is replaced by "")
#   $flags    optional substitution modifiers appended after the final '/'
#
# The substitution is assembled as the source text "s/LEFT/RIGHT/FLAGS" and
# executed with a string eval against $datastr; the result is written back
# with setData.
#
# NOTE(review): because the statement is built by string concatenation and
# run through eval, all three arguments are executed as Perl source (a '/'
# in an argument ends the pattern early, the 'e' flag runs the replacement
# as code). Never pass untrusted input here. The result of the eval and $@
# are not checked, so a malformed expression leaves the text unchanged
# without any error being reported.
sub replaceDataRegEx {
my ( $node, $leftre, $rightre, $flags ) = @_;
return unless defined $leftre;
$rightre ||= "";
my $datastr = $node->nodeValue();
# the eval'ed code below refers to the lexical $datastr by name
my $restr = "s/" . $leftre . "/" . $rightre . "/";
$restr .= $flags if defined $flags;
eval '$datastr =~ '. $restr;
$node->setData( $datastr );
}
1;
package XML::LibXML::Comment;
use vars qw(@ISA);
@ISA = ('XML::LibXML::Text');
1;
package XML::LibXML::CDATASection;
use vars qw(@ISA);
@ISA = ('XML::LibXML::Text');
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Attribute Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Attr;
use vars qw( @ISA );
@ISA = ('XML::LibXML::Node');

# Put the attribute into namespace $href with prefix $prefix.
# Returns 0 if _setNamespace fails; otherwise re-applies the node name that
# was recorded before the call (setNodeName) and returns 1.
sub setNamespace {
    my ( $self, $href, $prefix ) = @_;

    my $name_before = $self->nodeName;

    return 0 unless $self->_setNamespace( $href, $prefix );

    $self->setNodeName($name_before);
    return 1;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Dtd Interface #
#-------------------------------------------------------------------------#
# this is still under construction
#
package XML::LibXML::Dtd;
use vars qw( @ISA );
@ISA = ('XML::LibXML::Node');
# at least DESTROY and CLONE_SKIP must be inherited
1;
#-------------------------------------------------------------------------#
# XML::LibXML::PI Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::PI;
use vars qw( @ISA );
@ISA = ('XML::LibXML::Node');

# Set the data of the processing instruction.
#
# With exactly one argument that argument is the data string. Otherwise the
# arguments are taken as name => value pairs and rendered as
# 'name="value"' items joined by single spaces (in hash order).
# Data containing "?>" is silently ignored, since that sequence would end
# the processing instruction.
sub setData {
    my ( $pi, @args ) = @_;

    my $string;
    if ( @args == 1 ) {
        $string = $args[0];
    }
    else {
        my %pseudo_attr = @args;
        $string = join " ",
            map { $_ . '="' . $pseudo_attr{$_} . '"' } keys %pseudo_attr;
    }

    # the spec says any char but "?>" [17]
    $pi->_setData($string) unless $string =~ /\?>/;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Namespace Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Namespace;

# Namespace objects are always skipped when a thread is cloned.
sub CLONE_SKIP {
    return 1;
}

# A namespace is in fact not a node; the accessors below report fixed
# values for every namespace declaration.

sub prefix {
    return "xmlns";
}

sub getPrefix {
    return "xmlns";
}

sub getNamespaceURI {
    return "http://www.w3.org/2000/xmlns/";
}

sub getNamespaces {
    return ();
}

# Name of the declaration as it would be written in a document:
# "xmlns:PREFIX" when localname is non-empty, plain "xmlns" otherwise.
sub nodeName {
    my ($self) = @_;

    my $declared_prefix = $self->localname;
    return "xmlns"
        unless defined($declared_prefix) && length($declared_prefix);
    return "xmlns:$declared_prefix";
}

# name() and getName() jump straight into nodeName()
sub name    { goto &nodeName }
sub getName { goto &nodeName }

# Compare with another object: anything that is not exactly an
# XML::LibXML::Namespace is unequal (0); otherwise _isEqual decides.
sub isEqualNode {
    my ( $self, $other ) = @_;

    return 0 unless ref($other) eq "XML::LibXML::Namespace";
    return $self->_isEqual($other);
}

# Return 1 if both references hold the same numeric value when
# dereferenced, 0 otherwise.
sub isSameNode {
    my ( $self, $other ) = @_;

    return $$self == $$other ? 1 : 0;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::NamedNodeMap Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::NamedNodeMap;
use XML::LibXML qw(:libxml);
# Thread-cloning hook: returns 0 when $XML::LibXML::__threads_shared is
# true, otherwise 1.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Build a named node map from a list of nodes.
# The object keeps the nodes in order (Nodes) and an index from nodeName to
# node (NodeMap); for duplicate names the later node wins in the index.
sub new {
    my ( $class, @nodes ) = @_;

    my %by_name;
    $by_name{ $_->nodeName } = $_ for @nodes;

    return bless { Nodes => [@nodes], NodeMap => \%by_name }, $class;
}
# Number of nodes in the map.
sub length {
    my ($self) = @_;
    return scalar @{ $self->{Nodes} };
}
# Reference to the internal list of nodes (not a copy).
sub nodes {
    my ($self) = @_;
    return $self->{Nodes};
}
# Node at position $index in the map (undef when out of range).
sub item {
    my ( $self, $index ) = @_;
    return $self->{Nodes}->[$index];
}
# Look up a node by its node name; undef when there is no such node.
sub getNamedItem {
    my ( $self, $name ) = @_;
    return $self->{NodeMap}->{$name};
}
# Add $node to the map, replacing an existing node of the same name.
#
# Returns the result of replaceNode when a node was replaced, otherwise
# undef. Nothing happens (empty return) for namespace declaration nodes and
# when $node already is the node stored under its name. Adding to an empty
# map is not implemented: a warning is issued and undef returned.
#
# Fix: when an existing node was replaced, the old node used to stay in the
# ordered node list and the new node was appended in addition, so length()
# and item() no longer matched the name index. The new node now takes over
# the old node's position in the list.
sub setNamedItem {
    my $self = shift;
    my $node = shift;

    my $retval;
    return $retval unless defined $node;

    unless ( scalar @{ $self->{Nodes} } ) {
        # not done yet
        # can this be properly be done???
        warn "not done yet\n";
        return $retval;
    }

    my $name = $node->nodeName();
    if ( $node->nodeType() == XML_NAMESPACE_DECL ) {
        return;
    }

    my $old = $self->{NodeMap}->{$name};
    if ( defined $old ) {
        if ( $node->isSameNode($old) ) {
            return;
        }

        # locate the old node in the ordered list before it is unlinked
        my $position;
        for my $i ( 0 .. $#{ $self->{Nodes} } ) {
            if ( $old->isSameNode( $self->{Nodes}->[$i] ) ) {
                $position = $i;
                last;
            }
        }

        $retval = $old->replaceNode($node);

        if ( defined $position ) {
            $self->{Nodes}->[$position] = $node;
        }
        else {
            push @{ $self->{Nodes} }, $node;
        }
    }
    else {
        $self->{Nodes}->[0]->addSibling($node);
        push @{ $self->{Nodes} }, $node;
    }

    $self->{NodeMap}->{$name} = $node;
    return $retval;
}
# Remove the node called $name from the map and unbind it from its parent.
# Returns the removed node, or undef if there is no such node. Removing
# namespace declarations (names starting with "xmlns") is not implemented:
# a warning is issued and undef returned.
sub removeNamedItem {
    my ( $self, $name ) = @_;

    my $removed;

    if ( $name =~ /^xmlns/ ) {
        warn "not done yet\n";
    }
    elsif ( exists $self->{NodeMap}->{$name} ) {
        $removed = delete $self->{NodeMap}->{$name};
        $removed->unbindNode;

        # keep the ordered list in sync with the name index
        my @remaining = grep { !$removed->isSameNode($_) } @{ $self->{Nodes} };
        $self->{Nodes} = \@remaining;
    }

    return $removed;
}
# Namespace-aware lookup; not implemented, always returns undef.
sub getNamedItemNS {
    my ( $self, $nsURI, $name ) = @_;
    return undef;
}
# Namespace-aware insertion; not implemented, always returns undef.
sub setNamedItemNS {
    my ( $self, $nsURI, $node ) = @_;
    return undef;
}
# Namespace-aware removal; not implemented, always returns undef.
sub removeNamedItemNS {
    my ( $self, $nsURI, $name ) = @_;
    return undef;
}
1;
package XML::LibXML::_SAXParser;
# this is pseudo class!!! and it will be removed as soon all functions
# moved to XS level
use XML::SAX::Exception;
# Thread-cloning hook (see CLONE_SKIP in perlmod).
# Returns 0 when $XML::LibXML::__threads_shared is true, otherwise 1.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# these functions will use SAX exceptions as soon i know how things really work
# Wrap a parser warning in an XML::SAX::Exception::Parse object and pass it
# to the warning() method of the handler stored in $parser->{HANDLER}.
#
# Parameters:
#   $parser  - hash-based object holding the handler under {HANDLER}
#   $message - message text
#   $line    - line number
#   $col     - column number
sub warning {
    my ($parser, $message, $line, $col) = @_;
    my @fields = (
        LineNumber   => $line,
        ColumnNumber => $col,
        Message      => $message,
    );
    my $exception = XML::SAX::Exception::Parse->new(@fields);
    return $parser->{HANDLER}->warning($exception);
}
# Wrap a parser error in an XML::SAX::Exception::Parse object and pass it
# to the error() method of the handler stored in $parser->{HANDLER}.
#
# Parameters:
#   $parser  - hash-based object holding the handler under {HANDLER}
#   $message - message text
#   $line    - line number
#   $col     - column number
sub error {
    my ($parser, $message, $line, $col) = @_;
    my @fields = (
        LineNumber   => $line,
        ColumnNumber => $col,
        Message      => $message,
    );
    my $exception = XML::SAX::Exception::Parse->new(@fields);
    return $parser->{HANDLER}->error($exception);
}
# Wrap a fatal parser error in an XML::SAX::Exception::Parse object and pass
# it to the fatal_error() method of the handler stored in $parser->{HANDLER}.
#
# Parameters:
#   $parser  - hash-based object holding the handler under {HANDLER}
#   $message - message text
#   $line    - line number
#   $col     - column number
sub fatal_error {
    my ($parser, $message, $line, $col) = @_;
    my @fields = (
        LineNumber   => $line,
        ColumnNumber => $col,
        Message      => $message,
    );
    my $exception = XML::SAX::Exception::Parse->new(@fields);
    return $parser->{HANDLER}->fatal_error($exception);
}
1;
package XML::LibXML::RelaxNG;

# Always true, so Perl's thread cloning skips objects of this class
# (see CLONE_SKIP in perlmod).
sub CLONE_SKIP { return 1; }

# Constructor taking named arguments. The first defined one of
#   location => ...   (handed to parse_location)
#   string   => ...   (handed to parse_buffer)
#   DOM      => ...   (handed to parse_document)
# decides which parse method is called; its result is returned.
# Returns undef when none of them is defined.
sub new {
    my ($class, %args) = @_;
    my @sources = (
        [ location => 'parse_location' ],
        [ string   => 'parse_buffer'   ],
        [ DOM      => 'parse_document' ],
    );
    foreach my $source (@sources) {
        my ($key, $parse_method) = @$source;
        next unless defined $args{$key};
        my $schema = $class->$parse_method( $args{$key} );
        return $schema;
    }
    return undef;
}
1;
package XML::LibXML::Schema;

# Always true, so Perl's thread cloning skips objects of this class
# (see CLONE_SKIP in perlmod).
sub CLONE_SKIP { return 1; }

# Constructor taking named arguments. The first defined one of
#   location => ...   (handed to parse_location)
#   string   => ...   (handed to parse_buffer)
# decides which parse method is called; its result is returned.
# Returns undef when neither is defined.
sub new {
    my ($class, %args) = @_;
    my @sources = (
        [ location => 'parse_location' ],
        [ string   => 'parse_buffer'   ],
    );
    foreach my $source (@sources) {
        my ($key, $parse_method) = @$source;
        next unless defined $args{$key};
        my $schema = $class->$parse_method( $args{$key} );
        return $schema;
    }
    return undef;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::Pattern Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::Pattern;

# Always true, so Perl's thread cloning skips objects of this class
# (see CLONE_SKIP in perlmod).
sub CLONE_SKIP { 1 }

# Compile a pattern.
#
# Parameters:
#   $pattern - the pattern to compile
#   $ns_map  - optional hash reference mapping prefix => namespace URL
#
# Returns the result of $class->_compilePattern().
# Croaks when the class has no _compilePattern method.
sub new {
    my $class = shift;
    my ($pattern,$ns_map)=@_;
    my $self = undef;
    unless (UNIVERSAL::can($class,'_compilePattern')) {
        # No "use Carp" is visible in this package, so a bare croak() would
        # fail with "Undefined subroutine" instead of this message. Load
        # Carp here and call it fully qualified.
        require Carp;
        Carp::croak("Cannot create XML::LibXML::Pattern - ".
                    "your libxml2 is compiled without pattern support!");
    }
    if (ref($ns_map) eq 'HASH') {
        # translate prefix=>URL hash to a (URL,prefix) list
        $self = $class->_compilePattern($pattern,0,[reverse %$ns_map]);
    } else {
        $self = $class->_compilePattern($pattern,0);
    }
    return $self;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::RegExp Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::RegExp;

# Always true, so Perl's thread cloning skips objects of this class
# (see CLONE_SKIP in perlmod).
sub CLONE_SKIP { 1 }

# Compile a regular expression.
#
# Parameters:
#   $regexp - the expression to compile
#
# Returns the result of $class->_compile($regexp).
# Croaks when the class has no _compile method.
sub new {
    my $class = shift;
    my ($regexp)=@_;
    unless (UNIVERSAL::can($class,'_compile')) {
        # No "use Carp" is visible in this package, so a bare croak() would
        # fail with "Undefined subroutine" instead of this message. Load
        # Carp here and call it fully qualified.
        require Carp;
        Carp::croak("Cannot create XML::LibXML::RegExp - ".
                    "your libxml2 is compiled without regexp support!");
    }
    return $class->_compile($regexp);
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::XPathExpression Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::XPathExpression;
# Always true, so Perl's thread cloning skips objects of this class
# (see CLONE_SKIP in perlmod).
sub CLONE_SKIP {
    return 1;
}
1;
#-------------------------------------------------------------------------#
# XML::LibXML::InputCallback Interface #
#-------------------------------------------------------------------------#
package XML::LibXML::InputCallback;
use vars qw($_CUR_CB @_GLOBAL_CALLBACKS @_CB_STACK);
# Reset the package state used by the _callback_* subs at compile time:
#   $_CUR_CB           - callback set currently in use (none yet)
#   @_GLOBAL_CALLBACKS - callback sets consulted by _callback_match
#   @_CB_STACK         - stack of callback sets that matched
BEGIN {
    undef $_CUR_CB;
    @_CB_STACK         = ();
    @_GLOBAL_CALLBACKS = ();
}
# Thread-cloning hook (see CLONE_SKIP in perlmod).
# Returns 0 when $XML::LibXML::__threads_shared is true, otherwise 1.
sub CLONE_SKIP {
    my $skip = 1;
    $skip = 0 if $XML::LibXML::__threads_shared;
    return $skip;
}
#-------------------------------------------------------------------------#
# global callbacks #
#-------------------------------------------------------------------------#
# Ask each callback set in @_GLOBAL_CALLBACKS, in order, whether it handles
# $uri. The first set whose match callback (element 0) returns 1 becomes
# $_CUR_CB and is put on top of @_CB_STACK.
#
# Match callbacks are expected to return 1, 0 or undef; 0 and undef are
# treated alike, and any other value counts as "no match".
#
# Returns the value of the last match callback that ran, or 0 when there
# are no callback sets.
sub _callback_match {
    my ($uri) = @_;
    my $retval = 0;
    for my $cbset (@_GLOBAL_CALLBACKS) {
        my $matcher = $cbset->[0];
        $retval = $matcher->($uri);
        next unless defined $retval;
        next unless $retval == 1;
        # from now on the other hooks use this callback set
        $_CUR_CB = $cbset;
        unshift @_CB_STACK, $cbset;
        last;
    }
    return $retval;
}
# Call the open callback (element 1) of the current callback set for $uri.
#
# The open callback has to return a defined value: a file handle when
# working on files, but it may just as well be a database handle or an
# integer labelling a dataset.
#
# When the callback returns undef or something numerically equal to 0, the
# top of @_CB_STACK is dropped and $_CUR_CB falls back to the new top.
#
# Returns the open callback's value, or undef when no callback set is
# current.
sub _callback_open {
    my ($uri) = @_;
    return undef unless defined $_CUR_CB;
    my $opener = $_CUR_CB->[1];
    my $handle = $opener->($uri);
    if ( !defined($handle) || $handle == 0 ) {
        # this callback set could not open the uri: step back to the previous one
        shift @_CB_STACK;
        $_CUR_CB = $_CB_STACK[0];
    }
    return $handle;
}
# Call the read callback (element 2) of the current callback set.
#
# Parameters:
#   $fh     - the value previously returned by the open callback
#   $buflen - passed through to the read callback
#
# Returns the read callback's value, or undef when no callback set is
# current.
sub _callback_read {
    my ($fh, $buflen) = @_;
    return undef unless defined $_CUR_CB;
    my $reader = $_CUR_CB->[2];
    my $chunk  = $reader->($fh, $buflen);
    return $chunk;
}
# Call the close callback (element 3) of the current callback set on $fh,
# then drop the top of @_CB_STACK and make the new top the current set.
#
# Returns the close callback's value, or 0 when no callback set is current.
sub _callback_close {
    my ($fh) = @_;
    return 0 unless defined $_CUR_CB;
    my $closer = $_CUR_CB->[3];
    my $status = $closer->($fh);
    shift @_CB_STACK;
    $_CUR_CB = $_CB_STACK[0];
    return $status;
}
#-------------------------------------------------------------------------#
# member functions and methods #
#-------------------------------------------------------------------------#
# Constructor: returns a new object with an empty list of callback sets
# stored under {_CALLBACKS}.
sub new {
    my ($class) = @_;
    my %state = ( '_CALLBACKS' => [] );
    return bless \%state, $class;
}
# Put a callback set on top of this object's callback list.
# synopsis: $icb->register_callbacks( [$match_cb, $open_cb, $read_cb, $close_cb] );
#
# Anything that is not an array reference with exactly four elements is
# silently ignored.
sub register_callbacks {
    my ($self, $cbset) = @_;
    # only complete callback sets are accepted
    my $is_complete = ( ref($cbset) eq "ARRAY" and @$cbset == 4 );
    if ($is_complete) {
        unshift @{ $self->{_CALLBACKS} }, $cbset;
    }
}
# Remove callback sets from this object's callback list.
#
# When a complete callback set (array reference with four elements) is
# passed, every registered set whose match callback (element 0) is the same
# as the passed set's match callback is removed. Otherwise the set on top
# of the list is removed.
sub unregister_callbacks {
    my ($self, $cbset) = @_;
    if ( ref($cbset) eq "ARRAY" and @$cbset == 4 ) {
        my @remaining = grep { $_->[0] != $cbset->[0] } @{ $self->{_CALLBACKS} };
        $self->{_CALLBACKS} = [@remaining];
    }
    else {
        shift @{ $self->{_CALLBACKS} };
    }
}
# Make libxml2 use this object's callbacks.
#
# Resets the package state, copies this object's callback sets into
# @_GLOBAL_CALLBACKS and, when all four of $XML::LibXML::match_cb, open_cb,
# read_cb and close_cb are defined, appends them as one more callback set.
# Finally calls $self->lib_init_callbacks().
sub init_callbacks {
    my ($self) = @_;
    undef $_CUR_CB;
    @_CB_STACK         = ();
    @_GLOBAL_CALLBACKS = @{ $self->{_CALLBACKS} };
    my @global_set = (
        $XML::LibXML::match_cb,
        $XML::LibXML::open_cb,
        $XML::LibXML::read_cb,
        $XML::LibXML::close_cb,
    );
    unless ( grep { not defined $_ } @global_set ) {
        push @_GLOBAL_CALLBACKS, [@global_set];
    }
    $self->lib_init_callbacks();
}
# Reset libxml2's callbacks: clears $_CUR_CB, @_GLOBAL_CALLBACKS and
# @_CB_STACK, then calls $self->lib_cleanup_callbacks().
sub cleanup_callbacks {
    my ($self) = @_;
    undef $_CUR_CB;
    @_CB_STACK         = ();
    @_GLOBAL_CALLBACKS = ();
    $self->lib_cleanup_callbacks();
}
# NOTE(review): presumably marks the main module as completely loaded;
# the reader of this flag is not visible here - confirm.
$XML::LibXML::__loaded=1;
1;
__END__
PK @[ܖ: :
LibXML.podnu W+A =head1 NAME
XML::LibXML - Perl Binding for libxml2
=head1 SYNOPSIS
use XML::LibXML;
my $dom = XML::LibXML->load_xml(string => <<'EOT');
<some-xml/>
EOT
$Version_String = XML::LibXML::LIBXML_DOTTED_VERSION;
$Version_ID = XML::LibXML::LIBXML_VERSION;
$DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION;
$libxmlnode = XML::LibXML->import_GDOME( $node, $deep );
$gdomenode = XML::LibXML->export_GDOME( $node, $deep );
=head1 DESCRIPTION
This module is an interface to libxml2, providing XML and HTML parsers with
DOM, SAX and XMLReader interfaces, a large subset of DOM Layer 3 interface and
a XML::XPath-like interface to XPath API of libxml2. The module is split into
several packages which are not described in this section; unless stated
otherwise, you only need to C<<<<<< use XML::LibXML; >>>>>> in your programs.
For further information, please check the following documentation:
=over 4
=item L<<<<<< XML::LibXML::Parser >>>>>>
Parsing XML files with XML::LibXML
=item L<<<<<< XML::LibXML::DOM >>>>>>
XML::LibXML Document Object Model (DOM) Implementation
=item L<<<<<< XML::LibXML::SAX >>>>>>
XML::LibXML direct SAX parser
=item L<<<<<< XML::LibXML::Reader >>>>>>
Reading XML with a pull-parser
=item L<<<<<< XML::LibXML::Dtd >>>>>>
XML::LibXML frontend for DTD validation
=item L<<<<<< XML::LibXML::RelaxNG >>>>>>
XML::LibXML frontend for RelaxNG schema validation
=item L<<<<<< XML::LibXML::Schema >>>>>>
XML::LibXML frontend for W3C Schema schema validation
=item L<<<<<< XML::LibXML::XPathContext >>>>>>
API for evaluating XPath expressions with enhanced support for the evaluation
context
=item L<<<<<< XML::LibXML::InputCallback >>>>>>
Implementing custom URI Resolver and input callbacks
=item L<<<<<< XML::LibXML::Common >>>>>>
Common functions for XML::LibXML related Classes
=back
The nodes in the Document Object Model (DOM) are represented by the following
classes (most of which "inherit" from L<<<<<< XML::LibXML::Node >>>>>>):
=over 4
=item L<<<<<< XML::LibXML::Document >>>>>>
XML::LibXML class for DOM document nodes
=item L<<<<<< XML::LibXML::Node >>>>>>
Abstract base class for XML::LibXML DOM nodes
=item L<<<<<< XML::LibXML::Element >>>>>>
XML::LibXML class for DOM element nodes
=item L<<<<<< XML::LibXML::Text >>>>>>
XML::LibXML class for DOM text nodes
=item L<<<<<< XML::LibXML::Comment >>>>>>
XML::LibXML class for comment DOM nodes
=item L<<<<<< XML::LibXML::CDATASection >>>>>>
XML::LibXML class for DOM CDATA sections
=item L<<<<<< XML::LibXML::Attr >>>>>>
XML::LibXML DOM attribute class
=item L<<<<<< XML::LibXML::DocumentFragment >>>>>>
XML::LibXML's DOM L2 Document Fragment implementation
=item L<<<<<< XML::LibXML::Namespace >>>>>>
XML::LibXML DOM namespace nodes
=item L<<<<<< XML::LibXML::PI >>>>>>
XML::LibXML DOM processing instruction nodes
=back
=head1 ENCODINGS SUPPORT IN XML::LIBXML
Recall that since version 5.6.1, Perl distinguishes between character strings
(internally encoded in UTF-8) and so called binary data and, accordingly,
applies either character or byte semantics to them. A scalar representing a
character string is distinguished from a byte string by a special flag (UTF8).
Please refer to I<<<<<< perlunicode >>>>>> for details.
XML::LibXML's API is designed to deal with many encodings of XML documents
completely transparently, so that the application using XML::LibXML can be
completely ignorant about the encoding of the XML documents it works with. On
the other hand, functions like C<<<<<< XML::LibXML::Document->setEncoding >>>>>> give the user control over the document encoding.
To ensure the aforementioned transparency and uniformity, most functions of
XML::LibXML that work with in-memory trees accept and return data as character
strings (i.e. UTF-8 encoded with the UTF8 flag on) regardless of the original
document encoding; however, the functions related to I/O operations (i.e.
parsing and saving) operate with binary data (in the original document
encoding) obeying the encoding declaration of the XML documents.
Below we summarize basic rules and principles regarding encoding:
=over 4
=item 1.
Do NOT apply any encoding-related PerlIO layers (C<<<<<< :utf8 >>>>>> or C<<<<<< :encoding(...) >>>>>>) to file handles that are an input for the parsers or an output for a
serializer of (full) XML documents. This is because the conversion of the data
to/from the internal character representation is provided by libxml2 itself
which must be able to enforce the encoding specified by the C<<<<<< <?xml version="1.0" encoding="..."?> >>>>>> declaration. Here is an example to follow:
use XML::LibXML;
open my $fh, "file.xml";
binmode $fh; # drop all PerlIO layers possibly created by a use open pragma
$doc = XML::LibXML->load_xml(IO => $fh);
open my $out, '>', 'out.xml';
binmode $out; # as above
$doc->toFh($out);
# or
print {$out} $doc->toString();
=item 2.
All functions working with DOM accept and return character strings (UTF-8
encoded with UTF8 flag on). E.g.
my $doc = XML::LibXML::Document->new('1.0',$some_encoding);
my $element = $doc->createElement($name);
$element->appendText($text);
$xml_fragment = $element->toString(); # returns a character string
$xml_document = $doc->toString(); # returns a byte string
where C<<<<<< $some_encoding >>>>>> is the document encoding that will be used when saving the document, and C<<<<<< $name >>>>>> and C<<<<<< $text >>>>>> contain character strings (UTF-8 encoded with UTF8 flag on). Note that the
method C<<<<<< toString >>>>>> returns XML as a character string if applied to a node other than the Document
node and a byte string containing the appropriate C<<<<<< <?xml version="1.0" encoding="..."?> >>>>>>
declaration if applied to a L<<<<<< XML::LibXML::Document >>>>>>.
=item 3.
DOM methods also accept binary strings in the original encoding of the document
to which the node belongs (UTF-8 is assumed if the node is not attached to any
document). Exploiting this feature is NOT RECOMMENDED since it is considered a
bad practice.
my $doc = XML::LibXML::Document->new('1.0','iso-8859-2');
my $text = $doc->createTextNode($some_latin2_encoded_byte_string);
# WORKS, BUT NOT RECOMMENDED!
=back
I<<<<<< NOTE: >>>>>> libxml2 support for many encodings is based on the iconv library. The actual
list of supported encodings may vary from platform to platform. To test if your
platform works correctly with your language encoding, build a simple document
in the particular encoding and try to parse it with XML::LibXML to see if the
parser produces any errors. Occasional crashes were reported on rare platforms
that ship with a broken version of iconv.
=head1 THREAD SUPPORT
XML::LibXML since 1.67 partially supports Perl threads in Perl >= 5.8.8.
XML::LibXML can be used with threads in two ways:
By default, all XML::LibXML classes use CLONE_SKIP class method to prevent Perl
from copying XML::LibXML::* objects when a new thread is spawned. In this mode,
all XML::LibXML::* objects are thread specific. This is the safest way to work
with XML::LibXML in threads.
Alternatively, one may use
use threads;
use XML::LibXML qw(:threads_shared);
to indicate, that all XML::LibXML node and parser objects should be shared
between the main thread and any thread spawned from there. For example, in
my $doc = XML::LibXML->load_xml(location => $filename);
my $thr = threads->new(sub{
# code working with $doc
1;
});
$thr->join;
the variable C<<<<<< $doc >>>>>> refers to the exact same XML::LibXML::Document in the spawned thread as in the
main thread.
Without using mutex locks, parallel threads may read the same document (i.e. any
node that belongs to the document), parse files, and modify different
documents.
However, if there is a chance that some of the threads will attempt to modify a
document (or even create new nodes based on that document, e.g. with C<<<<<< $doc->createElement >>>>>>) that other threads may be reading at the same time, the user is responsible
for creating a mutex lock and using it I<<<<<< both >>>>>> in the thread that modifies and in the thread that reads:
my $doc = XML::LibXML->load_xml(location => $filename);
my $mutex : shared;
my $thr = threads->new(sub{
lock $mutex;
my $el = $doc->createElement('foo');
# ...
1;
});
{
lock $mutex;
my $root = $doc->documentElement;
say $root->name;
}
$thr->join;
Note that libxml2 uses dictionaries to store short strings and these
dictionaries are kept on a document node. Without mutex locks, it could happen
in the previous example that the thread modifies the dictionary while other
threads attempt to read from it, which could easily lead to a crash.
=head1 VERSION INFORMATION
Sometimes it is useful to figure out which version XML::LibXML was
compiled for. In most cases this is for debugging or to check if a given
installation meets all functionality for the package. The functions
XML::LibXML::LIBXML_DOTTED_VERSION and XML::LibXML::LIBXML_VERSION provide this
version information. Both functions simply pass through the values of the
similar named macros of libxml2. Similarly, XML::LibXML::LIBXML_RUNTIME_VERSION
returns the version of the (usually dynamically) linked libxml2.
=over 4
=item XML::LibXML::LIBXML_DOTTED_VERSION
$Version_String = XML::LibXML::LIBXML_DOTTED_VERSION;
Returns the version string of the libxml2 version XML::LibXML was compiled for.
This will be "2.6.2" for "libxml2 2.6.2".
=item XML::LibXML::LIBXML_VERSION
$Version_ID = XML::LibXML::LIBXML_VERSION;
Returns the version id of the libxml2 version XML::LibXML was compiled for.
This will be "20602" for "libxml2 2.6.2". Don't mix this version id with
$XML::LibXML::VERSION. The latter contains the version of XML::LibXML itself
while the first contains the version of libxml2 XML::LibXML was compiled for.
=item XML::LibXML::LIBXML_RUNTIME_VERSION
$DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION;
Returns a version string of the libxml2 which is (usually dynamically) linked
by XML::LibXML. This will be "20602" for libxml2 released as "2.6.2" and
something like "20602-CVS2032" for a CVS build of libxml2.
XML::LibXML issues a warning if the version of libxml2 dynamically linked to it
is less than the version of libxml2 which it was compiled against.
=back
=head1 EXPORTS
By default the module exports all constants and functions listed in the :all
tag, described below.
=head1 EXPORT TAGS
=over 4
=item C<<<<<< :all >>>>>>
Includes the tags C<<<<<< :libxml >>>>>>, C<<<<<< :encoding >>>>>>, and C<<<<<< :ns >>>>>> described below.
=item C<<<<<< :libxml >>>>>>
Exports integer constants for DOM node types.
XML_ELEMENT_NODE => 1
XML_ATTRIBUTE_NODE => 2
XML_TEXT_NODE => 3
XML_CDATA_SECTION_NODE => 4
XML_ENTITY_REF_NODE => 5
XML_ENTITY_NODE => 6
XML_PI_NODE => 7
XML_COMMENT_NODE => 8
XML_DOCUMENT_NODE => 9
XML_DOCUMENT_TYPE_NODE => 10
XML_DOCUMENT_FRAG_NODE => 11
XML_NOTATION_NODE => 12
XML_HTML_DOCUMENT_NODE => 13
XML_DTD_NODE => 14
XML_ELEMENT_DECL => 15
XML_ATTRIBUTE_DECL => 16
XML_ENTITY_DECL => 17
XML_NAMESPACE_DECL => 18
XML_XINCLUDE_START => 19
XML_XINCLUDE_END => 20
=item C<<<<<< :encoding >>>>>>
Exports two encoding conversion functions from XML::LibXML::Common.
encodeToUTF8()
decodeFromUTF8()
=item C<<<<<< :ns >>>>>>
Exports two convenience constants: the implicit namespace of the reserved C<<<<<< xml: >>>>>> prefix, and the implicit namespace for the reserved C<<<<<< xmlns: >>>>>> prefix.
XML_XML_NS => 'http://www.w3.org/XML/1998/namespace'
XML_XMLNS_NS => 'http://www.w3.org/2000/xmlns/'
=back
=head1 RELATED MODULES
The modules described in this section are not part of the XML::LibXML package
itself. As they support some additional features, they are mentioned here.
=over 4
=item L<<<<<< XML::LibXSLT >>>>>>
XSLT 1.0 Processor using libxslt and XML::LibXML
=item L<<<<<< XML::LibXML::Iterator >>>>>>
XML::LibXML Implementation of the DOM Traversal Specification
=item L<<<<<< XML::CompactTree::XS >>>>>>
Uses XML::LibXML::Reader to very efficiently parse an XML document or element
into native Perl data structures, which are less flexible but significantly
faster to process than DOM.
=back
=head1 XML::LIBXML AND XML::GDOME
Note: I<<<<<< THE FUNCTIONS DESCRIBED HERE ARE STILL EXPERIMENTAL >>>>>>
Although both modules make use of libxml2's XML capabilities, the DOM
implementation of both modules are not compatible. But still it is possible to
exchange nodes from one DOM to the other. The concept of this exchange is
pretty similar to the function cloneNode(): The particular node is copied on
the low-level to the opposite DOM implementation.
Since the DOM implementations cannot coexist within one document, one is forced
to copy each node that should be used. Because you are always keeping two nodes
this may cause quite an impact on a machine's memory usage.
XML::LibXML provides two functions to export or import GDOME nodes:
import_GDOME() and export_GDOME(). Both functions have two parameters: the node
and a flag for recursive import. The flag works as in cloneNode().
The two functions allow to export and import XML::GDOME nodes explicitly,
however, XML::LibXML allows also the transparent import of XML::GDOME nodes in
functions such as appendChild(), insertAfter() and so on. While native nodes
are automatically adopted in most functions XML::GDOME nodes are always cloned
in advance. Thus if the original node is modified after the operation, the node
in the XML::LibXML document will not have this information.
=over 4
=item import_GDOME
$libxmlnode = XML::LibXML->import_GDOME( $node, $deep );
This clones an XML::GDOME node to a XML::LibXML node explicitly.
=item export_GDOME
$gdomenode = XML::LibXML->export_GDOME( $node, $deep );
Allows to clone an XML::LibXML node into a XML::GDOME node.
=back
=head1 CONTACTS
For bug reports, please use the CPAN request tracker on
http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-LibXML
For suggestions etc., and other issues related to XML::LibXML you may use the
perl XML mailing list (C<<<<<< perl-xml@listserv.ActiveState.com >>>>>>), where most XML-related Perl modules are discussed. In case of problems you
should check the archives of that list first. Many problems are already
discussed there. You can find the list's archives and subscription options at L<<<<<< http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml >>>>>>.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[no LibXML/XPathContext.pmnu W+A # $Id: XPathContext.pm 422 2002-11-08 17:10:30Z phish $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::XPathContext;
use strict;
use vars qw($VERSION @ISA $USE_LIBXML_DATA_TYPES);
use Carp;
use XML::LibXML;
use XML::LibXML::NodeList;
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
# should LibXML XPath data types be used for simple objects
# when passing parameters to extension functions (default: no)
$USE_LIBXML_DATA_TYPES = 0;
# Always true, so Perl's thread cloning skips objects of this class
# (see CLONE_SKIP in perlmod).
sub CLONE_SKIP {
    return 1;
}
# Evaluate $xpath through the guarded '_findnodes' call.
#
# Parameters:
#   $xpath - the XPath expression
#   $node  - optional context node for this query
#
# Returns the matching nodes as a list in list context, otherwise an
# XML::LibXML::NodeList built from them.
sub findnodes {
    my ($self, $xpath, $node) = @_;
    my @found = $self->_guarded_find_call('_findnodes', $node, $xpath);
    return @found if wantarray;
    return XML::LibXML::NodeList->new(@found);
}
# Evaluate $xpath through the guarded '_find' call.
#
# Parameters:
#   $xpath - the XPath expression
#   $node  - optional context node for this query
#
# The guarded call yields a class name followed by constructor arguments.
# Returns an object of that class, or undef when no class name came back.
sub find {
    my ($self, $xpath, $node) = @_;
    my ($result_class, @ctor_args)
        = $self->_guarded_find_call('_find', $node, $xpath, 0);
    return undef unless $result_class;
    return $result_class->new(@ctor_args);
}
# Evaluate $xpath through the guarded '_find' call with its last argument
# set to 1, and return the second element of what comes back.
#
# Parameters:
#   $xpath - the XPath expression
#   $node  - optional context node for this query
sub exists {
    my ($self, $xpath, $node) = @_;
    my @result = $self->_guarded_find_call('_find', $node, $xpath, 1);
    return $result[1];
}
# Shortcut for find(...)->to_literal->value: evaluates the expression with
# find() (same arguments) and returns the value of its literal form.
sub findvalue {
    my ($self, @find_args) = @_;
    my $result  = $self->find(@find_args);
    my $literal = $result->to_literal;
    return $literal->value;
}
# Call $self->$method with the context node temporarily switched to $node,
# restoring the previous context node afterwards.
#
# Parameters:
#   $method - name of the method to invoke on $self
#   $node   - optional context node; only used when it is a reference
#   @_      - remaining arguments, passed through to $method
#
# Returns whatever $method returned (it is called in list context).
# Croaks with the error raised inside $method. String errors lose their
# trailing newline; reference (object) errors are passed on untouched.
sub _guarded_find_call {
    my ($self, $method, $node)=(shift,shift,shift);
    my $prev_node;
    if (ref($node)) {
        $prev_node = $self->getContextNode();
        $self->setContextNode($node);
    }
    my @ret;
    eval {
        @ret = $self->$method(@_);
    };
    # Save the error right away: the cleanup calls below may run code that
    # resets $@, which would silently swallow the failure.
    my $err = $@;
    $self->_free_node_pool;
    $self->setContextNode($prev_node) if ref($node);
    if ($err) {
        # chomp would turn an exception object into a plain string
        chomp $err unless ref $err;
        croak $err;
    }
    return @ret;
}
# Register $sub as function $name without a namespace: calls
# registerFunctionNS with an undefined namespace. Returns nothing.
sub registerFunction {
    my $self = shift;
    my ($name, $sub) = @_;
    $self->registerFunctionNS($name, undef, $sub);
    return;
}
# Unregister namespace prefix $prefix: calls registerNs with an undefined
# namespace URI. Returns nothing.
sub unregisterNs {
    my $self = shift;
    my ($prefix) = @_;
    $self->registerNs($prefix, undef);
    return;
}
# Unregister function $name without a namespace: calls registerFunctionNS
# with an undefined namespace and an undefined callback. Returns nothing.
sub unregisterFunction {
    my $self = shift;
    my ($name) = @_;
    $self->registerFunctionNS($name, undef, undef);
    return;
}
# Unregister function $name in namespace $ns: calls registerFunctionNS with
# an undefined callback. Returns nothing.
sub unregisterFunctionNS {
    my $self = shift;
    my ($name, $ns) = @_;
    $self->registerFunctionNS($name, $ns, undef);
    return;
}
# Unregister the variable lookup function: calls registerVarLookupFunc with
# an undefined callback and undefined data. Returns nothing.
sub unregisterVarLookupFunc {
    my $self = shift;
    $self->registerVarLookupFunc(undef, undef);
    return;
}
# Dispatcher for extension functions written in Perl
# (borrowed from XML::LibXSLT).
#
# Parameters:
#   $func   - code reference or function name; a name without a package
#             part is looked up in main::
#   @params - flat list of typed values:
#               'XML::LibXML::Literal' | 'XML::LibXML::Number' |
#               'XML::LibXML::Boolean', followed by one value
#               'XML::LibXML::NodeList', followed by a node count and that
#               many nodes
#
# Each decoded argument is put at the FRONT of the argument list. Simple
# values are passed as plain scalars unless $USE_LIBXML_DATA_TYPES is true,
# in which case they are wrapped in an object of the named type.
#
# Returns the result of calling $func in scalar context.
sub _perl_dispatcher {
    my ($func, @params) = @_;
    my @perlParams;
    while (@params) {
        my $type = shift @params;
        if ( $type eq 'XML::LibXML::NodeList' ) {
            my $node_count = shift @params;
            my @nodes = splice(@params, 0, $node_count);
            unshift @perlParams, $type->new(@nodes);
        }
        elsif ( $type eq 'XML::LibXML::Literal'
             or $type eq 'XML::LibXML::Number'
             or $type eq 'XML::LibXML::Boolean' )
        {
            my $val = shift @params;
            unshift @perlParams, ( $USE_LIBXML_DATA_TYPES ? $type->new($val) : $val );
        }
    }
    if ( !ref($func) && $func !~ /(.+)::/ ) {
        $func = "main::$func";
    }
    no strict 'refs';
    my $res = $func->(@perlParams);
    return $res;
}
1;
PK @[+>k- k- LibXML/XPathContext.podnu W+A =head1 NAME
XML::LibXML::XPathContext - XPath Evaluation
=head1 SYNOPSIS
my $xpc = XML::LibXML::XPathContext->new();
my $xpc = XML::LibXML::XPathContext->new($node);
$xpc->registerNs($prefix, $namespace_uri)
$xpc->unregisterNs($prefix)
$uri = $xpc->lookupNs($prefix)
$xpc->registerVarLookupFunc($callback, $data)
$data = $xpc->getVarLookupData();
$callback = $xpc->getVarLookupFunc();
$xpc->unregisterVarLookupFunc();
$xpc->registerFunctionNS($name, $uri, $callback)
$xpc->unregisterFunctionNS($name, $uri)
$xpc->registerFunction($name, $callback)
$xpc->unregisterFunction($name)
@nodes = $xpc->findnodes($xpath)
@nodes = $xpc->findnodes($xpath, $context_node )
$nodelist = $xpc->findnodes($xpath, $context_node )
$object = $xpc->find($xpath )
$object = $xpc->find($xpath, $context_node )
$value = $xpc->findvalue($xpath )
$value = $xpc->findvalue($xpath, $context_node )
$bool = $xpc->exists( $xpath_expression, $context_node );
$xpc->setContextNode($node)
my $node = $xpc->getContextNode;
$xpc->setContextPosition($position)
my $position = $xpc->getContextPosition;
$xpc->setContextSize($size)
my $size = $xpc->getContextSize;
$xpc->setContextNode($node)
The XML::LibXML::XPathContext class provides an almost complete interface to
libxml2's XPath implementation. With XML::LibXML::XPathContext it is possible
to evaluate XPath expressions in the context of arbitrary node, context size,
and context position, with a user-defined namespace-prefix mapping, custom
XPath functions written in Perl, and even a custom XPath variable resolver.
=head1 EXAMPLES
=head2 Namespaces
This example demonstrates C<<<<<< registerNs() >>>>>> method. It finds all paragraph nodes in an XHTML document.
my $xc = XML::LibXML::XPathContext->new($xhtml_doc);
$xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml');
my @nodes = $xc->findnodes('//xhtml:p');
=head2 Custom XPath functions
This example demonstrates C<<<<<< registerFunction() >>>>>> method by defining a function filtering nodes based on a Perl regular
expression:
sub grep_nodes {
my ($nodelist,$regexp) = @_;
my $result = XML::LibXML::NodeList->new;
for my $node ($nodelist->get_nodelist()) {
$result->push($node) if $node->textContent =~ $regexp;
}
return $result;
};
my $xc = XML::LibXML::XPathContext->new($node);
$xc->registerFunction('grep_nodes', \&grep_nodes);
my @nodes = $xc->findnodes('//section[grep_nodes(para,"\bsearch(ing|es)?\b")]');
=head2 Variables
This example demonstrates C<<<<<< registerVarLookupFunc() >>>>>> method. We use XPath variables to recycle results of previous evaluations:
sub var_lookup {
my ($varname,$ns,$data)=@_;
return $data->{$varname};
}
my $areas = XML::LibXML->new->parse_file('areas.xml');
my $empl = XML::LibXML->new->parse_file('employees.xml');
my $xc = XML::LibXML::XPathContext->new($empl);
my %variables = (
A => $xc->find('/employees/employee[@salary>10000]'),
B => $areas->find('/areas/area[district="Brooklyn"]/street'),
);
# get names of employees from $A working in an area listed in $B
$xc->registerVarLookupFunc(\&var_lookup, \%variables);
my @nodes = $xc->findnodes('$A[work_area/street = $B]/name');
=head1 METHODS
=over 4
=item new
my $xpc = XML::LibXML::XPathContext->new();
Creates a new XML::LibXML::XPathContext object without a context node.
my $xpc = XML::LibXML::XPathContext->new($node);
Creates a new XML::LibXML::XPathContext object with the context node set to C<<<<<< $node >>>>>>.
=item registerNs
$xpc->registerNs($prefix, $namespace_uri)
Registers namespace C<<<<<< $prefix >>>>>> to C<<<<<< $namespace_uri >>>>>>.
=item unregisterNs
$xpc->unregisterNs($prefix)
Unregisters namespace C<<<<<< $prefix >>>>>>.
=item lookupNs
$uri = $xpc->lookupNs($prefix)
Returns namespace URI registered with C<<<<<< $prefix >>>>>>. If C<<<<<< $prefix >>>>>> is not registered to any namespace URI returns C<<<<<< undef >>>>>>.
=item registerVarLookupFunc
$xpc->registerVarLookupFunc($callback, $data)
Registers variable lookup function C<<<<<< $callback >>>>>>. The registered function is executed by the XPath engine each time an XPath
variable is evaluated. It takes three arguments: C<<<<<< $data >>>>>>, variable name, and variable ns-URI and must return one value: a number or
string or any C<<<<<< XML::LibXML:: >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g.
Document, Element, etc.), or NodeList. For convenience, simple (non-blessed)
array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of a L<<<<<< XML::LibXML::NodeList >>>>>>.
=item getVarLookupData
$data = $xpc->getVarLookupData();
Returns the data that have been associated with a variable lookup function
during a previous call to C<<<<<< registerVarLookupFunc >>>>>>.
=item getVarLookupFunc
$callback = $xpc->getVarLookupFunc();
Returns the variable lookup function previously registered with C<<<<<< registerVarLookupFunc >>>>>>.
=item unregisterVarLookupFunc
$xpc->unregisterVarLookupFunc();
Unregisters variable lookup function and the associated lookup data.
=item registerFunctionNS
$xpc->registerFunctionNS($name, $uri, $callback)
Registers an extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. C<<<<<< $callback >>>>>> must be a CODE reference. The arguments of the callback function are either
simple scalars or C<<<<<< XML::LibXML::* >>>>>> objects depending on the XPath argument types. The function is responsible for
checking the argument number and types. Result of the callback code must be a
single value of the following types: a simple scalar (number, string) or an
arbitrary C<<<<<< XML::LibXML::* >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g.
Document, Element, etc.), or NodeList. For convenience, simple (non-blessed)
array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of a L<<<<<< XML::LibXML::NodeList >>>>>>.
=item unregisterFunctionNS
$xpc->unregisterFunctionNS($name, $uri)
Unregisters extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. Has the same effect as passing C<<<<<< undef >>>>>> as C<<<<<< $callback >>>>>> to registerFunctionNS.
=item registerFunction
$xpc->registerFunction($name, $callback)
Same as C<<<<<< registerFunctionNS >>>>>> but without a namespace.
=item unregisterFunction
$xpc->unregisterFunction($name)
Same as C<<<<<< unregisterFunctionNS >>>>>> but without a namespace.
=item findnodes
@nodes = $xpc->findnodes($xpath)
@nodes = $xpc->findnodes($xpath, $context_node )
$nodelist = $xpc->findnodes($xpath, $context_node )
Performs the xpath statement on the current node and returns the result as an
array. In scalar context returns a L<<<<<< XML::LibXML::NodeList >>>>>> object. Optionally, a node may be passed as a second argument to set the
context node for the query.
The xpath expression can be passed either as a string or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item find
$object = $xpc->find($xpath )
$object = $xpc->find($xpath, $context_node )
Performs the xpath expression using the current node as the context of the
expression, and returns the result depending on what type of result the XPath
expression had. For example, the XPath C<<<<<< 1 * 3 + 52 >>>>>> results in a L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return a L<<<<<< XML::LibXML::Boolean >>>>>> object, or a L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to ``do
the right thing'' in different contexts. Optionally, a node may be passed as a
second argument to set the context node for the query.
The xpath expression can be passed either as a string or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item findvalue
$value = $xpc->findvalue($xpath )
$value = $xpc->findvalue($xpath, $context_node )
Is exactly equivalent to:
$xpc->find( $xpath, $context_node )->to_literal;
That is, it returns the literal value of the results. This enables you to
ensure that you get a string back from your search, allowing certain shortcuts.
This could be used as the equivalent of XSLT's C<<<<<< <xsl:value-of select="some_xpath"/> >>>>>>.
Optionally, a node may be passed in the second argument to set the context node
for the query.
The xpath expression can be passed either as a string or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item exists
$bool = $xpc->exists( $xpath_expression, $context_node );
This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a
node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true
for libxml2 >= 2.6.27).
For XPath expressions that do not return node-set, the method returns true if
the returned value is a non-zero number or a non-empty string.
=item setContextNode
$xpc->setContextNode($node)
Set the current context node.
=item getContextNode
my $node = $xpc->getContextNode;
Get the current context node.
=item setContextPosition
$xpc->setContextPosition($position)
Set the current context position. By default, this value is -1 (and evaluating
XPath function C<<<<<< position() >>>>>> in the initial context raises an XPath error), but can be set to any value up
to context size. This usually only serves to cheat the XPath engine to return
given position when C<<<<<< position() >>>>>> XPath function is called. Setting this value to -1 restores the default
behavior.
=item getContextPosition
my $position = $xpc->getContextPosition;
Get the current context position.
=item setContextSize
$xpc->setContextSize($size)
Set the current context size. By default, this value is -1 (and evaluating
XPath function C<<<<<< last() >>>>>> in the initial context raises an XPath error), but can be set to any
non-negative value. This usually only serves to cheat the XPath engine to
return the given value when C<<<<<< last() >>>>>> XPath function is called. If context size is set to 0, position is
automatically also set to 0. If context size is positive, position is
automatically set to 1. Setting context size to -1 restores the default
behavior.
=item getContextSize
my $size = $xpc->getContextSize;
Get the current context size.
=item setContextNode
$xpc->setContextNode($node)
Set the current context node.
=back
=head1 BUGS AND CAVEATS
XML::LibXML::XPathContext objects I<<<<<< are >>>>>> reentrant, meaning that you can call methods of an XML::LibXML::XPathContext
even from XPath extension functions registered with the same object or from a
variable lookup function. On the other hand, you should rather avoid
registering new extension functions, namespaces and a variable lookup function
from within extension functions and a variable lookup function, unless you want
to experience untested behavior.
=head1 AUTHORS
Ilya Martynov and Petr Pajas, based on XML::LibXML and XML::LibXSLT code by
Matt Sergeant and Christian Glahn.
=head1 HISTORICAL REMARK
Prior to XML::LibXML 1.61 this module was distributed separately for
maintenance reasons.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[±R R LibXML/Document.podnu W+A =head1 NAME
XML::LibXML::Document - XML::LibXML DOM Document Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Document nodes are listed here,
# see XML::LibXML::Node manpage for other methods
$dom = XML::LibXML::Document->new( $version, $encoding );
$dom = XML::LibXML::Document->createDocument( $version, $encoding );
$strURI = $doc->URI();
$doc->setURI($strURI);
$strEncoding = $doc->encoding();
$strEncoding = $doc->actualEncoding();
$doc->setEncoding($new_encoding);
$strVersion = $doc->version();
$doc->standalone
$doc->setStandalone($numvalue);
my $compression = $doc->compression;
$doc->setCompression($ziplevel);
$docstring = $dom->toString($format);
$c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
$ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
$str = $doc->serialize($format);
$state = $doc->toFile($filename, $format);
$state = $doc->toFH($fh, $format);
$str = $document->toStringHTML();
$str = $document->serialize_html();
$bool = $dom->is_valid();
$dom->validate();
$root = $dom->documentElement();
$dom->setDocumentElement( $root );
$element = $dom->createElement( $nodename );
$element = $dom->createElementNS( $namespaceURI, $qname );
$text = $dom->createTextNode( $content_text );
$comment = $dom->createComment( $comment_text );
$attrnode = $doc->createAttribute($name [,$value]);
$attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] );
$fragment = $doc->createDocumentFragment();
$cdata = $dom->createCDATASection( $cdata_content );
my $pi = $doc->createProcessingInstruction( $target, $data );
my $entref = $doc->createEntityReference($refname);
$dtd = $document->createInternalSubset( $rootnode, $public, $system);
$dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
$document->importNode( $node );
$document->adoptNode( $node );
my $dtd = $doc->externalSubset;
my $dtd = $doc->internalSubset;
$doc->setExternalSubset($dtd);
$doc->setInternalSubset($dtd);
my $dtd = $doc->removeExternalSubset();
my $dtd = $doc->removeInternalSubset();
my @nodelist = $doc->getElementsByTagName($tagname);
my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
my @nodelist = $doc->getElementsByLocalName($localname);
my $node = $doc->getElementById($id);
$dom->indexElements();
=head1 DESCRIPTION
The Document Class is in most cases the result of a parsing process. But
sometimes it is necessary to create a Document from scratch. The DOM Document
Class provides functions that conform to the DOM Core naming style.
It inherits all functions from L<<<<<< XML::LibXML::Node >>>>>> as specified in the DOM specification. This enables access to the nodes besides
the root element on document level - a C<<<<<< DTD >>>>>> for example. The support for these nodes is limited at the moment.
While generally nodes are bound to a document in the DOM concept it is
suggested that one should always create a node not bound to any document. There
is no need of really including the node to the document, but once the node is
bound to a document, it is quite safe that all strings have the correct
encoding. If an unbound text node with an ISO encoded string is created (e.g.
with $CLASS->new()), the C<<<<<< toString >>>>>> function may not return the expected result.
To prevent such problems, it is recommended to pass all data to XML::LibXML
methods as character strings (i.e. UTF-8 encoded, with the UTF8 flag on).
=head1 METHODS
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$dom = XML::LibXML::Document->new( $version, $encoding );
alias for createDocument()
=item createDocument
$dom = XML::LibXML::Document->createDocument( $version, $encoding );
The constructor for the document class. As Parameter it takes the version
string and (optionally) the encoding string. Simply calling I<<<<<< createDocument >>>>>>() will create the document:
<?xml version="your version" encoding="your encoding"?>
Both parameters are optional. The default value for I<<<<<< $version >>>>>> is C<<<<<< 1.0 >>>>>>, of course. If the I<<<<<< $encoding >>>>>> parameter is not set, the encoding will be left unset, which means UTF-8 is
implied.
The call of I<<<<<< createDocument >>>>>>() without any parameter will result in the following code:
<?xml version="1.0"?>
Alternatively one can call this constructor directly from the XML::LibXML class
level, to avoid some typing. This will not have any effect on the class
instance, which is always XML::LibXML::Document.
my $document = XML::LibXML->createDocument( "1.0", "UTF-8" );
is therefore a shortcut for
my $document = XML::LibXML::Document->createDocument( "1.0", "UTF-8" );
=item URI
$strURI = $doc->URI();
Returns the URI (or filename) of the original document. For documents obtained
by parsing a string or a FH without using the URI parsing argument of the
corresponding C<<<<<< parse_* >>>>>> function, the result is a generated string unknown-XYZ where XYZ is some
number; for documents created with the constructor C<<<<<< new >>>>>>, the URI is undefined.
The value can be modified by calling C<<<<<< setURI >>>>>> method on the document node.
=item setURI
$doc->setURI($strURI);
Sets the URI of the document reported by the method URI (see also the URI
argument to the various C<<<<<< parse_* >>>>>> functions).
=item encoding
$strEncoding = $doc->encoding();
returns the encoding string of the document.
my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
print $doc->encoding; # prints ISO-8859-15
=item actualEncoding
$strEncoding = $doc->actualEncoding();
returns the encoding in which the XML will be returned by $doc->toString().
This is usually the original encoding of the document as declared in the XML
declaration and returned by $doc->encoding. If the original encoding is not
known (e.g. if created in memory or parsed from a XML without a declared
encoding), 'UTF-8' is returned.
my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
print $doc->actualEncoding; # prints ISO-8859-15
=item setEncoding
$doc->setEncoding($new_encoding);
This method allows to change the declaration of encoding in the XML declaration
of the document. The value also affects the encoding in which the document is
serialized to XML by $doc->toString(). Use setEncoding() to remove the encoding
declaration.
=item version
$strVersion = $doc->version();
returns the version string of the document
I<<<<<< getVersion() >>>>>> is an alternative form of this function.
=item standalone
$doc->standalone
This function returns the numerical value of a document's XML declaration's
standalone attribute. It returns I<<<<<< 1 >>>>>> if standalone="yes" was found, I<<<<<< 0 >>>>>> if standalone="no" was found and I<<<<<< -1 >>>>>> if standalone was not specified (default on creation).
=item setStandalone
$doc->setStandalone($numvalue);
Through this method it is possible to alter the value of a document's standalone
attribute. Set it to I<<<<<< 1 >>>>>> to set standalone="yes", to I<<<<<< 0 >>>>>> to set standalone="no" or set it to I<<<<<< -1 >>>>>> to remove the standalone attribute from the XML declaration.
=item compression
my $compression = $doc->compression;
libxml2 allows reading of documents directly from gzipped files. In this case
the compression variable is set to the compression level of that file (0-8). If
XML::LibXML parsed a different source or the file wasn't compressed, the
returned value will be I<<<<<< -1 >>>>>>.
=item setCompression
$doc->setCompression($ziplevel);
If one intends to write the document directly to a file, it is possible to set
the compression level for a given document. This level can be in the range from
0 to 8. If XML::LibXML should not try to compress use I<<<<<< -1 >>>>>> (default).
Note that this feature will I<<<<<< only >>>>>> work if libxml2 is compiled with zlib support and toFile() is used for output.
=item toString
$docstring = $dom->toString($format);
I<<<<<< toString >>>>>> is a DOM serializing function, so the DOM Tree is serialized into a XML string,
ready for output.
IMPORTANT: unlike toString for other nodes, on document nodes this function
returns the XML as a byte string in the original encoding of the document (see
the actualEncoding() method)! This means you can simply do:
open OUT, '>', $file;
print OUT $doc->toString;
regardless of the actual encoding of the document. See the section on encodings
in L<<<<<< XML::LibXML >>>>>> for more details.
The optional I<<<<<< $format >>>>>> parameter sets the indenting of the output. This parameter is expected to be an C<<<<<< integer >>>>>> value, that specifies that indentation should be used. The format parameter can
have three different values if it is used:
If $format is 0, then the document is dumped as it was originally parsed
If $format is 1, libxml2 will add ignorable white spaces, so the nodes content
is easier to read. Existing text nodes will not be altered
If $format is 2 (or higher), libxml2 will act as $format == 1 but it adds a
leading and a trailing line break to each text node.
libxml2 uses a hard-coded indentation of 2 space characters per indentation
level. This value can not be altered on run-time.
=item toStringC14N
$c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
See the documentation in L<<<<<< XML::LibXML::Node >>>>>>.
=item toStringEC14N
$ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
See the documentation in L<<<<<< XML::LibXML::Node >>>>>>.
=item serialize
$str = $doc->serialize($format);
An alias for toString(). This function name was added to be more consistent
with libxml2.
=item serialize_c14n
An alias for toStringC14N().
=item serialize_exc_c14n
An alias for toStringEC14N().
=item toFile
$state = $doc->toFile($filename, $format);
This function is similar to toString(), but it writes the document directly
into a filesystem. This function is very useful, if one needs to store large
documents.
The format parameter has the same behaviour as in toString().
=item toFH
$state = $doc->toFH($fh, $format);
This function is similar to toString(), but it writes the document directly to
a filehandle or a stream. A byte stream in the document encoding is passed to
the file handle. Do NOT apply any C<<<<<< :encoding(...) >>>>>> or C<<<<<< :utf8 >>>>>> PerlIO layer to the filehandle! See the section on encodings in L<<<<<< XML::LibXML >>>>>> for more details.
The format parameter has the same behaviour as in toString().
=item toStringHTML
$str = $document->toStringHTML();
I<<<<<< toStringHTML >>>>>> serializes the tree to a byte string in the document encoding as HTML. With this
method indenting is automatic and managed by libxml2 internally.
=item serialize_html
$str = $document->serialize_html();
An alias for toStringHTML().
=item is_valid
$bool = $dom->is_valid();
Returns either TRUE or FALSE depending on whether the DOM Tree is a valid
Document or not.
You may also pass in a L<<<<<< XML::LibXML::Dtd >>>>>> object, to validate against an external DTD:
if (!$dom->is_valid($dtd)) {
warn("document is not valid!");
}
=item validate
$dom->validate();
This is an exception throwing equivalent of is_valid. If the document is not
valid it will throw an exception containing the error. This allows you much
better error reporting than simply is_valid or not.
Again, you may pass in a DTD object
=item documentElement
$root = $dom->documentElement();
Returns the root element of the Document. A document can have just one root
element to contain the document's data.
Optionally one can use I<<<<<< getDocumentElement >>>>>>.
=item setDocumentElement
$dom->setDocumentElement( $root );
This function enables you to set the root element for a document. The function
supports the import of a node from a different document tree, but does not
support a document fragment as $root.
=item createElement
$element = $dom->createElement( $nodename );
This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>>.
=item createElementNS
$element = $dom->createElementNS( $namespaceURI, $qname );
This function creates a new Element Node bound to the DOM with the name C<<<<<< $qname >>>>>> and placed in the given namespace.
=item createTextNode
$text = $dom->createTextNode( $content_text );
As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Text Node >>>>>> bound to the DOM.
=item createComment
$comment = $dom->createComment( $comment_text );
As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Comment Node >>>>>> bound to the DOM.
=item createAttribute
$attrnode = $doc->createAttribute($name [,$value]);
Creates a new Attribute node.
=item createAttributeNS
$attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] );
Creates an Attribute bound to a namespace.
=item createDocumentFragment
$fragment = $doc->createDocumentFragment();
This function creates a DocumentFragment.
=item createCDATASection
$cdata = $dom->createCDATASection( $cdata_content );
Similar to createTextNode and createComment, this function creates a
CDataSection bound to the current DOM.
=item createProcessingInstruction
my $pi = $doc->createProcessingInstruction( $target, $data );
create a processing instruction node.
Since this method is quite long one may use its short form I<<<<<< createPI() >>>>>>.
=item createEntityReference
my $entref = $doc->createEntityReference($refname);
If a document has a DTD specified, one can create entity references by using
this function. If one wants to add an entity reference to the document, this
reference has to be created by this function.
An entity reference is unique to a document and cannot be passed to other
documents as other nodes can be passed.
I<<<<<< NOTE: >>>>>> A text content containing something that looks like an entity reference, will
not be expanded to a real entity reference unless it is a predefined entity
my $string = "&foo;";
$some_element->appendText( $string );
print $some_element->textContent; # prints "&foo;"
=item createInternalSubset
$dtd = $document->createInternalSubset( $rootnode, $public, $system);
This function creates and adds an internal subset to the given document.
Because the function automatically adds the DTD to the document there is no
need to add the created node explicitly to the document.
my $document = XML::LibXML::Document->new();
my $dtd = $document->createInternalSubset( "foo", undef, "foo.dtd" );
will result in the following XML document:
<?xml version="1.0"?>
<!DOCTYPE foo SYSTEM "foo.dtd">
By setting the public parameter it is possible to set PUBLIC DTDs to a given
document. So
my $document = XML::LibXML::Document->new();
my $dtd = $document->createInternalSubset( "foo", "-//FOO//DTD FOO 0.1//EN", undef );
will cause the following declaration to be created on the document:
<!DOCTYPE foo PUBLIC "-//FOO//DTD FOO 0.1//EN">
=item createExternalSubset
$dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
This function is similar to C<<<<<< createInternalSubset() >>>>>> but this DTD is considered to be external and is therefore not added to the
document itself. Nevertheless it can be used for validation purposes.
=item importNode
$document->importNode( $node );
If a node is not part of a document, it can be imported to another document. As
specified in DOM Level 2 Specification the Node will not be altered or removed
from its original document (C<<<<<< $node->cloneNode(1) >>>>>> will get called implicitly).
I<<<<<< NOTE: >>>>>> Don't try to use importNode() to import sub-trees that contain an entity
reference - even if the entity reference is the root node of the sub-tree. This
will cause serious problems to your program. This is a limitation of libxml2
and not of XML::LibXML itself.
=item adoptNode
$document->adoptNode( $node );
If a node is not part of a document, it can be imported to another document. As
specified in DOM Level 3 Specification the Node will not be altered but it will
be removed from its original document.
After a document adopted a node, the node, its attributes and all its
descendants belong to the new document. Because the node does not belong to the
old document, it will be unlinked from its old location first.
I<<<<<< NOTE: >>>>>> Don't try to adoptNode() to import sub-trees that contain entity references -
even if the entity reference is the root node of the sub-tree. This will cause
serious problems to your program. This is a limitation of libxml2 and not of
XML::LibXML itself.
=item externalSubset
my $dtd = $doc->externalSubset;
If a document has an external subset defined it will be returned by this
function.
I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in
XML::LibXML is still limited. In particular one may not want to use common node
functions on doctype declaration nodes!
=item internalSubset
my $dtd = $doc->internalSubset;
If a document has an internal subset defined it will be returned by this
function.
I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in
XML::LibXML is still limited. In particular one may not want to use common node
functions on doctype declaration nodes!
=item setExternalSubset
$doc->setExternalSubset($dtd);
I<<<<<< EXPERIMENTAL! >>>>>>
This method sets a DTD node as an external subset of the given document.
=item setInternalSubset
$doc->setInternalSubset($dtd);
I<<<<<< EXPERIMENTAL! >>>>>>
This method sets a DTD node as an internal subset of the given document.
=item removeExternalSubset
my $dtd = $doc->removeExternalSubset();
I<<<<<< EXPERIMENTAL! >>>>>>
If a document has an external subset defined it can be removed from the
document by using this function. The removed dtd node will be returned.
=item removeInternalSubset
my $dtd = $doc->removeInternalSubset();
I<<<<<< EXPERIMENTAL! >>>>>>
If a document has an internal subset defined it can be removed from the
document by using this function. The removed dtd node will be returned.
=item getElementsByTagName
my @nodelist = $doc->getElementsByTagName($tagname);
Implements the DOM Level 2 function
In SCALAR context this function returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByTagNameNS
my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
Implements the DOM Level 2 function
In SCALAR context this function returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByLocalName
my @nodelist = $doc->getElementsByLocalName($localname);
This allows the fetching of all nodes from a given document with the given
Localname.
In SCALAR context this function returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementById
my $node = $doc->getElementById($id);
Returns the element that has an ID attribute with the given value. If no such
element exists, this returns undef.
Note: the ID of an element may change while manipulating the document. For
documents with a DTD, the information about ID attributes is only available if
DTD loading/validation has been requested. For HTML documents parsed with the
HTML parser ID detection is done automatically. In XML documents, all "xml:id"
attributes are considered to be of type ID. You can test ID-ness of an
attribute node with $attr->isId().
In versions 1.59 and earlier this method was called getElementsById() (plural)
by mistake. Starting from 1.60 this name is maintained as an alias only for
backward compatibility.
=item indexElements
$dom->indexElements();
This function causes libxml2 to stamp all elements in a document with their
document position index which considerably speeds up XPath queries for large
documents. It should only be used with static documents that won't be further
changed by any DOM methods, because once a document is indexed, XPath will
always prefer the index to other methods of determining the document order of
nodes. XPath could therefore return improperly ordered node-lists when applied
on a document that has been changed after being indexed. It is of course
possible to use this method to re-index a modified document before using it
with XPath again. This function is not a part of the DOM specification.
This function returns number of elements indexed, -1 if error occurred, or -2
if this feature is not available in the running libxml2.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[C\ LibXML/Comment.podnu W+A =head1 NAME
XML::LibXML::Comment - XML::LibXML Comment Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Comment nodes are listed here,
# see XML::LibXML::Node manpage for other methods
$node = XML::LibXML::Comment->new( $content );
=head1 DESCRIPTION
This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for comment nodes. This can be done, since only the output of the node
types is different, but not the data structure. :-)
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::Comment->new( $content );
The constructor is the only provided function for this package. It is required,
because I<<<<<< libxml2 >>>>>> treats text nodes and comment nodes slightly differently.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[{
E E LibXML/Reader.podnu W+A =head1 NAME
XML::LibXML::Reader - interface to libxml2 pull parser
=head1 SYNOPSIS
use XML::LibXML::Reader;
my $reader = new XML::LibXML::Reader(location => "file.xml")
or die "cannot read file.xml\n";
while ($reader->read) {
processNode($reader);
}
sub processNode {
$reader = shift;
printf "%d %d %s %d\n", ($reader->depth,
$reader->nodeType,
$reader->name,
$reader->isEmptyElement);
}
or
$reader = new XML::LibXML::Reader(location => "file.xml")
or die "cannot read file.xml\n";
$reader->preservePattern('//table/tr');
$reader->finish;
print $reader->document->toString(1);
=head1 DESCRIPTION
This is a perl interface to libxml2's pull-parser implementation xmlTextReader I<<<<<< http://xmlsoft.org/html/libxml-xmlreader.html >>>>>>. This feature requires at least libxml2-2.6.21. Pull parsers (StAX in Java,
XmlReader in C#) use an iterator approach to parse an XML file. They are easier
to program than event-based parsers (SAX) and much more lightweight than
tree-based parsers (DOM), which load the complete tree into memory.
The Reader acts as a cursor going forward on the document stream and stopping
at each node in the way. At every point DOM-like methods of the Reader object
allow to examine the current node (name, namespace, attributes, etc.)
The user's code keeps control of the progress and simply calls the C<<<<<< read() >>>>>> function repeatedly to progress to the next node in the document order. Other
functions provide means for skipping complete sub-trees, or nodes until a
specific element, etc.
At every time, only a very limited portion of the document is kept in the
memory, which makes the API more memory-efficient than using DOM. However, it
is also possible to mix Reader with DOM. At every point the user may copy the
current node (optionally expanded into a complete sub-tree) from the processed
document to another DOM tree, or to instruct the Reader to collect sub-document
in form of a DOM tree consisting of selected nodes.
Reader API also supports namespaces, xml:base, entity handling, and DTD
validation. Schema and RelaxNG validation support will probably be added in
some later revision of the Perl interface.
The naming of methods compared to libxml2 and C# XmlTextReader has been changed
slightly to match the conventions of XML::LibXML. Some functions have been
changed or added with respect to the C interface.
=head1 CONSTRUCTOR
Depending on the XML source, the Reader object can be created with either of:
my $reader = XML::LibXML::Reader->new( location => "file.xml", ... );
my $reader = XML::LibXML::Reader->new( string => $xml_string, ... );
my $reader = XML::LibXML::Reader->new( IO => $file_handle, ... );
my $reader = XML::LibXML::Reader->new( FD => fileno(STDIN), ... );
my $reader = XML::LibXML::Reader->new( DOM => $dom, ... );
where ... are (optional) reader options described below in L<<<<<< Reader options >>>>>> or various parser options described in L<<<<<< XML::LibXML::Parser >>>>>>. The constructor recognizes the following XML sources:
=head2 Source specification
=over 4
=item location
Read XML from a local file or URL.
=item string
Read XML from a string.
=item IO
Read XML from a Perl IO filehandle.
=item FD
Read XML from a file descriptor (bypasses Perl I/O layer, only applicable to
filehandles for regular files or pipes). Possibly faster than IO.
=item DOM
Use reader API to walk through a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>.
=back
=head2 Reader options
=over 4
=item encoding => $encoding
override document encoding.
=item RelaxNG => $rng_schema
can be used to pass either a L<<<<<< XML::LibXML::RelaxNG >>>>>> object or a filename or URL of a RelaxNG schema to the constructor. The schema
is then used to validate the document as it is processed.
=item Schema => $xsd_schema
can be used to pass either a L<<<<<< XML::LibXML::Schema >>>>>> object or a filename or URL of a W3C XSD schema to the constructor. The schema
is then used to validate the document as it is processed.
=item ...
the reader further supports various parser options described in L<<<<<< XML::LibXML::Parser >>>>>> (specifically those labeled by /reader/).
=back
=head1 METHODS CONTROLLING PARSING PROGRESS
=over 4
=item read ()
Moves the position to the next node in the stream, exposing its properties.
Returns 1 if the node was read successfully, 0 if there are no more nodes to
read, or -1 in case of error
=item readAttributeValue ()
Parses an attribute value into one or more Text and EntityReference nodes.
Returns 1 in case of success, 0 if the reader was not positioned on an
attribute node or all the attribute values have been read, or -1 in case of
error.
=item readState ()
Gets the read state of the reader. Returns the state value, or -1 in case of
error. The module exports constants for the Reader states, see STATES below.
=item depth ()
The depth of the node in the tree, starts at 0 for the root node.
=item next ()
Skip to the node following the current one in the document order while avoiding
the sub-tree if any. Returns 1 if the node was read successfully, 0 if there are
no more nodes to read, or -1 in case of error.
=item nextElement (localname?,nsURI?)
Skip nodes following the current one in the document order until a specific
element is reached. The element's name must be equal to a given localname if
defined, and its namespace must equal to a given nsURI if defined. Either of
the arguments can be undefined (or omitted, in case of the latter or both).
Returns 1 if the element was found, 0 if there are no more nodes to read, or -1
in case of error.
=item nextPatternMatch (compiled_pattern)
Skip nodes following the current one in the document order until an element
matching a given compiled pattern is reached. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< matchesPattern >>>>>> method.
Returns 1 if the element was found, 0 if there are no more nodes to read, or -1
in case of error.
=item skipSiblings ()
Skip all nodes on the same or lower level until the first node on a higher
level is reached. In particular, if the current node occurs in an element, the
reader stops at the end tag of the parent element, otherwise it stops at a node
immediately following the parent node.
Returns 1 if successful, 0 if end of the document is reached, or -1 in case of
error.
=item nextSibling ()
It skips to the node following the current one in the document order while
avoiding the sub-tree if any.
Returns 1 if the node was read successfully, 0 if there are no more nodes to
read, or -1 in case of error
=item nextSiblingElement (name?,nsURI?)
Like nextElement but only processes sibling elements of the current node
(moving forward using C<<<<<< nextSibling () >>>>>> rather than C<<<<<< read () >>>>>>, internally).
Returns 1 if the element was found, 0 if there are no more sibling nodes, or -1
in case of error.
=item finish ()
Skip all remaining nodes in the document, reaching end of the document.
Returns 1 if successful, 0 in case of error.
=item close ()
This method releases any resources allocated by the current instance and closes
any underlying input. It returns 0 on failure and 1 on success. This method is
automatically called by the destructor when the reader is forgotten, therefore
you do not have to call it directly.
=back
=head1 METHODS EXTRACTING INFORMATION
=over 4
=item name ()
Returns the qualified name of the current node, equal to (Prefix:)LocalName.
=item nodeType ()
Returns the type of the current node. See NODE TYPES below.
=item localName ()
Returns the local name of the node.
=item prefix ()
Returns the prefix of the namespace associated with the node.
=item namespaceURI ()
Returns the URI defining the namespace associated with the node.
=item isEmptyElement ()
Check if the current node is empty, this is a bit bizarre in the sense that
C<<<<<< <a/> >>>>>> will be considered empty while C<<<<<< <a></a> >>>>>> will not.
=item hasValue ()
Returns true if the node can have a text value.
=item value ()
Provides the text value of the node if present or undef if not available.
=item readInnerXml ()
Reads the contents of the current node, including child nodes and markup.
Returns a string containing the XML of the node's content, or undef if the
current node is neither an element nor attribute, or has no child nodes.
=item readOuterXml ()
Reads the contents of the current node, including child nodes and markup.
Returns a string containing the XML of the node including its content, or undef
if the current node is neither an element nor attribute.
=item nodePath()
Returns a canonical location path to the current element from the root node to
the current node. Namespaced elements are matched by '*', because there is no
way to declare prefixes within XPath patterns. Unlike C<<<<<< XML::LibXML::Node::nodePath() >>>>>>, this function does not provide sibling counts (i.e. instead of e.g. '/a/b[1]'
and '/a/b[2]' you get '/a/b' for both matches).
=item matchesPattern(compiled_pattern)
Returns a true value if the current node matches a compiled pattern. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< nextPatternMatch >>>>>> method.
=back
=head1 METHODS EXTRACTING DOM NODES
=over 4
=item document ()
Provides access to the document tree built by the reader. This function can be
used to collect the preserved nodes (see C<<<<<< preserveNode() >>>>>> and preservePattern).
CAUTION: Never use this function to modify the tree unless reading of the whole
document is completed!
=item copyCurrentNode (deep)
This function is similar to the DOM function C<<<<<< copyNode() >>>>>>. It returns a copy of the currently processed node as a corresponding DOM
object. Use deep = 1 to obtain the full sub-tree.
=item preserveNode ()
This tells the XML Reader to preserve the current node in the document tree. A
document tree consisting of the preserved nodes and their content can be
obtained using the method C<<<<<< document() >>>>>> once parsing is finished.
Returns the node or NULL in case of error.
=item preservePattern (pattern,\%ns_map)
This tells the XML Reader to preserve all nodes matched by the pattern (which
is a streaming XPath subset). A document tree consisting of the preserved nodes
and their content can be obtained using the method C<<<<<< document() >>>>>> once parsing is finished.
An optional second argument can be used to provide a HASH reference mapping
prefixes used by the XPath to namespace URIs.
The XPath subset available with this function is described at
http://www.w3.org/TR/xmlschema-1/#Selector
and matches the production
Path ::= ('.//')? ( Step '/' )* ( Step | '@' NameTest )
Returns a positive number in case of success and -1 in case of error
=back
=head1 METHODS PROCESSING ATTRIBUTES
=over 4
=item attributeCount ()
Provides the number of attributes of the current node.
=item hasAttributes ()
Whether the node has attributes.
=item getAttribute (name)
Provides the value of the attribute with the specified qualified name.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item getAttributeNs (localName, namespaceURI)
Provides the value of the specified attribute.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item getAttributeNo (no)
Provides the value of the attribute with the specified index relative to the
containing element.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item isDefault ()
Returns true if the current attribute node was generated from the default value
defined in the DTD.
=item moveToAttribute (name)
Moves the position to the attribute with the specified qualified name.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToAttributeNo (no)
Moves the position to the attribute with the specified index relative to the
containing element.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToAttributeNs (localName,namespaceURI)
Moves the position to the attribute with the specified local name and namespace
URI.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToFirstAttribute ()
Moves the position to the first attribute associated with the current node.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToNextAttribute ()
Moves the position to the next attribute associated with the current node.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToElement ()
Moves the position to the node that contains the current attribute node.
Returns 1 in case of success, -1 in case of error, 0 if not moved
=item isNamespaceDecl ()
Determine whether the current node is a namespace declaration rather than a
regular attribute.
Returns 1 if the current node is a namespace declaration, 0 if it is a regular
attribute or other type of node, or -1 in case of error.
=back
=head1 OTHER METHODS
=over 4
=item lookupNamespace (prefix)
Resolves a namespace prefix in the scope of the current element.
Returns a string containing the namespace URI to which the prefix maps or undef
in case of error.
=item encoding ()
Returns a string containing the encoding of the document or undef in case of
error.
=item standalone ()
Determine the standalone status of the document being read. Returns 1 if the
document was declared to be standalone, 0 if it was declared to be not
standalone, or -1 if the document did not specify its standalone status or in
case of error.
=item xmlVersion ()
Determine the XML version of the document being read. Returns a string
containing the XML version of the document or undef in case of error.
=item baseURI ()
Returns the base URI of a given node.
=item isValid ()
Retrieve the validity status from the parser.
Returns 1 if valid, 0 if no, and -1 in case of error.
=item xmlLang ()
The xml:lang scope within which the node resides.
=item lineNumber ()
Provide the line number of the current parsing point.
=item columnNumber ()
Provide the column number of the current parsing point.
=item byteConsumed ()
This function provides the current index of the parser relative to the start of
the current entity. This function is computed in bytes from the beginning
starting at zero and finishing at the size in bytes of the file if parsing a
file. The function is of constant cost if the input is UTF-8 but can be costly
if run on non-UTF-8 input.
=item setParserProp (prop => value, ...)
Change the parser processing behaviour by changing some of its internal
properties. The following properties are available with this function:
``load_ext_dtd'', ``complete_attributes'', ``validation'', ``expand_entities''.
Since some of the properties can only be changed before any read has been done,
it is best to set the parsing properties at the constructor.
Returns 0 if the call was successful, or -1 in case of error
=item getParserProp (prop)
Get value of a parser internal property. The following property names can be
used: ``load_ext_dtd'', ``complete_attributes'', ``validation'',
``expand_entities''.
Returns the value, usually 0 or 1, or -1 in case of error.
=back
=head1 DESTRUCTION
XML::LibXML takes care of the reader object destruction when the last reference
to the reader object goes out of scope. The document tree is preserved, though,
if either of $reader->document or $reader->preserveNode was used and references
to the document tree exist.
=head1 NODE TYPES
The reader interface provides the following constants for node types (the
constant symbols are exported by default or if tag C<<<<<< :types >>>>>> is used).
XML_READER_TYPE_NONE => 0
XML_READER_TYPE_ELEMENT => 1
XML_READER_TYPE_ATTRIBUTE => 2
XML_READER_TYPE_TEXT => 3
XML_READER_TYPE_CDATA => 4
XML_READER_TYPE_ENTITY_REFERENCE => 5
XML_READER_TYPE_ENTITY => 6
XML_READER_TYPE_PROCESSING_INSTRUCTION => 7
XML_READER_TYPE_COMMENT => 8
XML_READER_TYPE_DOCUMENT => 9
XML_READER_TYPE_DOCUMENT_TYPE => 10
XML_READER_TYPE_DOCUMENT_FRAGMENT => 11
XML_READER_TYPE_NOTATION => 12
XML_READER_TYPE_WHITESPACE => 13
XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14
XML_READER_TYPE_END_ELEMENT => 15
XML_READER_TYPE_END_ENTITY => 16
XML_READER_TYPE_XML_DECLARATION => 17
=head1 STATES
The following constants represent the values returned by C<<<<<< readState() >>>>>>. They are exported by default, or if tag C<<<<<< :states >>>>>> is used:
XML_READER_NONE => -1
XML_READER_START => 0
XML_READER_ELEMENT => 1
XML_READER_END => 2
XML_READER_EMPTY => 3
XML_READER_BACKTRACK => 4
XML_READER_DONE => 5
XML_READER_ERROR => 6
=head1 SEE ALSO
L<<<<<< XML::LibXML::Pattern >>>>>> for information about compiled patterns.
http://xmlsoft.org/html/libxml-xmlreader.html
http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html
=head1 ORIGINAL IMPLEMENTATION
Heiko Klein, and Petr Pajas
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[M LibXML/Attr.podnu W+A =head1 NAME
XML::LibXML::Attr - XML::LibXML Attribute Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Attribute nodes are listed here,
# see XML::LibXML::Node manpage for other methods
$attr = XML::LibXML::Attr->new($name [,$value]);
$string = $attr->getValue();
$string = $attr->value;
$attr->setValue( $string );
$node = $attr->getOwnerElement();
$attr->setNamespace($nsURI, $prefix);
$bool = $attr->isId;
$string = $attr->serializeContent;
=head1 DESCRIPTION
This is the interface to handle Attributes like ordinary nodes. The naming of
the class relies on the W3C DOM documentation.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$attr = XML::LibXML::Attr->new($name [,$value]);
Class constructor. If you need to work with ISO encoded strings, you should I<<<<<< always >>>>>> use the C<<<<<< createAttribute >>>>>> of L<<<<<< XML::LibXML::Document >>>>>>.
=item getValue
$string = $attr->getValue();
Returns the value stored for the attribute. If undef is returned, the attribute
has no value, which is different from being C<<<<<< not specified >>>>>>.
=item value
$string = $attr->value;
Alias for I<<<<<< getValue() >>>>>>
=item setValue
$attr->setValue( $string );
This is needed to set a new attribute value. If ISO encoded strings are passed
as parameter, the node has to be bound to a document, otherwise the encoding
might be done incorrectly.
=item getOwnerElement
$node = $attr->getOwnerElement();
returns the node the attribute belongs to. If the attribute is not bound to a
node, undef will be returned. Overwriting the underlying implementation, the I<<<<<< parentNode >>>>>> function will return undef, instead of the owner element.
=item setNamespace
$attr->setNamespace($nsURI, $prefix);
This function tries to bind the attribute to a given namespace. If C<<<<<< $nsURI >>>>>> is undefined or empty, the function discards any previous association of the
attribute with a namespace. If the namespace was not previously declared in the
context of the attribute, this function will fail. In this case you may wish to
call setNamespace() on the ownerElement. If the namespace URI is non-empty and
declared in the context of the attribute, but only with a different (non-empty)
prefix, then the attribute is still bound to the namespace but gets a different
prefix than C<<<<<< $prefix >>>>>>. The function also fails if the prefix is empty but the namespace URI is not
(because unprefixed attributes should by definition belong to no namespace).
This function returns 1 on success, 0 otherwise.
=item isId
$bool = $attr->isId;
Determine whether an attribute is of type ID. For documents with a DTD, this
information is only available if DTD loading/validation has been requested. For
HTML documents parsed with the HTML parser ID detection is done automatically.
In XML documents, all "xml:id" attributes are considered to be of type ID.
=item serializeContent($docencoding)
$string = $attr->serializeContent;
This function is not part of DOM API. It returns attribute content in the form
in which it serializes into XML, that is with all meta-characters properly
quoted and with raw entity references (except for entities expanded during
parse time). Setting the optional $docencoding flag to 1 enforces document
encoding for the output string (which is then passed to Perl as a byte string).
Otherwise the string is passed to Perl as (UTF-8 encoded) characters.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[/j` ` LibXML/Error.podnu W+A =head1 NAME
XML::LibXML::Error - Structured Errors
=head1 SYNOPSIS
eval { ... };
if (ref($@)) {
# handle a structured error (XML::LibXML::Error object)
} elsif ($@) {
# error, but not an XML::LibXML::Error object
} else {
# no error
}
$XML::LibXML::Error::WARNINGS=1;
$message = $@->as_string();
print $@->dump();
$error_domain = $@->domain();
$error_code = $@->code();
$error_message = $@->message();
$error_level = $@->level();
$filename = $@->file();
$line = $@->line();
$nodename = $@->nodename();
$error_str1 = $@->str1();
$error_str2 = $@->str2();
$error_str3 = $@->str3();
$error_num1 = $@->num1();
$error_num2 = $@->num2();
$string = $@->context();
$offset = $@->column();
$previous_error = $@->_prev();
=head1 DESCRIPTION
The XML::LibXML::Error class is a tiny frontend to I<<<<<< libxml2 >>>>>>'s structured error support. If XML::LibXML is compiled with structured error
support, all errors reported by libxml2 are transformed to XML::LibXML::Error
objects. These objects automatically serialize to the corresponding error
messages when printed or used in a string operation, but as objects, can also
be used to get a detailed and structured information about the error that
occurred.
Unlike most other XML::LibXML objects, XML::LibXML::Error doesn't wrap an
underlying I<<<<<< libxml2 >>>>>> structure directly, but rather transforms it to a blessed Perl hash reference
containing the individual fields of the structured error information as hash
key-value pairs. Individual items (fields) of a structured error can either be
obtained directly as $@->{field}, or using autoloaded methods such as
$@->field() (where field is the field name). XML::LibXML::Error objects have
the following fields: domain, code, level, file, line, nodename, message, str1,
str2, str3, num1, num2, and _prev (some of them may be undefined).
=over 4
=item $XML::LibXML::Error::WARNINGS
$XML::LibXML::Error::WARNINGS=1;
Traditionally, XML::LibXML was suppressing parser warnings by setting libxml2's
global variable xmlGetWarningsDefaultValue to 0. Since 1.70 we do not change
libxml2's global variables anymore; for backward compatibility, XML::LibXML
suppresses warnings. This variable can be set to 1 to enable reporting of these
warnings via Perl C<<<<<< warn >>>>>> and to 2 to report them via C<<<<<< die >>>>>>.
$message = $@->as_string();
This function serializes an XML::LibXML::Error object to a string
containing the full error message close to the message produced by I<<<<<< libxml2 >>>>>> default error handlers and tools like xmllint. This method is also used to
overload "" operator on XML::LibXML::Error, so it is automatically called
whenever XML::LibXML::Error object is treated as a string (e.g. in print $@).
=item dump
print $@->dump();
This function serializes a XML::LibXML::Error to a string displaying all fields
of the error structure individually on separate lines of the form 'name' =>
'value'.
=item domain
$error_domain = $@->domain();
Returns string containing information about what part of the library raised the
error. Can be one of: "parser", "tree", "namespace", "validity", "HTML parser",
"memory", "output", "I/O", "ftp", "http", "XInclude", "XPath", "xpointer",
"regexp", "Schemas datatype", "Schemas parser", "Schemas validity", "Relax-NG
parser", "Relax-NG validity", "Catalog", "C14N", "XSLT", "validity".
=item code
$error_code = $@->code();
Returns the actual libxml2 error code. The XML::LibXML::ErrNo module defines
constants for individual error codes. Currently libxml2 uses over 480 different
error codes.
=item message
$error_message = $@->message();
Returns a human-readable informative error message.
=item level
$error_level = $@->level();
Returns an integer value describing how severe the error is.
XML::LibXML::Error defines the following constants:
=over 4
=item *
XML_ERR_NONE = 0
=item *
XML_ERR_WARNING = 1 : A simple warning.
=item *
XML_ERR_ERROR = 2 : A recoverable error.
=item *
XML_ERR_FATAL = 3 : A fatal error.
=back
=item file
$filename = $@->file();
Returns the filename of the file being processed while the error occurred.
=item line
$line = $@->line();
The line number, if available.
=item nodename
$nodename = $@->nodename();
Name of the node where error occurred, if available. When this field is
non-empty, libxml2 actually returned a physical pointer to the specified node.
Due to memory management issues, it is very difficult to implement a way to
expose the pointer to the Perl level as a XML::LibXML::Node. For this reason,
XML::LibXML::Error currently only exposes the name of the node.
=item str1
$error_str1 = $@->str1();
Error specific. Extra string information.
=item str2
$error_str2 = $@->str2();
Error specific. Extra string information.
=item str3
$error_str3 = $@->str3();
Error specific. Extra string information.
=item num1
$error_num1 = $@->num1();
Error specific. Extra numeric information.
=item num2
$error_num2 = $@->num2();
In recent libxml2 versions, this value contains a column number of the error or
0 if N/A.
=item context
$string = $@->context();
For parsing errors, this field contains about 80 characters of the XML near the
place where the error occurred. The field C<<<<<< $@->column() >>>>>> contains the corresponding offset. Where N/A, the field is undefined.
=item column
$offset = $@->column();
See C<<<<<< $@->context() >>>>>> above.
=item _prev
$previous_error = $@->_prev();
This field can possibly hold a reference to another XML::LibXML::Error object
representing an error which occurred just before this error.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[5 LibXML/DocumentFragment.podnu W+A =head1 NAME
XML::LibXML::DocumentFragment - XML::LibXML's DOM L2 Document Fragment Implementation
=head1 SYNOPSIS
use XML::LibXML;
=head1 DESCRIPTION
This class is a helper class as described in the DOM Level 2 Specification. It
is implemented as a node without name. All adding, inserting or replacing
functions are aware of document fragments now.
As well I<<<<<< all >>>>>> unbound nodes (all nodes that do not belong to any document sub-tree) are
implicit members of document fragments.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[BI>d LibXML/RelaxNG.podnu W+A =head1 NAME
XML::LibXML::RelaxNG - RelaxNG Schema Validation
=head1 SYNOPSIS
use XML::LibXML;
$doc = XML::LibXML->new->parse_file($url);
$rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url );
$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring );
$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc );
eval { $rngschema->validate( $doc ); };
=head1 DESCRIPTION
The XML::LibXML::RelaxNG class is a tiny frontend to libxml2's RelaxNG
implementation. Currently it supports only schema parsing and document
validation.
=head1 METHODS
=over 4
=item new
$rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url );
$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring );
$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc );
The constructor of XML::LibXML::RelaxNG may get called with either one of three
parameters. The parameter tells the class from which source it should generate
a validation schema. It is important, that each schema only have a single
source.
The location parameter allows to parse a schema from the filesystem or a URL.
The string parameter will parse the schema from the given XML string.
The DOM parameter allows to parse the schema from a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>.
Note that the constructor will die() if the schema does not meet the
constraints of the RelaxNG specification.
=item validate
eval { $rngschema->validate( $doc ); };
This function allows to validate a (parsed) document against the given RelaxNG
schema. The argument of this function should be a XML::LibXML::Document object.
If this function succeeds, it will return 0, otherwise it will die() and report
the errors found. Because of this validate() should be always evaluated.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[
[ LibXML/XPathExpression.podnu W+A =head1 NAME
XML::LibXML::XPathExpression - XML::LibXML::XPathExpression - interface to libxml2 pre-compiled XPath expressions
=head1 SYNOPSIS
use XML::LibXML;
my $compiled_xpath = new XML::LibXML::XPathExpression('//foo[@bar="baz"][position()<4]');
# interface from XML::LibXML::Node
my $result = $node->find($compiled_xpath);
my @nodes = $node->findnodes($compiled_xpath);
my $value = $node->findvalue($compiled_xpath);
# interface from XML::LibXML::XPathContext
my $result = $xpc->find($compiled_xpath,$node);
my @nodes = $xpc->findnodes($compiled_xpath,$node);
my $value = $xpc->findvalue($compiled_xpath,$node);
$compiled = XML::LibXML::XPathExpression->new( xpath_string );
=head1 DESCRIPTION
This is a perl interface to libxml2's pre-compiled XPath expressions.
Pre-compiling an XPath expression can give some performance benefit if the
same XPath query is evaluated many times. C<<<<<< XML::LibXML::XPathExpression >>>>>> objects can be passed to all C<<<<<< find... >>>>>> functions of C<<<<<< XML::LibXML >>>>>> that expect an XPath expression.
=over 4
=item new()
$compiled = XML::LibXML::XPathExpression->new( xpath_string );
The constructor takes an XPath 1.0 expression as a string and returns an object
representing the pre-compiled expressions (the actual data structure is
internal to libxml2).
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[j LibXML/RegExp.podnu W+A =head1 NAME
XML::LibXML::RegExp - XML::LibXML::RegExp - interface to libxml2 regular expressions
=head1 SYNOPSIS
use XML::LibXML;
my $compiled_re = new XML::LibXML::RegExp('[0-9]{5}(-[0-9]{4})?');
if ($compiled_re->isDeterministic()) { ... }
if ($compiled_re->matches($string)) { ... }
$compiled_re = XML::LibXML::RegExp->new( $regexp_str );
$bool = $compiled_re->matches($string);
$bool = $compiled_re->isDeterministic();
=head1 DESCRIPTION
This is a perl interface to libxml2's implementation of regular expressions,
which are used e.g. for validation of XML Schema simple types (pattern facet).
=over 4
=item new()
$compiled_re = XML::LibXML::RegExp->new( $regexp_str );
The constructor takes a string containing a regular expression and returns a
compiled regexp object.
=item matches($string)
$bool = $compiled_re->matches($string);
Given a string value, returns a true value if the value is matched by the
compiled regular expression.
=item isDeterministic()
$bool = $compiled_re->isDeterministic();
Returns a true value if the regular expression is deterministic; returns false
otherwise. (See the definition of determinism in the XML spec (L<<<<<< http://www.w3.org/TR/REC-xml/#determinism >>>>>>))
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[n LibXML/Pattern.podnu W+A =head1 NAME
XML::LibXML::Pattern - XML::LibXML::Pattern - interface to libxml2 XPath patterns
=head1 SYNOPSIS
use XML::LibXML;
my $pattern = new XML::LibXML::Pattern('/x:html/x:body//x:div', { 'x' => 'http://www.w3.org/1999/xhtml' });
# test a match on a XML::LibXML::Node $node
if ($pattern->matchesNode($node)) { ... }
# or on a XML::LibXML::Reader
if ($reader->matchesPattern($pattern)) { ... }
# or skip reading all nodes that do not match
print $reader->nodePath while $reader->nextPatternMatch($pattern);
$pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } );
$bool = $pattern->matchesNode($node);
=head1 DESCRIPTION
This is a perl interface to libxml2's pattern matching support I<<<<<< http://xmlsoft.org/html/libxml-pattern.html >>>>>>. This feature requires recent versions of libxml2.
Patterns are a small subset of the XPath language, which is limited to
(disjunctions of) location paths involving the child and descendant axes in
abbreviated form as described by the extended BNF given below:
Selector ::= Path ( '|' Path )*
Path ::= ('.//' | '//' | '/' )? Step ( '/' Step )*
Step ::= '.' | NameTest
NameTest ::= QName | '*' | NCName ':' '*'
For readability, whitespace may be used in selector XPath expressions even
though not explicitly allowed by the grammar: whitespace may be freely added
within patterns before or after any token, where
token ::= '.' | '/' | '//' | '|' | NameTest
Note that no predicates or attribute tests are allowed.
Patterns are particularly useful for stream parsing provided via the C<<<<<< XML::LibXML::Reader >>>>>> interface.
=over 4
=item new()
$pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } );
The constructor of a pattern takes a pattern expression (as described by the
BNF grammar above) and an optional HASH reference mapping prefixes to namespace
URIs. The method returns a compiled pattern object.
Note that if the document has a default namespace, it must still be given a
prefix in order to be matched (as demanded by the XPath 1.0 specification). For
example, to match an element C<<<<<< <a xmlns="http://foo.bar"/> >>>>>>, one should use a pattern like this:
$pattern = XML::LibXML::Pattern->new( 'foo:a', { foo => 'http://foo.bar' });
=item matchesNode($node)
$bool = $pattern->matchesNode($node);
Given a XML::LibXML::Node object, returns a true value if the node is matched
by the compiled pattern expression.
=back
=head1 SEE ALSO
L<<<<<< XML::LibXML::Reader >>>>>> for other methods involving compiled patterns.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[j LibXML/NodeList.pmnu W+A # $Id: NodeList.pm 785 2009-07-16 14:17:46Z pajas $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::NodeList;
use strict;
use XML::LibXML::Boolean;
use XML::LibXML::Literal;
use XML::LibXML::Number;
use vars qw ($VERSION);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
use overload
'""' => \&to_literal,
'bool' => \&to_boolean,
;
# Constructor: builds a node list holding the given arguments, in order.
# The arguments are copied into a fresh array, which is then blessed into
# $class.
sub new {
    my ($class, @nodes) = @_;
    return bless \@nodes, $class;
}
# Constructor taking an array reference instead of a flat list.
#   $array_ref - reference to the array of nodes
#   $reuse     - when true, $array_ref itself is blessed (no copy is made);
#                otherwise a shallow copy of the array is blessed
# Returns the blessed array reference.
sub new_from_ref {
    my ($class, $array_ref, $reuse) = @_;
    my $storage = $reuse ? $array_ref : [ @{$array_ref} ];
    return bless $storage, $class;
}
# Removes the last element of the list and returns it.
# CORE::pop is spelled out because this package defines its own pop().
sub pop {
    my ($self) = @_;
    return CORE::pop @{$self};
}
# Appends the given nodes to the end of the list.
# Returns whatever CORE::push returns (the new element count).
sub push {
    my ($self, @nodes) = @_;
    return CORE::push @{$self}, @nodes;
}
# Appends every node of another node list to the end of this one.
#   $nodelist - object whose get_nodelist() method supplies the nodes to add
# Returns whatever CORE::push returns (the new element count).
sub append {
    my ($self, $nodelist) = @_;
    return CORE::push @{$self}, $nodelist->get_nodelist;
}
# Removes the first element of the list and returns it.
# CORE::shift is spelled out because this package defines its own shift().
sub shift {
    my ($self) = @_;
    return CORE::shift @{$self};
}
# Inserts the given nodes at the front of the list, keeping their order.
# Returns whatever CORE::unshift returns (the new element count).
sub unshift {
    my ($self, @nodes) = @_;
    return CORE::unshift @{$self}, @nodes;
}
# Inserts every node of another node list at the front of this one.
#   $nodelist - object whose get_nodelist() method supplies the nodes to add
# Returns whatever CORE::unshift returns (the new element count).
sub prepend {
    my ($self, $nodelist) = @_;
    return CORE::unshift @{$self}, $nodelist->get_nodelist;
}
# Returns the number of elements currently held in the list.
sub size {
    my ($self) = @_;
    my $count = @{$self};
    return $count;
}
# Returns the node at position $pos, or undef when there is no such node.
#
# Positions start at 1, not 0 (mainly because of XPath), so the node at
# position $pos lives at array index $pos - 1.
#
# A position below 1 (or an undefined one) would turn into a negative array
# index, which Perl resolves from the END of the array; get_node(0) used to
# return the last node. Such positions are rejected explicitly instead.
sub get_node {
    my ($self, $pos) = @_;

    # "return undef" (not a bare return) keeps list-context callers getting a
    # single undef, exactly as for a position past the end of the list.
    return undef if !defined $pos || $pos < 1;

    return $self->[$pos - 1];
}
# item() is an alias for get_node().
*item = \&get_node;
# Returns the contents of the node list as a plain Perl list
# (in scalar context this evaluates to the number of elements).
sub get_nodelist {
    my ($self) = @_;
    return @{$self};
}
# Boolean value of the node list: XML::LibXML::Boolean->True when the list
# holds at least one element, XML::LibXML::Boolean->False when it is empty.
# Also installed as the 'bool' overload of this package.
sub to_boolean {
    my ($self) = @_;
    return XML::LibXML::Boolean->True if @{$self} > 0;
    return XML::LibXML::Boolean->False;
}
# The string-value of a node list is the string-value of its first node.
# Returns the empty string for an empty list; otherwise delegates to the
# first element's string_value() method.
sub string_value {
    my ($self) = @_;
    if (@{$self}) {
        return $self->[0]->string_value;
    }
    return '';
}
# Concatenates the string_value() of every element (skipping undefined
# results) and wraps the resulting string in an XML::LibXML::Literal.
# Also installed as the '""' (stringification) overload of this package.
sub to_literal {
    my ($self) = @_;
    my @values = map { $_->string_value } @{$self};
    my $text   = join '', grep { defined } @values;
    return XML::LibXML::Literal->new($text);
}
# Numeric value of the node list: the result of to_literal() handed to
# XML::LibXML::Number->new.
sub to_number {
    my ($self) = @_;
    return XML::LibXML::Number->new( $self->to_literal );
}
# Obsolete entry point: emits a warning saying so and returns undef.
sub iterator {
    my $notice = "this function is obsolete!\nIt was disabled in version 1.54\n";
    warn $notice;
    return undef;
}
1;
__END__
=head1 NAME
XML::LibXML::NodeList - a list of XML document nodes
=head1 DESCRIPTION
An XML::LibXML::NodeList object contains an ordered list of nodes, as
detailed by the W3C DOM documentation of Node Lists.
=head1 SYNOPSIS
my $results = $dom->findnodes('//somepath');
foreach my $context ($results->get_nodelist) {
my $newresults = $context->findnodes('./other/element');
...
}
=head1 API
=head2 new()
You will almost never have to create a new NodeList object, as it is all
done for you by XPath.
=head2 get_nodelist()
Returns a list of nodes, the contents of the node list, as a perl list.
=head2 string_value()
Returns the string-value of the first node in the list.
See the XPath specification for what "string-value" means.
=head2 to_literal()
Returns the concatenation of all the string-values of all
the nodes in the list.
=head2 get_node($pos)
Returns the node at $pos. The node position in XPath is based at 1, not 0.
=head2 size()
Returns the number of nodes in the NodeList.
=head2 pop()
Equivalent to perl's pop function.
=head2 push(@nodes)
Equivalent to perl's push function.
=head2 append($nodelist)
Given a nodelist, appends the list of nodes in $nodelist to the end of the
current list.
=head2 shift()
Equivalent to perl's shift function.
=head2 unshift(@nodes)
Equivalent to perl's unshift function.
=head2 prepend($nodelist)
Given a nodelist, prepends the list of nodes in $nodelist to the front of
the current list.
=head2 iterator()
Obsolete. This method was disabled in version 1.54: it now only emits a
warning and returns undef. It used to return a new nodelist iterator, which
was useful if more complex nodelist processing was needed.
=cut
PK @[.Co Co LibXML/ErrNo.pmnu W+A # $Id: ErrNo.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::ErrNo;
use strict;
use vars qw($VERSION);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
# Codes 0-100: ERR_* error codes and WAR_* warning codes.
# NOTE(review): the names look like libxml2's xmlParserErrors enum with the
# XML_ prefix dropped - confirm against libxml2's xmlerror.h.
use constant ERR_OK => 0;
use constant ERR_INTERNAL_ERROR => 1;
use constant ERR_NO_MEMORY => 2;
use constant ERR_DOCUMENT_START => 3;
use constant ERR_DOCUMENT_EMPTY => 4;
use constant ERR_DOCUMENT_END => 5;
use constant ERR_INVALID_HEX_CHARREF => 6;
use constant ERR_INVALID_DEC_CHARREF => 7;
use constant ERR_INVALID_CHARREF => 8;
use constant ERR_INVALID_CHAR => 9;
use constant ERR_CHARREF_AT_EOF => 10;
use constant ERR_CHARREF_IN_PROLOG => 11;
use constant ERR_CHARREF_IN_EPILOG => 12;
use constant ERR_CHARREF_IN_DTD => 13;
use constant ERR_ENTITYREF_AT_EOF => 14;
use constant ERR_ENTITYREF_IN_PROLOG => 15;
use constant ERR_ENTITYREF_IN_EPILOG => 16;
use constant ERR_ENTITYREF_IN_DTD => 17;
use constant ERR_PEREF_AT_EOF => 18;
use constant ERR_PEREF_IN_PROLOG => 19;
use constant ERR_PEREF_IN_EPILOG => 20;
use constant ERR_PEREF_IN_INT_SUBSET => 21;
use constant ERR_ENTITYREF_NO_NAME => 22;
use constant ERR_ENTITYREF_SEMICOL_MISSING => 23;
use constant ERR_PEREF_NO_NAME => 24;
use constant ERR_PEREF_SEMICOL_MISSING => 25;
use constant ERR_UNDECLARED_ENTITY => 26;
use constant WAR_UNDECLARED_ENTITY => 27;
use constant ERR_UNPARSED_ENTITY => 28;
use constant ERR_ENTITY_IS_EXTERNAL => 29;
use constant ERR_ENTITY_IS_PARAMETER => 30;
use constant ERR_UNKNOWN_ENCODING => 31;
use constant ERR_UNSUPPORTED_ENCODING => 32;
use constant ERR_STRING_NOT_STARTED => 33;
use constant ERR_STRING_NOT_CLOSED => 34;
use constant ERR_NS_DECL_ERROR => 35;
use constant ERR_ENTITY_NOT_STARTED => 36;
use constant ERR_ENTITY_NOT_FINISHED => 37;
use constant ERR_LT_IN_ATTRIBUTE => 38;
use constant ERR_ATTRIBUTE_NOT_STARTED => 39;
use constant ERR_ATTRIBUTE_NOT_FINISHED => 40;
use constant ERR_ATTRIBUTE_WITHOUT_VALUE => 41;
use constant ERR_ATTRIBUTE_REDEFINED => 42;
use constant ERR_LITERAL_NOT_STARTED => 43;
use constant ERR_LITERAL_NOT_FINISHED => 44;
use constant ERR_COMMENT_NOT_FINISHED => 45;
use constant ERR_PI_NOT_STARTED => 46;
use constant ERR_PI_NOT_FINISHED => 47;
use constant ERR_NOTATION_NOT_STARTED => 48;
use constant ERR_NOTATION_NOT_FINISHED => 49;
use constant ERR_ATTLIST_NOT_STARTED => 50;
use constant ERR_ATTLIST_NOT_FINISHED => 51;
use constant ERR_MIXED_NOT_STARTED => 52;
use constant ERR_MIXED_NOT_FINISHED => 53;
use constant ERR_ELEMCONTENT_NOT_STARTED => 54;
use constant ERR_ELEMCONTENT_NOT_FINISHED => 55;
use constant ERR_XMLDECL_NOT_STARTED => 56;
use constant ERR_XMLDECL_NOT_FINISHED => 57;
use constant ERR_CONDSEC_NOT_STARTED => 58;
use constant ERR_CONDSEC_NOT_FINISHED => 59;
use constant ERR_EXT_SUBSET_NOT_FINISHED => 60;
use constant ERR_DOCTYPE_NOT_FINISHED => 61;
use constant ERR_MISPLACED_CDATA_END => 62;
use constant ERR_CDATA_NOT_FINISHED => 63;
use constant ERR_RESERVED_XML_NAME => 64;
use constant ERR_SPACE_REQUIRED => 65;
use constant ERR_SEPARATOR_REQUIRED => 66;
use constant ERR_NMTOKEN_REQUIRED => 67;
use constant ERR_NAME_REQUIRED => 68;
use constant ERR_PCDATA_REQUIRED => 69;
use constant ERR_URI_REQUIRED => 70;
use constant ERR_PUBID_REQUIRED => 71;
use constant ERR_LT_REQUIRED => 72;
use constant ERR_GT_REQUIRED => 73;
use constant ERR_LTSLASH_REQUIRED => 74;
use constant ERR_EQUAL_REQUIRED => 75;
use constant ERR_TAG_NAME_MISMATCH => 76;
use constant ERR_TAG_NOT_FINISHED => 77;
use constant ERR_STANDALONE_VALUE => 78;
use constant ERR_ENCODING_NAME => 79;
use constant ERR_HYPHEN_IN_COMMENT => 80;
use constant ERR_INVALID_ENCODING => 81;
use constant ERR_EXT_ENTITY_STANDALONE => 82;
use constant ERR_CONDSEC_INVALID => 83;
use constant ERR_VALUE_REQUIRED => 84;
use constant ERR_NOT_WELL_BALANCED => 85;
use constant ERR_EXTRA_CONTENT => 86;
use constant ERR_ENTITY_CHAR_ERROR => 87;
use constant ERR_ENTITY_PE_INTERNAL => 88;
use constant ERR_ENTITY_LOOP => 89;
use constant ERR_ENTITY_BOUNDARY => 90;
use constant ERR_INVALID_URI => 91;
use constant ERR_URI_FRAGMENT => 92;
use constant WAR_CATALOG_PI => 93;
use constant ERR_NO_DTD => 94;
use constant ERR_CONDSEC_INVALID_KEYWORD => 95;
use constant ERR_VERSION_MISSING => 96;
use constant WAR_UNKNOWN_VERSION => 97;
use constant WAR_LANG_VALUE => 98;
use constant WAR_NS_URI => 99;
use constant WAR_NS_URI_RELATIVE => 100;
# Codes 200-203: NS_ERR_* codes (namespace-related, judging by the names).
use constant NS_ERR_XML_NAMESPACE => 200;
use constant NS_ERR_UNDEFINED_NAMESPACE => 201;
use constant NS_ERR_QNAME => 202;
use constant NS_ERR_ATTRIBUTE_REDEFINED => 203;
# Codes 500-537: DTD_* codes (DTD-related, judging by the names).
use constant DTD_ATTRIBUTE_DEFAULT => 500;
use constant DTD_ATTRIBUTE_REDEFINED => 501;
use constant DTD_ATTRIBUTE_VALUE => 502;
use constant DTD_CONTENT_ERROR => 503;
use constant DTD_CONTENT_MODEL => 504;
use constant DTD_CONTENT_NOT_DETERMINIST => 505;
use constant DTD_DIFFERENT_PREFIX => 506;
use constant DTD_ELEM_DEFAULT_NAMESPACE => 507;
use constant DTD_ELEM_NAMESPACE => 508;
use constant DTD_ELEM_REDEFINED => 509;
use constant DTD_EMPTY_NOTATION => 510;
use constant DTD_ENTITY_TYPE => 511;
use constant DTD_ID_FIXED => 512;
use constant DTD_ID_REDEFINED => 513;
use constant DTD_ID_SUBSET => 514;
use constant DTD_INVALID_CHILD => 515;
use constant DTD_INVALID_DEFAULT => 516;
use constant DTD_LOAD_ERROR => 517;
use constant DTD_MISSING_ATTRIBUTE => 518;
use constant DTD_MIXED_CORRUPT => 519;
use constant DTD_MULTIPLE_ID => 520;
use constant DTD_NO_DOC => 521;
use constant DTD_NO_DTD => 522;
use constant DTD_NO_ELEM_NAME => 523;
use constant DTD_NO_PREFIX => 524;
use constant DTD_NO_ROOT => 525;
use constant DTD_NOTATION_REDEFINED => 526;
use constant DTD_NOTATION_VALUE => 527;
use constant DTD_NOT_EMPTY => 528;
use constant DTD_NOT_PCDATA => 529;
use constant DTD_NOT_STANDALONE => 530;
use constant DTD_ROOT_NAME => 531;
use constant DTD_STANDALONE_WHITE_SPACE => 532;
use constant DTD_UNKNOWN_ATTRIBUTE => 533;
use constant DTD_UNKNOWN_ELEM => 534;
use constant DTD_UNKNOWN_ENTITY => 535;
use constant DTD_UNKNOWN_ID => 536;
use constant DTD_UNKNOWN_NOTATION => 537;
# Codes 800-801: HTML_* codes.
# NOTE(review): "STRUCURE" looks like a misspelling of "STRUCTURE". The name is
# left as-is because callers refer to the constant by this spelling; check
# whether it mirrors the identifier used upstream in libxml2.
use constant HTML_STRUCURE_ERROR => 800;
use constant HTML_UNKNOWN_TAG => 801;
# Codes 1000-1122: RNGP_* codes.
# NOTE(review): presumably RelaxNG schema-parser errors - confirm against
# libxml2's xmlerror.h.
use constant RNGP_ANYNAME_ATTR_ANCESTOR => 1000;
use constant RNGP_ATTR_CONFLICT => 1001;
use constant RNGP_ATTRIBUTE_CHILDREN => 1002;
use constant RNGP_ATTRIBUTE_CONTENT => 1003;
use constant RNGP_ATTRIBUTE_EMPTY => 1004;
use constant RNGP_ATTRIBUTE_NOOP => 1005;
use constant RNGP_CHOICE_CONTENT => 1006;
use constant RNGP_CHOICE_EMPTY => 1007;
use constant RNGP_CREATE_FAILURE => 1008;
use constant RNGP_DATA_CONTENT => 1009;
use constant RNGP_DEF_CHOICE_AND_INTERLEAVE => 1010;
use constant RNGP_DEFINE_CREATE_FAILED => 1011;
use constant RNGP_DEFINE_EMPTY => 1012;
use constant RNGP_DEFINE_MISSING => 1013;
use constant RNGP_DEFINE_NAME_MISSING => 1014;
use constant RNGP_ELEM_CONTENT_EMPTY => 1015;
use constant RNGP_ELEM_CONTENT_ERROR => 1016;
use constant RNGP_ELEMENT_EMPTY => 1017;
use constant RNGP_ELEMENT_CONTENT => 1018;
use constant RNGP_ELEMENT_NAME => 1019;
use constant RNGP_ELEMENT_NO_CONTENT => 1020;
use constant RNGP_ELEM_TEXT_CONFLICT => 1021;
use constant RNGP_EMPTY => 1022;
use constant RNGP_EMPTY_CONSTRUCT => 1023;
use constant RNGP_EMPTY_CONTENT => 1024;
use constant RNGP_EMPTY_NOT_EMPTY => 1025;
use constant RNGP_ERROR_TYPE_LIB => 1026;
use constant RNGP_EXCEPT_EMPTY => 1027;
use constant RNGP_EXCEPT_MISSING => 1028;
use constant RNGP_EXCEPT_MULTIPLE => 1029;
use constant RNGP_EXCEPT_NO_CONTENT => 1030;
# NOTE(review): "EMTPY" looks like a misspelling of "EMPTY"; kept as-is because
# renaming the constant would break existing callers.
use constant RNGP_EXTERNALREF_EMTPY => 1031;
use constant RNGP_EXTERNAL_REF_FAILURE => 1032;
use constant RNGP_EXTERNALREF_RECURSE => 1033;
use constant RNGP_FORBIDDEN_ATTRIBUTE => 1034;
use constant RNGP_FOREIGN_ELEMENT => 1035;
use constant RNGP_GRAMMAR_CONTENT => 1036;
use constant RNGP_GRAMMAR_EMPTY => 1037;
use constant RNGP_GRAMMAR_MISSING => 1038;
use constant RNGP_GRAMMAR_NO_START => 1039;
use constant RNGP_GROUP_ATTR_CONFLICT => 1040;
use constant RNGP_HREF_ERROR => 1041;
use constant RNGP_INCLUDE_EMPTY => 1042;
use constant RNGP_INCLUDE_FAILURE => 1043;
use constant RNGP_INCLUDE_RECURSE => 1044;
use constant RNGP_INTERLEAVE_ADD => 1045;
use constant RNGP_INTERLEAVE_CREATE_FAILED => 1046;
use constant RNGP_INTERLEAVE_EMPTY => 1047;
use constant RNGP_INTERLEAVE_NO_CONTENT => 1048;
use constant RNGP_INVALID_DEFINE_NAME => 1049;
use constant RNGP_INVALID_URI => 1050;
use constant RNGP_INVALID_VALUE => 1051;
use constant RNGP_MISSING_HREF => 1052;
use constant RNGP_NAME_MISSING => 1053;
use constant RNGP_NEED_COMBINE => 1054;
use constant RNGP_NOTALLOWED_NOT_EMPTY => 1055;
use constant RNGP_NSNAME_ATTR_ANCESTOR => 1056;
use constant RNGP_NSNAME_NO_NS => 1057;
use constant RNGP_PARAM_FORBIDDEN => 1058;
use constant RNGP_PARAM_NAME_MISSING => 1059;
use constant RNGP_PARENTREF_CREATE_FAILED => 1060;
use constant RNGP_PARENTREF_NAME_INVALID => 1061;
use constant RNGP_PARENTREF_NO_NAME => 1062;
use constant RNGP_PARENTREF_NO_PARENT => 1063;
use constant RNGP_PARENTREF_NOT_EMPTY => 1064;
use constant RNGP_PARSE_ERROR => 1065;
use constant RNGP_PAT_ANYNAME_EXCEPT_ANYNAME => 1066;
use constant RNGP_PAT_ATTR_ATTR => 1067;
use constant RNGP_PAT_ATTR_ELEM => 1068;
use constant RNGP_PAT_DATA_EXCEPT_ATTR => 1069;
use constant RNGP_PAT_DATA_EXCEPT_ELEM => 1070;
use constant RNGP_PAT_DATA_EXCEPT_EMPTY => 1071;
use constant RNGP_PAT_DATA_EXCEPT_GROUP => 1072;
use constant RNGP_PAT_DATA_EXCEPT_INTERLEAVE => 1073;
use constant RNGP_PAT_DATA_EXCEPT_LIST => 1074;
use constant RNGP_PAT_DATA_EXCEPT_ONEMORE => 1075;
use constant RNGP_PAT_DATA_EXCEPT_REF => 1076;
use constant RNGP_PAT_DATA_EXCEPT_TEXT => 1077;
use constant RNGP_PAT_LIST_ATTR => 1078;
use constant RNGP_PAT_LIST_ELEM => 1079;
use constant RNGP_PAT_LIST_INTERLEAVE => 1080;
use constant RNGP_PAT_LIST_LIST => 1081;
use constant RNGP_PAT_LIST_REF => 1082;
use constant RNGP_PAT_LIST_TEXT => 1083;
use constant RNGP_PAT_NSNAME_EXCEPT_ANYNAME => 1084;
use constant RNGP_PAT_NSNAME_EXCEPT_NSNAME => 1085;
use constant RNGP_PAT_ONEMORE_GROUP_ATTR => 1086;
use constant RNGP_PAT_ONEMORE_INTERLEAVE_ATTR => 1087;
use constant RNGP_PAT_START_ATTR => 1088;
use constant RNGP_PAT_START_DATA => 1089;
use constant RNGP_PAT_START_EMPTY => 1090;
use constant RNGP_PAT_START_GROUP => 1091;
use constant RNGP_PAT_START_INTERLEAVE => 1092;
use constant RNGP_PAT_START_LIST => 1093;
use constant RNGP_PAT_START_ONEMORE => 1094;
use constant RNGP_PAT_START_TEXT => 1095;
use constant RNGP_PAT_START_VALUE => 1096;
use constant RNGP_PREFIX_UNDEFINED => 1097;
use constant RNGP_REF_CREATE_FAILED => 1098;
use constant RNGP_REF_CYCLE => 1099;
use constant RNGP_REF_NAME_INVALID => 1100;
use constant RNGP_REF_NO_DEF => 1101;
use constant RNGP_REF_NO_NAME => 1102;
use constant RNGP_REF_NOT_EMPTY => 1103;
use constant RNGP_START_CHOICE_AND_INTERLEAVE => 1104;
use constant RNGP_START_CONTENT => 1105;
use constant RNGP_START_EMPTY => 1106;
use constant RNGP_START_MISSING => 1107;
use constant RNGP_TEXT_EXPECTED => 1108;
use constant RNGP_TEXT_HAS_CHILD => 1109;
use constant RNGP_TYPE_MISSING => 1110;
use constant RNGP_TYPE_NOT_FOUND => 1111;
use constant RNGP_TYPE_VALUE => 1112;
use constant RNGP_UNKNOWN_ATTRIBUTE => 1113;
use constant RNGP_UNKNOWN_COMBINE => 1114;
use constant RNGP_UNKNOWN_CONSTRUCT => 1115;
use constant RNGP_UNKNOWN_TYPE_LIB => 1116;
use constant RNGP_URI_FRAGMENT => 1117;
use constant RNGP_URI_NOT_ABSOLUTE => 1118;
use constant RNGP_VALUE_EMPTY => 1119;
use constant RNGP_VALUE_NO_CONTENT => 1120;
use constant RNGP_XMLNS_NAME => 1121;
use constant RNGP_XML_NS => 1122;
# Codes 1200-1221: XPATH_* codes, with three XPTR_* codes (1216-1218)
# interleaved in the same numeric range.
use constant XPATH_EXPRESSION_OK => 1200;
use constant XPATH_NUMBER_ERROR => 1201;
use constant XPATH_UNFINISHED_LITERAL_ERROR => 1202;
use constant XPATH_START_LITERAL_ERROR => 1203;
use constant XPATH_VARIABLE_REF_ERROR => 1204;
use constant XPATH_UNDEF_VARIABLE_ERROR => 1205;
use constant XPATH_INVALID_PREDICATE_ERROR => 1206;
use constant XPATH_EXPR_ERROR => 1207;
use constant XPATH_UNCLOSED_ERROR => 1208;
use constant XPATH_UNKNOWN_FUNC_ERROR => 1209;
use constant XPATH_INVALID_OPERAND => 1210;
use constant XPATH_INVALID_TYPE => 1211;
use constant XPATH_INVALID_ARITY => 1212;
use constant XPATH_INVALID_CTXT_SIZE => 1213;
use constant XPATH_INVALID_CTXT_POSITION => 1214;
use constant XPATH_MEMORY_ERROR => 1215;
use constant XPTR_SYNTAX_ERROR => 1216;
use constant XPTR_RESOURCE_ERROR => 1217;
use constant XPTR_SUB_RESOURCE_ERROR => 1218;
use constant XPATH_UNDEF_PREFIX_ERROR => 1219;
use constant XPATH_ENCODING_ERROR => 1220;
use constant XPATH_INVALID_CHAR_ERROR => 1221;
# Codes 1300-1302: TREE_* codes.
use constant TREE_INVALID_HEX => 1300;
use constant TREE_INVALID_DEC => 1301;
use constant TREE_UNTERMINATED_ENTITY => 1302;
# Codes 1400-1403: SAVE_* codes.
use constant SAVE_NOT_UTF8 => 1400;
use constant SAVE_CHAR_INVALID => 1401;
use constant SAVE_NO_DOCTYPE => 1402;
use constant SAVE_UNKNOWN_ENCODING => 1403;
# Code 1450: the single REGEXP_* code.
use constant REGEXP_COMPILE_ERROR => 1450;
# Codes 1500-1556: IO_* codes. Most names carry an errno-style suffix
# (EACCES, ENOENT, ...); 1543-1549 are named after I/O-layer conditions.
# NOTE(review): the mapping to system errno values is implied by the names
# only - confirm against libxml2's xmlerror.h.
use constant IO_UNKNOWN => 1500;
use constant IO_EACCES => 1501;
use constant IO_EAGAIN => 1502;
use constant IO_EBADF => 1503;
use constant IO_EBADMSG => 1504;
use constant IO_EBUSY => 1505;
use constant IO_ECANCELED => 1506;
use constant IO_ECHILD => 1507;
use constant IO_EDEADLK => 1508;
use constant IO_EDOM => 1509;
use constant IO_EEXIST => 1510;
use constant IO_EFAULT => 1511;
use constant IO_EFBIG => 1512;
use constant IO_EINPROGRESS => 1513;
use constant IO_EINTR => 1514;
use constant IO_EINVAL => 1515;
use constant IO_EIO => 1516;
use constant IO_EISDIR => 1517;
use constant IO_EMFILE => 1518;
use constant IO_EMLINK => 1519;
use constant IO_EMSGSIZE => 1520;
use constant IO_ENAMETOOLONG => 1521;
use constant IO_ENFILE => 1522;
use constant IO_ENODEV => 1523;
use constant IO_ENOENT => 1524;
use constant IO_ENOEXEC => 1525;
use constant IO_ENOLCK => 1526;
use constant IO_ENOMEM => 1527;
use constant IO_ENOSPC => 1528;
use constant IO_ENOSYS => 1529;
use constant IO_ENOTDIR => 1530;
use constant IO_ENOTEMPTY => 1531;
use constant IO_ENOTSUP => 1532;
use constant IO_ENOTTY => 1533;
use constant IO_ENXIO => 1534;
use constant IO_EPERM => 1535;
use constant IO_EPIPE => 1536;
use constant IO_ERANGE => 1537;
use constant IO_EROFS => 1538;
use constant IO_ESPIPE => 1539;
use constant IO_ESRCH => 1540;
use constant IO_ETIMEDOUT => 1541;
use constant IO_EXDEV => 1542;
use constant IO_NETWORK_ATTEMPT => 1543;
use constant IO_ENCODER => 1544;
use constant IO_FLUSH => 1545;
use constant IO_WRITE => 1546;
use constant IO_NO_INPUT => 1547;
use constant IO_BUFFER_FULL => 1548;
use constant IO_LOAD_ERROR => 1549;
use constant IO_ENOTSOCK => 1550;
use constant IO_EISCONN => 1551;
use constant IO_ECONNREFUSED => 1552;
use constant IO_ENETUNREACH => 1553;
use constant IO_EADDRINUSE => 1554;
use constant IO_EALREADY => 1555;
use constant IO_EAFNOSUPPORT => 1556;
# Codes 1600-1616: XINCLUDE_* codes.
use constant XINCLUDE_RECURSION => 1600;
use constant XINCLUDE_PARSE_VALUE => 1601;
use constant XINCLUDE_ENTITY_DEF_MISMATCH => 1602;
use constant XINCLUDE_NO_HREF => 1603;
use constant XINCLUDE_NO_FALLBACK => 1604;
use constant XINCLUDE_HREF_URI => 1605;
use constant XINCLUDE_TEXT_FRAGMENT => 1606;
use constant XINCLUDE_TEXT_DOCUMENT => 1607;
use constant XINCLUDE_INVALID_CHAR => 1608;
use constant XINCLUDE_BUILD_FAILED => 1609;
use constant XINCLUDE_UNKNOWN_ENCODING => 1610;
use constant XINCLUDE_MULTIPLE_ROOT => 1611;
use constant XINCLUDE_XPTR_FAILED => 1612;
use constant XINCLUDE_XPTR_RESULT => 1613;
use constant XINCLUDE_INCLUDE_IN_INCLUDE => 1614;
use constant XINCLUDE_FALLBACKS_IN_INCLUDE => 1615;
use constant XINCLUDE_FALLBACK_NOT_IN_INCLUDE => 1616;
# Codes 1650-1654: CATALOG_* codes.
use constant CATALOG_MISSING_ATTR => 1650;
use constant CATALOG_ENTRY_BROKEN => 1651;
use constant CATALOG_PREFER_VALUE => 1652;
use constant CATALOG_NOT_CATALOG => 1653;
use constant CATALOG_RECURSION => 1654;
# Codes 1700-1765: SCHEMAP_* codes.
# NOTE(review): presumably XML Schema parser errors - confirm against
# libxml2's xmlerror.h.
use constant SCHEMAP_PREFIX_UNDEFINED => 1700;
use constant SCHEMAP_ATTRFORMDEFAULT_VALUE => 1701;
use constant SCHEMAP_ATTRGRP_NONAME_NOREF => 1702;
use constant SCHEMAP_ATTR_NONAME_NOREF => 1703;
use constant SCHEMAP_COMPLEXTYPE_NONAME_NOREF => 1704;
use constant SCHEMAP_ELEMFORMDEFAULT_VALUE => 1705;
use constant SCHEMAP_ELEM_NONAME_NOREF => 1706;
use constant SCHEMAP_EXTENSION_NO_BASE => 1707;
use constant SCHEMAP_FACET_NO_VALUE => 1708;
use constant SCHEMAP_FAILED_BUILD_IMPORT => 1709;
use constant SCHEMAP_GROUP_NONAME_NOREF => 1710;
use constant SCHEMAP_IMPORT_NAMESPACE_NOT_URI => 1711;
use constant SCHEMAP_IMPORT_REDEFINE_NSNAME => 1712;
use constant SCHEMAP_IMPORT_SCHEMA_NOT_URI => 1713;
use constant SCHEMAP_INVALID_BOOLEAN => 1714;
use constant SCHEMAP_INVALID_ENUM => 1715;
use constant SCHEMAP_INVALID_FACET => 1716;
use constant SCHEMAP_INVALID_FACET_VALUE => 1717;
use constant SCHEMAP_INVALID_MAXOCCURS => 1718;
use constant SCHEMAP_INVALID_MINOCCURS => 1719;
use constant SCHEMAP_INVALID_REF_AND_SUBTYPE => 1720;
use constant SCHEMAP_INVALID_WHITE_SPACE => 1721;
use constant SCHEMAP_NOATTR_NOREF => 1722;
use constant SCHEMAP_NOTATION_NO_NAME => 1723;
use constant SCHEMAP_NOTYPE_NOREF => 1724;
use constant SCHEMAP_REF_AND_SUBTYPE => 1725;
use constant SCHEMAP_RESTRICTION_NONAME_NOREF => 1726;
use constant SCHEMAP_SIMPLETYPE_NONAME => 1727;
use constant SCHEMAP_TYPE_AND_SUBTYPE => 1728;
use constant SCHEMAP_UNKNOWN_ALL_CHILD => 1729;
use constant SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD => 1730;
use constant SCHEMAP_UNKNOWN_ATTR_CHILD => 1731;
use constant SCHEMAP_UNKNOWN_ATTRGRP_CHILD => 1732;
use constant SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP => 1733;
use constant SCHEMAP_UNKNOWN_BASE_TYPE => 1734;
use constant SCHEMAP_UNKNOWN_CHOICE_CHILD => 1735;
use constant SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD => 1736;
use constant SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD => 1737;
use constant SCHEMAP_UNKNOWN_ELEM_CHILD => 1738;
use constant SCHEMAP_UNKNOWN_EXTENSION_CHILD => 1739;
use constant SCHEMAP_UNKNOWN_FACET_CHILD => 1740;
use constant SCHEMAP_UNKNOWN_FACET_TYPE => 1741;
use constant SCHEMAP_UNKNOWN_GROUP_CHILD => 1742;
use constant SCHEMAP_UNKNOWN_IMPORT_CHILD => 1743;
use constant SCHEMAP_UNKNOWN_LIST_CHILD => 1744;
use constant SCHEMAP_UNKNOWN_NOTATION_CHILD => 1745;
use constant SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD => 1746;
use constant SCHEMAP_UNKNOWN_REF => 1747;
use constant SCHEMAP_UNKNOWN_RESTRICTION_CHILD => 1748;
use constant SCHEMAP_UNKNOWN_SCHEMAS_CHILD => 1749;
use constant SCHEMAP_UNKNOWN_SEQUENCE_CHILD => 1750;
use constant SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD => 1751;
use constant SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD => 1752;
use constant SCHEMAP_UNKNOWN_TYPE => 1753;
use constant SCHEMAP_UNKNOWN_UNION_CHILD => 1754;
use constant SCHEMAP_ELEM_DEFAULT_FIXED => 1755;
use constant SCHEMAP_REGEXP_INVALID => 1756;
# NOTE(review): SCHEMAP_FAILED_LOAD repeats the value 1756 of
# SCHEMAP_REGEXP_INVALID - the only duplicated value in this group, so the two
# codes cannot be told apart. This looks like an off-by-one that would also
# shift every value from here through SCHEMAP_FAILED_PARSE; verify against
# libxml2's xmlerror.h before renumbering, since callers may compare against
# the current values.
use constant SCHEMAP_FAILED_LOAD => 1756;
use constant SCHEMAP_NOTHING_TO_PARSE => 1757;
use constant SCHEMAP_NOROOT => 1758;
use constant SCHEMAP_REDEFINED_GROUP => 1759;
use constant SCHEMAP_REDEFINED_TYPE => 1760;
use constant SCHEMAP_REDEFINED_ELEMENT => 1761;
use constant SCHEMAP_REDEFINED_ATTRGROUP => 1762;
use constant SCHEMAP_REDEFINED_ATTR => 1763;
use constant SCHEMAP_REDEFINED_NOTATION => 1764;
use constant SCHEMAP_FAILED_PARSE => 1765;
# Codes 1800-1822: SCHEMAV_* codes.
# NOTE(review): presumably XML Schema validation errors; the starting value
# (1800) should be checked against libxml2's xmlerror.h.
use constant SCHEMAV_NOROOT => 1800;
use constant SCHEMAV_UNDECLAREDELEM => 1801;
use constant SCHEMAV_NOTTOPLEVEL => 1802;
use constant SCHEMAV_MISSING => 1803;
use constant SCHEMAV_WRONGELEM => 1804;
use constant SCHEMAV_NOTYPE => 1805;
use constant SCHEMAV_NOROLLBACK => 1806;
use constant SCHEMAV_ISABSTRACT => 1807;
use constant SCHEMAV_NOTEMPTY => 1808;
use constant SCHEMAV_ELEMCONT => 1809;
use constant SCHEMAV_HAVEDEFAULT => 1810;
use constant SCHEMAV_NOTNILLABLE => 1811;
use constant SCHEMAV_EXTRACONTENT => 1812;
use constant SCHEMAV_INVALIDATTR => 1813;
use constant SCHEMAV_INVALIDELEM => 1814;
use constant SCHEMAV_NOTDETERMINIST => 1815;
use constant SCHEMAV_CONSTRUCT => 1816;
use constant SCHEMAV_INTERNAL => 1817;
use constant SCHEMAV_NOTSIMPLE => 1818;
use constant SCHEMAV_ATTRUNKNOWN => 1819;
use constant SCHEMAV_ATTRINVALID => 1820;
use constant SCHEMAV_VALUE => 1821;
use constant SCHEMAV_FACET => 1822;
# Codes 1900-1903: further XPTR_* codes (three more, 1216-1218, sit in the
# XPATH range).
use constant XPTR_UNKNOWN_SCHEME => 1900;
use constant XPTR_CHILDSEQ_START => 1901;
use constant XPTR_EVAL_FAILED => 1902;
use constant XPTR_EXTRA_OBJECTS => 1903;
# Codes 1950-1953: C14N_* codes.
use constant C14N_CREATE_CTXT => 1950;
use constant C14N_REQUIRES_UTF8 => 1951;
use constant C14N_CREATE_STACK => 1952;
use constant C14N_INVALID_NODE => 1953;
# Codes 2000-2002: FTP_* codes.
use constant FTP_PASV_ANSWER => 2000;
use constant FTP_EPSV_ANSWER => 2001;
use constant FTP_ACCNT => 2002;
# Codes 2020-2022: HTTP_* codes.
use constant HTTP_URL_SYNTAX => 2020;
use constant HTTP_USE_IP => 2021;
use constant HTTP_UNKNOWN_HOST => 2022;
1;
PK @[cY'
LibXML/Common.podnu W+A =head1 NAME
XML::LibXML::Common - Constants and Character Encoding Routines
=head1 SYNOPSIS
use XML::LibXML::Common;
$encodedstring = encodeToUTF8( $name_of_encoding, $string_to_encode );
$decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode );
=head1 DESCRIPTION
XML::LibXML::Common defines constants for all node types and provides interface
to libxml2 charset conversion functions.
Since XML::LibXML uses its own node type definitions, one may want to use
XML::LibXML::Common in its compatibility mode:
=head2 Exporter TAGS
use XML::LibXML::Common qw(:libxml);
C<<<<<< :libxml >>>>>> tag will use the XML::LibXML Compatibility mode, which defines the old 'XML_'
node-type definitions.
use XML::LibXML::Common qw(:gdome);
C<<<<<< :gdome >>>>>> tag will use the XML::GDOME Compatibility mode, which defines the old 'GDOME_'
node-type definitions.
use XML::LibXML::Common qw(:w3c);
This uses the nodetype definition names as specified for DOM.
use XML::LibXML::Common qw(:encoding);
This tag can be used to export only the charset encoding functions of
XML::LibXML::Common.
=head2 Exports
By default the W3 definitions as defined in the DOM specifications and the
encoding functions are exported by XML::LibXML::Common.
=head2 Encoding functions
To encode or decode a string to or from UTF-8, XML::LibXML::Common exports two
functions, which provide an interface to the encoding support in C<<<<<< libxml2 >>>>>>. Which encodings are supported by these functions depends on how C<<<<<< libxml2 >>>>>> was compiled. UTF-16 is always supported and on most installations, ISO
encodings are supported as well.
This interface was useful for older versions of Perl. Since Perl >= 5.8
provides similar functions via the C<<<<<< Encode >>>>>> module, it is probably a good idea to use those instead.
=over 4
=item encodeToUTF8
$encodedstring = encodeToUTF8( $name_of_encoding, $string_to_encode );
The function will convert a byte string from the specified encoding to an UTF-8
encoded character string.
=item decodeFromUTF8
$decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode );
This function converts an UTF-8 encoded character string to a specified
encoding. Note that the conversion can raise an error if the given string
contains characters that cannot be represented in the target encoding.
=back
Both these functions report their errors on the standard error. If an error
occurs, the function will croak(). To catch the error information it is
required to call the encoding function from within an eval block in order to
prevent the entire script from being stopped on encoding error.
=head2 A note on history
Before XML::LibXML 1.70, this class was available as a separate CPAN
distribution, intended to provide functionality shared between XML::LibXML,
XML::GDOME, and possibly other modules. Since there seems to be no progress in
this direction, we decided to merge XML::LibXML::Common 0.13 and XML::LibXML
1.70 to one CPAN distribution.
The merge also naturally eliminates a practical and urgent problem experienced
by many XML::LibXML users on certain platforms, namely mysterious misbehavior
of XML::LibXML occurring if the installed (often pre-packaged) version of
XML::LibXML::Common was compiled against an older version of libxml2 than
XML::LibXML.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[·]Nw w LibXML/Dtd.podnu W+A =head1 NAME
XML::LibXML::Dtd - XML::LibXML DTD Handling
=head1 SYNOPSIS
use XML::LibXML;
$dtd = XML::LibXML::Dtd->new($public_id, $system_id);
$dtd = XML::LibXML::Dtd->parse_string($dtd_str);
$name = $dtd->getName();
$publicId = $dtd->publicId();
$systemId = $dtd->systemId();
=head1 DESCRIPTION
This class holds a DTD. You may parse a DTD from either a string, or from an
external SYSTEM identifier.
No support is available as yet for parsing from a filehandle.
XML::LibXML::Dtd is a sub-class of L<<<<<< XML::LibXML::Node >>>>>>, so all the methods available to nodes (particularly toString()) are available
to Dtd objects.
=head1 METHODS
=over 4
=item new
$dtd = XML::LibXML::Dtd->new($public_id, $system_id);
Parse a DTD from the system identifier, and return a DTD object that you can
pass to $doc->is_valid() or $doc->validate().
my $dtd = XML::LibXML::Dtd->new(
"SOME // Public / ID / 1.0",
"test.dtd"
);
my $doc = XML::LibXML->new->parse_file("test.xml");
$doc->validate($dtd);
=item parse_string
$dtd = XML::LibXML::Dtd->parse_string($dtd_str);
The same as new() above, except you can parse a DTD from a string. Note that
parsing from string may fail if the DTD contains external parametric-entity
references with relative URLs.
=item getName
$name = $dtd->getName();
Returns the name of DTD; i.e., the name immediately following the DOCTYPE
keyword.
=item publicId
$publicId = $dtd->publicId();
Returns the public identifier of the external subset.
=item systemId
$systemId = $dtd->systemId();
Returns the system identifier of the external subset.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[$-= = LibXML/Namespace.podnu W+A =head1 NAME
XML::LibXML::Namespace - XML::LibXML Namespace Implementation
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Namespace nodes are listed here,
# see XML::LibXML::Node manpage for other methods
my $ns = XML::LibXML::Namespace->new($nsURI);
print $ns->nodeName();
print $ns->name();
$localname = $ns->getLocalName();
print $ns->getData();
print $ns->getValue();
print $ns->value();
$known_uri = $ns->getNamespaceURI();
$known_prefix = $ns->getPrefix();
=head1 DESCRIPTION
Namespace nodes are returned by both $element->findnodes('namespace::foo') or
by $node->getNamespaces().
The namespace node API is not part of any current DOM API, and so it is quite
minimal. It should be noted that namespace nodes are I<<<<<< not >>>>>> a sub class of L<<<<<< XML::LibXML::Node >>>>>>, however Namespace nodes act a lot like attribute nodes, and similarly named
methods will return what you would expect if you treated the namespace node as
an attribute. Note that in order to fix several inconsistencies between the API
and the documentation, the behavior of some functions has been changed in
1.64.
=head1 METHODS
=over 4
=item new
my $ns = XML::LibXML::Namespace->new($nsURI);
Creates a new Namespace node. Note that this is not a 'node' as an attribute or
an element node. Therefore you can't call all L<<<<<< XML::LibXML::Node >>>>>> functions. All functions available for this node are listed below.
Optionally you can pass the prefix to the namespace constructor. If this second
parameter is omitted you will create a so called default namespace. Note, the
newly created namespace is not bound to any document or node, therefore you
should not expect it to be available in an existing document.
=item declaredURI
Returns the URI for this namespace.
=item declaredPrefix
Returns the prefix for this namespace.
=item nodeName
print $ns->nodeName();
Returns "xmlns:prefix", where prefix is the prefix for this namespace.
=item name
print $ns->name();
Alias for nodeName()
=item getLocalName
$localname = $ns->getLocalName();
Returns the local name of this node as if it were an attribute, that is, the
prefix associated with the namespace.
=item getData
print $ns->getData();
Returns the URI of the namespace, i.e. the value of this node as if it were an
attribute.
=item getValue
print $ns->getValue();
Alias for getData()
=item value
print $ns->value();
Alias for getData()
=item getNamespaceURI
$known_uri = $ns->getNamespaceURI();
Returns the string "http://www.w3.org/2000/xmlns/"
=item getPrefix
$known_prefix = $ns->getPrefix();
Returns the string "xmlns"
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[& LibXML/ErrNo.podnu W+A =head1 NAME
XML::LibXML::ErrNo - Structured Errors
This module is based on xmlerror.h libxml2 C header file. It defines symbolic
constants for all libxml2 error codes. Currently libxml2 uses over 480
different error codes. See also XML::LibXML::Error.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[[ [
LibXML/PI.podnu W+A =head1 NAME
XML::LibXML::PI - XML::LibXML Processing Instructions
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Processing Instruction nodes are listed here,
# see XML::LibXML::Node manpage for other methods
$pinode->setData( $data_string );
$pinode->setData( name=>string_value [...] );
=head1 DESCRIPTION
Processing instructions are implemented with XML::LibXML with read and write
access. The PI data is the PI without the PI target (as specified in XML 1.0
[17]) as a string. This string can be accessed with getData as implemented in L<<<<<< XML::LibXML::Node >>>>>>.
Write access takes into account that many processing instructions carry
attribute-like data. Therefore, besides the DOM-conformant interface, setData()
also accepts a set of named parameters. So the code segment
my $pi = $dom->createProcessingInstruction("abc");
$pi->setData(foo=>'bar', foobar=>'foobar');
$dom->appendChild( $pi );
will result in the following PI in the DOM:
  <?abc foo="bar" foobar="foobar"?>
Which is how it is specified in the DOM specification. This three step
interface temporarily creates a node in Perl space. This can be avoided by
using the insertProcessingInstruction() method. Instead of the three calls
described above, the call
$dom->insertProcessingInstruction("abc",'foo="bar" foobar="foobar"');
will have the same result as above.
L<<<<<< XML::LibXML::PI >>>>>>'s implementation of setData() documented below differs a bit from the
standard version as available in L<<<<<< XML::LibXML::Node >>>>>>:
=over 4
=item setData
$pinode->setData( $data_string );
$pinode->setData( name=>string_value [...] );
This method allows to change the content data of a PI. Additionally to the
interface specified for DOM Level2, the method provides a named parameter
interface to set the data. This parameter list is converted into a string
before it is appended to the PI.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[Wʗ LibXML/CDATASection.podnu W+A =head1 NAME
XML::LibXML::CDATASection - XML::LibXML Class for CDATA Sections
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to CDATA nodes are listed here,
# see XML::LibXML::Node manpage for other methods
$node = XML::LibXML::CDATASection->new( $content );
=head1 DESCRIPTION
This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for CDATA nodes.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::CDATASection->new( $content );
The constructor is the only provided function for this package. It is required,
because I<<<<<< libxml2 >>>>>> treats the different text node types slightly differently.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[ LibXML/SAX.podnu W+A =head1 NAME
XML::LibXML::SAX - XML::LibXML direct SAX parser
=head1 DESCRIPTION
XML::LibXML provides an interface to libxml2 direct SAX interface. Through this
interface it is possible to generate SAX events directly while parsing a
document. While using the SAX parser XML::LibXML will not create a DOM Document
tree.
Such an interface is useful if very large XML documents have to be processed
and no DOM functions are required. By using this interface it is possible to
read data stored within a XML document directly into the application data
structures without loading the document into memory.
The SAX interface of XML::LibXML is based on the famous XML::SAX interface. It
uses the generic interface as provided by XML::SAX::Base.
Additionally to the generic functions, which are only able to process entire
documents, XML::LibXML::SAX provides I<<<<<< parse_chunk() >>>>>>. This method generates SAX events from well balanced data such as is often
provided by databases.
I<<<<<< NOTE: >>>>>> At the moment XML::LibXML provides only an incomplete interface to libxml2's
native SAX implementation. The current implementation is not tested in
production environment. It may cause significant memory problems or show
wrong behaviour. If you run into specific problems using this part of
XML::LibXML, let me know.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[ LibXML/Number.pmnu W+A # $Id: Number.pm 785 2009-07-16 14:17:46Z pajas $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Number;
use XML::LibXML::Boolean;
use XML::LibXML::Literal;
use strict;
use vars qw ($VERSION);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
use overload
'""' => \&value,
'0+' => \&value,
'<=>' => \&cmp;
# Constructor: wrap $number in a blessed scalar reference.
#
# The value is kept only when it looks like a plain decimal literal
# (optional minus sign, digits, optional fraction); all whitespace is then
# removed from it. Anything else is stored as undef, which the accessors
# report as 'NaN'.
sub new {
    my ($class, $number) = @_;
    if ($number =~ /^\s*(-\s*)?(\d+(\.\d*)?|\.\d+)\s*$/) {
        # Accepted: normalise by dropping every whitespace character,
        # including any between the sign and the digits.
        $number =~ s/\s+//g;
    }
    else {
        $number = undef;
    }
    return bless \$number, $class;
}
# Return the stored value, or the string 'NaN' when no valid number is held.
sub as_string {
    my ($self) = @_;
    return 'NaN' unless defined $$self;
    return $$self;
}
# Return the value serialised as an XML fragment:
#   <Number>VALUE</Number>\n
# An undefined (invalid) value is rendered as NaN.
#
# The element wrapper had been lost from the string literals (the method
# concatenated an empty string and a bare newline), so the result was not
# XML at all; the tags are restored here.
sub as_xml {
    my $self = shift;
    return "<Number>" . (defined($$self) ? $$self : 'NaN') . "</Number>\n";
}
# Return the raw stored value (undef when the object holds no valid number).
# Also installed as the '""' and '0+' overload handler for this class.
sub value {
    my ($self) = @_;
    return $$self;
}
# Handler for the overloaded '<=>' operator.
#
# $other is the other operand; $swap is true when the operands were given in
# reverse order (other <=> self), in which case the comparison is flipped.
sub cmp {
    my ($self, $other, $swap) = @_;
    return $swap
        ? ($other <=> $$self)
        : ($$self <=> $other);
}
# Evaluating a number yields the number object itself.
sub evaluate {
    return $_[0];
}
# Convert to an XML::LibXML::Boolean: True when the stored value is true in
# Perl's sense, False otherwise (so 0 and undef both give False).
sub to_boolean {
    my ($self) = @_;
    if ($$self) {
        return XML::LibXML::Boolean->True;
    }
    return XML::LibXML::Boolean->False;
}
# Convert to an XML::LibXML::Literal built from the string form (as_string).
sub to_literal {
    my ($self) = @_;
    return XML::LibXML::Literal->new($self->as_string);
}
# Already a number: return the object unchanged.
sub to_number {
    my ($self) = @_;
    return $self;
}
# Return whatever the value() method returns for this object.
sub string_value {
    my ($self) = @_;
    return $self->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Number - Simple numeric values.
=head1 DESCRIPTION
This class holds simple numeric values. It doesn't support -0, +/- Infinity,
or NaN, as the XPath spec says it should, but I'm not hurting anyone I don't think.
=head1 API
=head2 new($num)
Creates a new XML::LibXML::Number object, with the value in $num. Does some
rudimentary numeric checking on $num to ensure it actually is a number.
=head2 value()
Also available as overloaded stringification. Returns the numeric value held.
=cut
PK @[Jn% % LibXML/InputCallback.podnu W+A =head1 NAME
XML::LibXML::InputCallback - XML::LibXML Class for Input Callbacks
=head1 SYNOPSIS
use XML::LibXML;
=head1 DESCRIPTION
You may get unexpected results if you are trying to load external documents
during libxml2 parsing if the location of the resource is not an HTTP, FTP or
relative location but an absolute path, for example. To get around this
limitation, you may add your own input handler to open, read and close
particular types of locations or URI classes. Using these input callback
handlers, you can handle your own custom URI schemes, for example.
The input callbacks are used whenever LibXML has to get something other than
externally parsed entities from somewhere. They are implemented using a
callback stack on the Perl layer in analogy to libxml2's native callback stack.
The XML::LibXML::InputCallback class transparently registers the input
callbacks for the libxml2's parser processes.
=head2 How does XML::LibXML::InputCallback work?
The libxml2 library offers a callback implementation as global functions only.
To work-around the troubles resulting in having only global callbacks - for
example, if the same global callback stack is manipulated by different
applications running together in a single Apache Web-server environment -,
XML::LibXML::InputCallback comes with an object-oriented and a function-oriented
part.
Using the function-oriented part the global callback stack of libxml2 can be
manipulated. Those functions can be used as interface to the callbacks on the
C- and XS Layer. At the object-oriented part, operations for working with the
"pseudo-localized" callback stack are implemented. Currently, you can register
and de-register callbacks on the Perl layer and initialize them on a per parser
basis.
=head3 Callback Groups
The libxml2 input callbacks come in groups. One group contains a URI matcher (I<<<<<< match >>>>>>), a data stream constructor (I<<<<<< open >>>>>>), a data stream reader (I<<<<<< read >>>>>>), and a data stream destructor (I<<<<<< close >>>>>>). The callbacks can be manipulated on a per group basis only.
=head3 The Parser Process
The parser process works on an XML data stream, along which links to other
resources can be embedded. This can be links to external DTDs or XIncludes for
example. Those resources are identified by URIs. The callback implementation of
libxml2 assumes that one callback group can handle a certain amount of URIs and
a certain URI scheme. By default, callback handlers for I<<<<<< file://* >>>>>>, I<<<<<< file://*.gz >>>>>>, I<<<<<< http://* >>>>>> and I<<<<<< ftp://* >>>>>> are registered.
Callback groups in the callback stack are processed from top to bottom, meaning
that callback groups registered later will be processed before the earlier
registered ones.
While parsing the data stream, the libxml2 parser checks if a registered
callback group will handle a URI - if they will not, the URI will be
interpreted as I<<<<<< file://URI >>>>>>. To handle a URI, the I<<<<<< match >>>>>> callback will have to return '1'. If that happens, the handling of the URI will
be passed to that callback group. Next, the URI will be passed to the I<<<<<< open >>>>>> callback, which should return a I<<<<<< reference >>>>>> to the data stream if it successfully opened the file, '0' otherwise. If
opening the stream was successful, the I<<<<<< read >>>>>> callback will be called repeatedly until it returns an empty string. After the
read callback, the I<<<<<< close >>>>>> callback will be called to close the stream.
=head3 Organisation of callback groups in XML::LibXML::InputCallback
Callback groups are implemented as a stack (Array), each entry holds a
reference to an array of the callbacks. For the libxml2 library, the
XML::LibXML::InputCallback callback implementation appears as one single
callback group. The Perl implementation however allows to manage different
callback stacks on a per libxml2-parser basis.
=head2 Using XML::LibXML::InputCallback
After object instantiation using the parameter-less constructor, you can
register callback groups.
my $input_callbacks = XML::LibXML::InputCallback->new();
$input_callbacks->register_callbacks([ $match_cb1, $open_cb1,
$read_cb1, $close_cb1 ] );
$input_callbacks->register_callbacks([ $match_cb2, $open_cb2,
$read_cb2, $close_cb2 ] );
$input_callbacks->register_callbacks( [ $match_cb3, $open_cb3,
$read_cb3, $close_cb3 ] );
$parser->input_callbacks( $input_callbacks );
$parser->parse_file( $some_xml_file );
=head2 What about the old callback system prior to XML::LibXML::InputCallback?
In XML::LibXML versions prior to 1.59 - i.e. without the
XML::LibXML::InputCallback module - you could define your callbacks either
globally or locally. You can still do that using
XML::LibXML::InputCallback, and in addition to that you can define the
callbacks on a per parser basis!
If you use the old callback interface through global callbacks,
XML::LibXML::InputCallback will treat them with a lower priority than the ones
registered using the new interface. The global callbacks will not override the
callback groups registered using the new interface. Local callbacks are
attached to a specific parser instance, therefore they are treated with highest
priority. If the I<<<<<< match >>>>>> callback of the callback group registered as local variable is identical to one
of the callback groups registered using the new interface, that callback group
will be replaced.
Users of the old callback implementation whose I<<<<<< open >>>>>> callback returned a plain string, will have to adapt their code to return a
reference to that string after upgrading to version >= 1.59. The new callback
system can only deal with the I<<<<<< open >>>>>> callback returning a reference!
=head1 INTERFACE DESCRIPTION
=head2 Global Variables
=over 4
=item $_CUR_CB
Stores the current callback and can be used as shortcut to access the callback
stack.
=item @_GLOBAL_CALLBACKS
Stores all callback groups for the current parser process.
=item @_CB_STACK
Stores the currently used callback group. Used to prevent parser errors when
dealing with nested XML data.
=back
=head2 Global Callbacks
=over 4
=item _callback_match
Implements the interface for the I<<<<<< match >>>>>> callback at C-level and for the selection of the callback group from the
callbacks defined at the Perl-level.
=item _callback_open
Forwards the I<<<<<< open >>>>>> callback from libxml2 to the corresponding callback function at the Perl-level.
=item _callback_read
Forwards the read request to the corresponding callback function at the
Perl-level and returns the result to libxml2.
=item _callback_close
Forwards the I<<<<<< close >>>>>> callback from libxml2 to the corresponding callback function at the
Perl-level.
=back
=head2 Class methods
=over 4
=item new()
A simple constructor.
=item register_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ])
The four callbacks I<<<<<< have >>>>>> to be given as array reference in the above order I<<<<<< match >>>>>>, I<<<<<< open >>>>>>, I<<<<<< read >>>>>>, I<<<<<< close >>>>>>!
=item unregister_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ])
With no arguments given, C<<<<<< unregister_callbacks() >>>>>> will delete the last registered callback group from the stack. If four
callbacks are passed as array reference, the callback group to unregister will
be identified by the I<<<<<< match >>>>>> callback and deleted from the callback stack. Note that if several identical I<<<<<< match >>>>>> callbacks are defined in different callback groups, ALL of them will be deleted
from the stack.
=item init_callbacks()
Initializes the callback system before a parsing process.
=item cleanup_callbacks()
Resets global variables and the libxml2 callback stack.
=item lib_init_callbacks()
Used internally for callback registration at C-level.
=item lib_cleanup_callbacks()
Used internally for callback resetting at the C-level.
=back
=head1 EXAMPLE CALLBACKS
The following example is a purely fictitious example that uses a
MyScheme::Handler object that responds to methods similar to an IO::Handle.
# Define the four callback functions
sub match_uri {
my $uri = shift;
return $uri =~ /^myscheme:/; # trigger our callback group at a 'myscheme' URIs
}
sub open_uri {
my $uri = shift;
my $handler = MyScheme::Handler->new($uri);
return $handler;
}
# The returned $buffer will be parsed by the libxml2 parser
sub read_uri {
my $handler = shift;
my $length = shift;
my $buffer;
read($handler, $buffer, $length);
return $buffer; # $buffer will be an empty string '' if read() is done
}
# Close the handle associated with the resource.
sub close_uri {
my $handler = shift;
close($handler);
}
# Register them with an instance of XML::LibXML::InputCallback
my $input_callbacks = XML::LibXML::InputCallback->new();
$input_callbacks->register_callbacks([ \&match_uri, \&open_uri,
\&read_uri, \&close_uri ] );
# Register the callback group at a parser instance
$parser->input_callbacks( $input_callbacks );
# $some_xml_file will be parsed using our callbacks
$parser->parse_file( $some_xml_file );
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[ LibXML/SAX/Parser.pmnu W+A # $Id: Parser.pm 785 2009-07-16 14:17:46Z pajas $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Parser;
use strict;
use vars qw($VERSION @ISA);
use XML::LibXML;
use XML::LibXML::Common qw(:libxml);
use XML::SAX::Base;
use XML::SAX::DocumentLocator;
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
@ISA = ('XML::SAX::Base');
# Returns 0 when $XML::LibXML::__threads_shared is true and 1 otherwise.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Character streams are not supported: always dies, whatever the arguments.
sub _parse_characterstream {
    my $self = shift;
    my ($fh, $options) = @_;
    die "parsing a characterstream is not supported at this time";
}
# Parse the byte stream $fh into a DOM with a fresh XML::LibXML parser, then
# replay that DOM as SAX events via generate().
#
# When $options->{Source}{SystemId} exists it is passed to parse_fh() as the
# second argument. Returns whatever generate() returns.
sub _parse_bytestream {
    my ($self, $fh, $options) = @_;
    my $parser = XML::LibXML->new();
    my $doc;
    if (exists $options->{Source}{SystemId}) {
        $doc = $parser->parse_fh($fh, $options->{Source}{SystemId});
    }
    else {
        $doc = $parser->parse_fh($fh);
    }
    return $self->generate($doc);
}
# Parse the XML string $str into a DOM with a fresh XML::LibXML parser, then
# replay that DOM as SAX events via generate().
#
# When $options->{Source}{SystemId} exists it is passed to parse_string() as
# the second argument. Returns whatever generate() returns.
sub _parse_string {
    my ($self, $str, $options) = @_;
    my $parser = XML::LibXML->new();
    my $doc;
    if (exists $options->{Source}{SystemId}) {
        $doc = $parser->parse_string($str, $options->{Source}{SystemId});
    }
    else {
        $doc = $parser->parse_string($str);
    }
    return $self->generate($doc);
}
# Parse the file named by $sysid into a DOM with a fresh XML::LibXML parser,
# then replay that DOM as SAX events via generate(). $options is not used.
sub _parse_systemid {
    my $self  = shift;
    my $sysid = shift;
    my $doc = XML::LibXML->new()->parse_file($sysid);
    return $self->generate($doc);
}
# Replay a DOM document as SAX events on this object.
#
# A document locator is installed first. Its public id, system id, encoding
# and XML version are captured once from the node's owner document (and that
# document's external subset, if any); the line number is read on demand from
# $self->{current_node}, and the column is always reported as 1.
#
# Events are only produced when $node is an XML or HTML document node:
# start_document, xml_decl, the node tree (process_node), end_document.
sub generate {
    my ($self, $node) = @_;
    my $doc = $node->ownerDocument();

    # Snapshot the locator values up front; the closures below just hand
    # them back.
    my ($public_id, $system_id, $encoding, $xml_version);
    my $dtd;
    $dtd = $doc->externalSubset() if defined $doc;
    if (defined $dtd) {
        $public_id = $dtd->publicId();
        $system_id = $dtd->systemId();
    }
    if (defined $doc) {
        $encoding    = $doc->encoding();
        $xml_version = $doc->version();
    }

    $self->set_document_locator(
        XML::SAX::DocumentLocator->new(
            sub { $public_id },
            sub { $system_id },
            sub { defined($self->{current_node}) ? $self->{current_node}->line_number() : undef },
            sub { 1 },
            sub { $encoding },
            sub { $xml_version },
        ),
    );

    my $type = $node->nodeType();
    if ( $type == XML_DOCUMENT_NODE
         || $type == XML_HTML_DOCUMENT_NODE ) {
        $self->start_document({});
        $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding});
        $self->process_node($node);
        $self->end_document({});
    }
}
# Emit the SAX events for one DOM node, recursing into its children.
#
#   comment                    -> comment event
#   text / CDATA section       -> characters event
#   element                    -> process_element()
#   entity reference           -> each child is processed in turn
#   document / HTML document /
#   document fragment          -> each child is processed in turn
#   processing instruction     -> processing_instruction event
#   XInclude start/end, DTD    -> ignored
#   anything else              -> ignored
#
# The original carried a second, unreachable XML_COMMENT_NODE branch after
# the PI branch; comments are already handled by the first test, so it has
# been dropped.
sub process_node {
    my ($self, $node) = @_;

    # Expose the node being processed to the document locator's line-number
    # callback (see generate()); `local` restores the previous value when this
    # call returns.
    local $self->{current_node} = $node;

    my $node_type = $node->nodeType();
    if ($node_type == XML_COMMENT_NODE) {
        $self->comment( { Data => $node->getData } );
    }
    elsif ($node_type == XML_TEXT_NODE
           || $node_type == XML_CDATA_SECTION_NODE) {
        # warn($node->getData . "\n");
        $self->characters( { Data => $node->nodeValue } );
    }
    elsif ($node_type == XML_ELEMENT_NODE) {
        # warn("<" . $node->getName . ">\n");
        $self->process_element($node);
        # warn("</" . $node->getName . ">\n");
    }
    elsif ($node_type == XML_ENTITY_REF_NODE) {
        foreach my $kid ($node->childNodes) {
            # warn("child of entity ref: " . $kid->getType() . " called: " . $kid->getName . "\n");
            $self->process_node($kid);
        }
    }
    elsif ($node_type == XML_DOCUMENT_NODE
           || $node_type == XML_HTML_DOCUMENT_NODE
           || $node_type == XML_DOCUMENT_FRAG_NODE) {
        # Sometimes it is useful to generate SAX events from just a
        # document fragment (works very well with filters).
        foreach my $kid ($node->childNodes) {
            $self->process_node($kid);
        }
    }
    elsif ($node_type == XML_PI_NODE) {
        $self->processing_instruction( { Target => $node->getName, Data => $node->getData } );
    }
    elsif ( $node_type == XML_XINCLUDE_START
            || $node_type == XML_XINCLUDE_END ) {
        # ignore!
        # i may want to handle this one day, dunno yet
    }
    elsif ($node_type == XML_DTD_NODE ) {
        # ignore!
        # i will support DTDs, but had no time yet.
    }
    else {
        # warn("unsupported node type: $node_type");
    }
}
# Emit the SAX events for one element and everything below it.
#
# Event order: start_prefix_mapping for every namespace returned by
# getNamespaces, start_element (with the attribute hash built below),
# process_node for each child, end_element (same data without Attributes),
# then end_prefix_mapping for the same namespaces.
#
# The attribute hash is keyed by "{namespaceURI}localname"; each value is a
# hash with Name, Value, NamespaceURI, Prefix and LocalName.
sub process_element {
my ($self, $element) = @_;
my $attribs = {};
my @ns_maps = $element->getNamespaces;
# Announce the namespace declarations carried by this element; an undefined
# localname is reported as the empty prefix.
foreach my $ns (@ns_maps) {
$self->start_prefix_mapping(
{
NamespaceURI => $ns->href,
Prefix => ( defined $ns->localname ? $ns->localname : ''),
}
);
}
foreach my $attr ($element->attributes) {
# NOTE(review): this outer $key is only assigned in the ordinary-attribute
# branch below; the namespace branch declares its own inner `my $key`.
my $key;
# warn("Attr: $attr -> ", $attr->getName, " = ", $attr->getData, "\n");
# Namespace declarations come back from attributes() as
# XML::LibXML::Namespace objects and are handled separately.
if ($attr->isa('XML::LibXML::Namespace')) {
# TODO This needs fixing modulo agreeing on what
# is the right thing to do here.
unless ( defined $attr->name ) {
## It's an attribute like "xmlns='foo'"
$attribs->{"{}xmlns"} =
{
Name => "xmlns",
LocalName => "xmlns",
Prefix => "",
Value => $attr->href,
NamespaceURI => "",
};
}
else {
# Declaration reported in the xmlns namespace; the key and name gain
# the localname only when one is defined (i.e. "xmlns:localname").
my $prefix = "xmlns";
my $localname = $attr->localname;
my $key = "{http://www.w3.org/2000/xmlns/}";
my $name = "xmlns";
if ( defined $localname ) {
$key .= $localname;
$name.= ":".$localname;
}
$attribs->{$key} =
{
Name => $name,
Value => $attr->href,
NamespaceURI => "http://www.w3.org/2000/xmlns/",
Prefix => $prefix,
LocalName => $localname,
};
}
}
else {
# Ordinary attribute: undefined namespace URI / name / prefix are
# normalised to the empty string.
my $ns = $attr->namespaceURI;
$ns = '' unless defined $ns;
$key = "{$ns}".$attr->localname;
## Not sure why, but $attr->name is coming through stripped
## of its prefix, so we need to hand-assemble a real name.
my $name = $attr->name;
$name = "" unless defined $name;
my $prefix = $attr->prefix;
$prefix = "" unless defined $prefix;
# Prepend the prefix only when the name has no colon yet and the prefix
# is non-empty.
$name = "$prefix:$name"
if index( $name, ":" ) < 0 && length $prefix;
$attribs->{$key} =
{
Name => $name,
Value => $attr->value,
NamespaceURI => $ns,
Prefix => $prefix,
LocalName => $attr->localname,
};
}
# use Data::Dumper;
# warn("Attr made: ", Dumper($attribs->{$key}), "\n");
}
my $node = {
Name => $element->nodeName,
Attributes => $attribs,
NamespaceURI => $element->namespaceURI,
Prefix => $element->prefix || "",
LocalName => $element->localname,
};
$self->start_element($node);
foreach my $child ($element->childNodes) {
$self->process_node($child);
}
# end_element gets a shallow copy of the start data minus the attributes.
my $end_node = { %$node };
delete $end_node->{Attributes};
$self->end_element($end_node);
foreach my $ns (@ns_maps) {
$self->end_prefix_mapping(
{
NamespaceURI => $ns->href,
Prefix => ( defined $ns->localname ? $ns->localname : ''),
}
);
}
}
1;
__END__
PK @[t LibXML/SAX/Builder.pmnu W+A # $Id: Builder.pm 785 2009-07-16 14:17:46Z pajas $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Builder;
use XML::LibXML;
use XML::NamespaceSupport;
use vars qw ($VERSION);
# Returns 0 when $XML::LibXML::__threads_shared is true and 1 otherwise.
sub CLONE_SKIP {
    if ($XML::LibXML::__threads_shared) {
        return 0;
    }
    return 1;
}
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
# Constructor: the key/value arguments become the object's fields.
# start_document() later reads the Encoding and Version fields, if present.
sub new {
    my ($class, %fields) = @_;
    return bless { %fields }, $class;
}
# Return the DOM stored by the most recent done() call (undef if none yet).
sub result {
    my ($self) = @_;
    return $self->{LAST_DOM};
}
# Finish a build: pick the result, clear the per-parse state and return it.
#
# The result is the document (DOM) when one exists, otherwise the current
# Parent (the case when only document chunks were parsed). It is remembered
# in LAST_DOM so that result() can return it later.
sub done {
    my ($self) = @_;
    my $dom = defined $self->{DOM} ? $self->{DOM} : $self->{Parent};
    delete @{$self}{qw(NamespaceStack Parent DOM)};
    $self->{LAST_DOM} = $dom;
    return $dom;
}
# SAX callback for the document locator. The body is empty, so any locator
# passed in is ignored.
sub set_document_locator {
}
# SAX callback: create the document's external subset from the DTD event.
#
# Nothing is done unless the event carries a Name and at least one of
# SystemId / PublicId.
sub start_dtd {
    my ($self, $dtd) = @_;
    my $has_id = defined($dtd->{SystemId}) || defined($dtd->{PublicId});
    if (defined $dtd->{Name} and $has_id) {
        $self->{DOM}->createExternalSubset(
            $dtd->{Name}, $dtd->{PublicId}, $dtd->{SystemId}
        );
    }
}
# SAX callback for the end of the DTD. The body is empty: nothing is done.
sub end_dtd {
}
# SAX callback: begin a new document.
#
# Creates a fresh XML::LibXML::Document, applies the builder's own Encoding
# (and Version, defaulting to '1.0') through xml_decl() when an Encoding was
# configured, starts a new namespace stack and clears the current parent.
sub start_document {
    my $self = shift;

    $self->{DOM} = XML::LibXML::Document->createDocument();

    if ( defined $self->{Encoding} ) {
        my $version = $self->{Version} || '1.0';
        $self->xml_decl({ Version => $version, Encoding => $self->{Encoding} });
    }

    $self->{NamespaceStack} = XML::NamespaceSupport->new;
    $self->{NamespaceStack}->push_context;

    $self->{Parent} = undef;
    return ();
}
# SAX callback: copy the XML declaration onto the document being built.
# Only the Version and Encoding entries are used, each only when defined.
sub xml_decl {
    my ($self, $decl) = @_;

    $self->{DOM}->setVersion( $decl->{Version} )
        if defined $decl->{Version};

    $self->{DOM}->setEncoding( $decl->{Encoding} )
        if defined $decl->{Encoding};

    return ();
}
# SAX callback: finish the document and return what done() returns.
sub end_document {
    my $self = shift;
    return scalar $self->done();
}
# SAX callback: declare a namespace prefix on the namespace stack.
#
# When neither a document nor a parent exists yet, a document fragment and a
# fresh namespace stack are created first. Also sets USENAMESPACESTACK, which
# start_element() consults when it meets xmlns attributes.
sub start_prefix_mapping {
    my ($self, $ns) = @_;

    if ( !defined($self->{DOM}) && !defined($self->{Parent}) ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    $self->{USENAMESPACESTACK} = 1;
    $self->{NamespaceStack}->declare_prefix( $ns->{Prefix}, $ns->{NamespaceURI} );
    return ();
}
# SAX callback: remove a prefix declaration from the namespace stack.
sub end_prefix_mapping {
    my ($self, $ns) = @_;
    $self->{NamespaceStack}->undeclare_prefix( $ns->{Prefix} );
    return ();
}
# SAX callback: create an element, attach it, and make it the current parent.
#
# $el is a SAX element hash (Name, NamespaceURI, Prefix, Attributes).
#
# Steps:
#  1. With neither a document nor a parent, start a document fragment and a
#     fresh namespace stack.
#  2. Create the node: as a new child of the current parent, or else as the
#     document element of the DOM.
#  3. Declare on the node every prefix declared in the current namespace
#     context, except those whose URI equals the element's own namespace URI.
#  4. Make the node the current parent and push a namespace context.
#  5. Copy the attributes (see the loop below for the xmlns special cases).
#
# Fix: an xmlns:* attribute's prefix was read from $attr->{Localname}, but
# the SAX2 key (and the one XML::LibXML::SAX::Parser emits) is "LocalName",
# so the prefix was always undef. "LocalName" is now read first, with the old
# misspelt key kept as a fallback for callers that supplied it.
sub start_element {
    my ($self, $el) = @_;
    my $node;

    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    if ( defined $self->{Parent} ) {
        $el->{NamespaceURI} ||= "";
        $node = $self->{Parent}->addNewChild( $el->{NamespaceURI},
                                              $el->{Name} );
    }
    else {
        if ($el->{NamespaceURI}) {
            if ( defined $self->{DOM} ) {
                $node = $self->{DOM}->createRawElementNS($el->{NamespaceURI},
                                                         $el->{Name});
            }
            else {
                $node = XML::LibXML::Element->new( $el->{Name} );
                $node->setNamespace( $el->{NamespaceURI},
                                     $el->{Prefix} , 1 );
            }
        }
        else {
            if ( defined $self->{DOM} ) {
                $node = $self->{DOM}->createRawElement($el->{Name});
            }
            else {
                $node = XML::LibXML::Element->new( $el->{Name} );
            }
        }
        $self->{DOM}->setDocumentElement($node);
    }

    # build namespaces
    # $skip_ns records that the namespace stack supplied at least one
    # declaration, in which case xmlns:* attributes are not turned into
    # namespace declarations again below.
    my $skip_ns = 0;
    foreach my $p ( $self->{NamespaceStack}->get_declared_prefixes() ) {
        $skip_ns = 1;
        my $uri = $self->{NamespaceStack}->get_uri($p);
        if ( defined $uri
             and defined $el->{NamespaceURI}
             and $uri eq $el->{NamespaceURI} ) {
            next;
        }
        $node->setNamespace($uri, $p, 0 );
    }

    $self->{Parent} = $node;
    $self->{NamespaceStack}->push_context;

    # do attributes
    foreach my $key (keys %{$el->{Attributes}}) {
        my $attr = $el->{Attributes}->{$key};
        if (ref($attr)) {
            # catch broken name/value pairs
            next unless $attr->{Name} ;

            # Namespace declarations were already delivered through
            # start_prefix_mapping(): skip the xmlns attributes.
            next if $self->{USENAMESPACESTACK}
                    and ( $attr->{Name} eq "xmlns"
                          or ( defined $attr->{Prefix}
                               and $attr->{Prefix} eq "xmlns" ) );

            if ( defined $attr->{Prefix}
                 and $attr->{Prefix} eq "xmlns" and $skip_ns == 0 ) {
                # ok, the generator does not set namespaces correctly!
                my $uri = $attr->{Value};
                my $ns_prefix = defined $attr->{LocalName}
                    ? $attr->{LocalName}
                    : $attr->{Localname};
                $node->setNamespace($uri,
                                    $ns_prefix,
                                    $uri eq $el->{NamespaceURI} ? 1 : 0 );
            }
            else {
                $node->setAttributeNS($attr->{NamespaceURI} || "",
                                      $attr->{Name}, $attr->{Value});
            }
        }
        else {
            # Plain key => value attribute (not a SAX attribute hash).
            $node->setAttribute($key => $attr);
        }
    }
    return ();
}
# SAX callback: close the current element.
#
# Does nothing when there is no current parent. Otherwise pops the namespace
# context pushed by start_element() and moves the current parent one level up.
sub end_element {
    my ($self, $el) = @_;
    if ($self->{Parent}) {
        $self->{NamespaceStack}->pop_context;
        $self->{Parent} = $self->{Parent}->parentNode();
        return ();
    }
    return;
}
# SAX callback: following characters() data belongs to a CDATA section.
sub start_cdata {
    my ($self) = @_;
    $self->{IN_CDATA} = 1;
    return ();
}
# SAX callback: the CDATA section is over; characters() data is text again.
sub end_cdata {
    my ($self) = @_;
    $self->{IN_CDATA} = 0;
    return ();
}
# SAX callback: add character data below the current parent.
#
# With neither a document nor a parent, a document fragment and a fresh
# namespace stack are started first. Events without a parent or without
# defined Data are ignored.
#
# Inside a CDATA section (IN_CDATA == 1) a CDATA node is created and added;
# otherwise the data is appended as text - through the parent's appendText()
# when a document exists, or as a new Text node when building a fragment.
sub characters {
    my ($self, $chars) = @_;

    if ( !defined($self->{DOM}) && !defined($self->{Parent}) ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }
    return unless $self->{Parent};
    return unless defined $chars and defined $chars->{Data};

    my $in_cdata = ( defined $self->{IN_CDATA} and $self->{IN_CDATA} == 1 );

    my $node;
    if ( defined $self->{DOM} ) {
        unless ($in_cdata) {
            # Plain text inside a document: append directly and stop here.
            $self->{Parent}->appendText($chars->{Data});
            return;
        }
        $node = $self->{DOM}->createCDATASection($chars->{Data});
    }
    elsif ($in_cdata) {
        $node = XML::LibXML::CDATASection->new($chars->{Data});
    }
    else {
        $node = XML::LibXML::Text->new($chars->{Data});
    }

    $self->{Parent}->addChild($node);
    return ();
}
# SAX callback: add a comment node.
#
# With neither a document nor a parent, a document fragment and a fresh
# namespace stack are started first. Events without defined Data are ignored.
# The comment is created through the document when there is one, and added to
# the current parent, or to the document itself when no parent is set.
sub comment {
    my ($self, $chars) = @_;

    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }
    return unless defined $chars and defined $chars->{Data};

    my $comment = defined $self->{DOM}
        ? $self->{DOM}->createComment( $chars->{Data} )
        : XML::LibXML::Comment->new( $chars->{Data} );

    my $target = defined $self->{Parent} ? $self->{Parent} : $self->{DOM};
    $target->addChild($comment);
    return ();
}
# SAX callback: add a processing instruction.
#
# Ignored when no document is being built. The PI is created from the
# event's Target and Data and added to the current parent, or to the
# document itself when no parent is set.
sub processing_instruction {
    my ($self, $pi) = @_;
    my $dom = $self->{DOM};
    return unless defined $dom;

    my $PI = $dom->createPI( $pi->{Target}, $pi->{Data} );
    my $target = defined $self->{Parent} ? $self->{Parent} : $dom;
    $target->addChild( $PI );
    return ();
}
# SAX callback for warnings: call the error object's throw() inside an eval,
# so that $@ is filled in but the exception does not propagate.
sub warning {
    my ($self, $error) = @_;
    eval { $error->throw; };
}
# SAX callback for errors: discard the partially built state (namespace
# stack, current parent, document) and rethrow through the error object.
sub error {
    my ($self, $error) = @_;
    delete @{$self}{qw(NamespaceStack Parent DOM)};
    $error->throw;
}
# SAX callback for fatal errors: discard the partially built state (namespace
# stack, current parent, document) and rethrow through the error object.
sub fatal_error {
    my ($self, $error) = @_;
    for my $field (qw(NamespaceStack Parent DOM)) {
        delete $self->{$field};
    }
    $error->throw;
}
1;
__END__
PK @[q LibXML/SAX/Generator.pmnu W+A # $Id: Generator.pm 772 2009-01-23 21:42:09Z pajas
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Generator;
use strict;
use XML::LibXML;
use vars qw ($VERSION);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
# Thread-cloning hook: objects of this class are skipped when a new thread is
# cloned (returns 1) unless XML::LibXML runs in shared-threads mode, in which
# case cloning is allowed (returns 0).
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Top-level statement: runs when this file is loaded, so every program that
# loads the module gets the deprecation notice on STDERR.
warn("This class (", __PACKAGE__, ") is deprecated!");
# Constructor. Accepts either a list of key/value options or, as a shortcut,
# a single argument which is taken to be the SAX handler (stored under the
# 'Handler' key). Returns the options hash blessed into $class.
sub new {
    my ($class, @args) = @_;

    # A lone argument is shorthand for Handler => $arg.
    @args = ( 'Handler', $args[0] ) if @args == 1;

    my %options = @args;
    return bless \%options, $class;
}
# Walk $node and replay it to the configured handler as SAX events,
# bracketed by start_document / end_document. Both document events receive
# the same { Parent => undef } hash.
sub generate {
    my ($self, $node) = @_;

    my $document = { Parent => undef };

    $self->{Handler}->start_document($document);
    process_node( $self->{Handler}, $node );
    $self->{Handler}->end_document($document);
}
# Dispatch a single DOM node to the matching SAX event on $handler.
#
#   comment                -> comment event
#   text / CDATA section   -> characters event
#   element                -> process_element (start/children/end)
#   entity reference       -> each child is processed recursively
#   document               -> only the first element child is processed
#
# Any other node type is reported with a warning and otherwise ignored.
sub process_node {
    my ($handler, $node) = @_;
    my $node_type = $node->getType();

    if ($node_type == XML_COMMENT_NODE) {
        $handler->comment( { Data => $node->getData } );
    }
    elsif ($node_type == XML_TEXT_NODE or $node_type == XML_CDATA_SECTION_NODE) {
        $handler->characters( { Data => $node->getData } );
    }
    elsif ($node_type == XML_ELEMENT_NODE) {
        process_element($handler, $node);
    }
    elsif ($node_type == XML_ENTITY_REF_NODE) {
        # Expand the reference by walking whatever it contains.
        for my $child ($node->getChildnodes) {
            process_node($handler, $child);
        }
    }
    elsif ($node_type == XML_DOCUMENT_NODE) {
        # Just the root element; other top-level nodes are ignored.
        for my $child ($node->getChildnodes) {
            next unless $child->getType() == XML_ELEMENT_NODE;
            process_element($handler, $child);
            last;
        }
    }
    else {
        warn("unknown node type: $node_type");
    }
}
# Emit the SAX events for one element: start_element, then every child node
# via process_node, then end_element. The same element hash is passed to both
# start_element and end_element.
#
# Attributes are wrapped in XML::LibXML::SAX::AttributeNode objects and
# handed over as a hash keyed by attribute name.
sub process_element {
    my ($handler, $element) = @_;

    my %attributes;
    for my $attribute ($element->getAttributes) {
        my $sax_attribute = XML::LibXML::SAX::AttributeNode->new(
            Name         => $attribute->getName,
            Value        => $attribute->getData,
            NamespaceURI => $attribute->getNamespaceURI,
            Prefix       => $attribute->getPrefix,
            LocalName    => $attribute->getLocalName,
        );
        $attributes{ $sax_attribute->{Name} } = $sax_attribute;
    }

    my $event = {
        Name         => $element->getName,
        Attributes   => \%attributes,
        NamespaceURI => $element->getNamespaceURI,
        Prefix       => $element->getPrefix,
        LocalName    => $element->getLocalName,
    };

    $handler->start_element($event);
    process_node($handler, $_) foreach $element->getChildnodes;
    $handler->end_element($event);
}
package XML::LibXML::SAX::AttributeNode;
use overload '""' => "stringify";
# Constructor: stores the given key/value pairs (Name, Value, NamespaceURI,
# Prefix, LocalName, ...) in a hash blessed into $class.
sub new {
    my ($class, @pairs) = @_;
    my %fields = @pairs;
    return bless \%fields, $class;
}
# Implementation of the overloaded '""' operator: an attribute node used as a
# string yields its Value.
sub stringify {
    my ($self) = @_;
    return $self->{Value};
}
1;
__END__
=head1 NAME
XML::LibXML::SAX::Generator - Generate SAX events from a LibXML tree
=head1 SYNOPSIS
my $handler = MySAXHandler->new();
my $generator = XML::LibXML::SAX::Generator->new(Handler => $handler);
my $dom = XML::LibXML->new->parse_file("foo.xml");
$generator->generate($dom);
=head1 DESCRIPTION
THIS CLASS IS DEPRECATED! Use XML::LibXML::SAX::Parser instead!
This helper class allows you to generate SAX events from any XML::LibXML
node, and all its sub-nodes. This basically gives you interop from
XML::LibXML to other modules that may implement SAX.
It uses SAX2 style, but should be compatible with anything SAX1, by use
of stringification overloading.
There is nothing to really know about, beyond the synopsis above, and
a general knowledge of how to use SAX, which is beyond the scope here.
=cut
PK @[SYP P LibXML/SAX/Builder.podnu W+A =head1 NAME
XML::LibXML::SAX::Builder - Building DOM trees from SAX events.
=head1 SYNOPSIS
use XML::LibXML::SAX::Builder;
my $builder = XML::LibXML::SAX::Builder->new();
my $gen = XML::Generator::DBI->new(Handler => $builder, dbh => $dbh);
$gen->execute("SELECT * FROM Users");
my $doc = $builder->result();
=head1 DESCRIPTION
This is a SAX handler that generates a DOM tree from SAX events. Usage is as
above. Input is accepted from any SAX1 or SAX2 event generator.
Building DOM trees from SAX events is quite easy with
XML::LibXML::SAX::Builder. The class is designed as a SAX2 final handler not as
a filter!
Since SAX is strictly stream oriented, you should not expect anything to return
from a generator. Instead you have to ask the builder instance directly to get
the document built. XML::LibXML::SAX::Builder's result() function holds the
document generated from the last SAX stream.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[\ LibXML/Reader.pmnu W+A # $Id: Reader.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Reader;
use XML::LibXML;
use Carp;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
use 5.008_000;
# Compile-time guard: the reader API lives in the XS layer. If the
# _newForFile method is not available there, abort loading with a message
# that points at the missing libxml2 reader support.
BEGIN {
UNIVERSAL::can('XML::LibXML::Reader','_newForFile') or
croak("Cannot use XML::LibXML::Reader module - ".
"your libxml2 is compiled without reader support!");
}
use base qw(Exporter);
# Numeric constants exported by this module (see %EXPORT_TAGS below).
use constant {
# XML_READER_TYPE_*: node type codes, exported under the 'types' tag
XML_READER_TYPE_NONE => 0,
XML_READER_TYPE_ELEMENT => 1,
XML_READER_TYPE_ATTRIBUTE => 2,
XML_READER_TYPE_TEXT => 3,
XML_READER_TYPE_CDATA => 4,
XML_READER_TYPE_ENTITY_REFERENCE => 5,
XML_READER_TYPE_ENTITY => 6,
XML_READER_TYPE_PROCESSING_INSTRUCTION => 7,
XML_READER_TYPE_COMMENT => 8,
XML_READER_TYPE_DOCUMENT => 9,
XML_READER_TYPE_DOCUMENT_TYPE => 10,
XML_READER_TYPE_DOCUMENT_FRAGMENT => 11,
XML_READER_TYPE_NOTATION => 12,
XML_READER_TYPE_WHITESPACE => 13,
XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14,
XML_READER_TYPE_END_ELEMENT => 15,
XML_READER_TYPE_END_ENTITY => 16,
XML_READER_TYPE_XML_DECLARATION => 17,
# XML_READER_*: reader state codes, exported under the 'states' tag
XML_READER_NONE => -1,
XML_READER_START => 0,
XML_READER_ELEMENT => 1,
XML_READER_END => 2,
XML_READER_EMPTY => 3,
XML_READER_BACKTRACK => 4,
XML_READER_DONE => 5,
XML_READER_ERROR => 6
};
use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS );
# Thread-cloning hook: always true, so reader objects are never cloned into
# newly created threads.
sub CLONE_SKIP {
    return 1;
}
# Exporter configuration, done at compile time.
# Two tag groups are defined: 'types' (XML_READER_TYPE_* node type codes) and
# 'states' (XML_READER_* state codes). Everything in both groups is exported
# by default (@EXPORT), is also listed in @EXPORT_OK, and is reachable through
# the 'all' tag.
BEGIN {
%EXPORT_TAGS = (
types =>
[qw(
XML_READER_TYPE_NONE
XML_READER_TYPE_ELEMENT
XML_READER_TYPE_ATTRIBUTE
XML_READER_TYPE_TEXT
XML_READER_TYPE_CDATA
XML_READER_TYPE_ENTITY_REFERENCE
XML_READER_TYPE_ENTITY
XML_READER_TYPE_PROCESSING_INSTRUCTION
XML_READER_TYPE_COMMENT
XML_READER_TYPE_DOCUMENT
XML_READER_TYPE_DOCUMENT_TYPE
XML_READER_TYPE_DOCUMENT_FRAGMENT
XML_READER_TYPE_NOTATION
XML_READER_TYPE_WHITESPACE
XML_READER_TYPE_SIGNIFICANT_WHITESPACE
XML_READER_TYPE_END_ELEMENT
XML_READER_TYPE_END_ENTITY
XML_READER_TYPE_XML_DECLARATION
)],
states =>
[qw(
XML_READER_NONE
XML_READER_START
XML_READER_ELEMENT
XML_READER_END
XML_READER_EMPTY
XML_READER_BACKTRACK
XML_READER_DONE
XML_READER_ERROR
)]
);
@EXPORT = (@{$EXPORT_TAGS{types}},@{$EXPORT_TAGS{states}});
@EXPORT_OK = @EXPORT;
# 'all' aliases the @EXPORT_OK array itself (by reference, not a copy)
$EXPORT_TAGS{all}=\@EXPORT_OK;
}
{
# Maps the property names accepted by getParserProp()/setParserProp() to the
# numeric property ids that are handed to _getParserProp()/_setParserProp().
# Lexical to the enclosing block, so only the subs defined there can see it.
my %props = (
load_ext_dtd => 1, # load the external subset
complete_attributes => 2, # default DTD attributes
validation => 3, # validate with the DTD
expand_entities => 4, # substitute entities
);
# Return the current value of the parser property called $name.
# $name must be one of the keys of %props; for any other name undef is
# returned without consulting the reader.
sub getParserProp {
    my ($self, $name) = @_;
    my $id = $props{$name};
    return defined $id ? $self->_getParserProp($id) : undef;
}
# Set one or more parser properties.
# Arguments are name => value pairs, given either as a flat list or inside
# hash references (which are flattened). Each name is translated through
# %props and passed to _setParserProp together with its value.
# Always returns nothing.
sub setParserProp {
    my ($self, @settings) = @_;
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @settings;

    while ( my ($name, $value) = each %args ) {
        $self->_setParserProp( $props{$name}, $value );
    }

    return;
}
# Per-reader keep-alive tables, keyed by the stringified reader object.
# new() stores references to the source string, RelaxNG object and Schema
# object here; DESTROY removes the entries again.
my (%string_pool,%rng_pool,%xsd_pool); # used to preserve data passed to the reader
# Construct a reader.
#
# Arguments are key => value pairs (flat list and/or hash references).
# Exactly one input source is used, checked in this order:
#   location - file name / URL, opened via _newForFile
#   string   - in-memory document (kept alive in %string_pool)
#   IO       - Perl IO handle
#   DOM      - an XML::LibXML::Document (croaks for anything else)
#   FD       - file handle, passed on as its file descriptor number
# Croaks when none of these is given.
#
# Optional keys: encoding, URI (stringified, so URI objects work), any parser
# option understood by XML::LibXML->_parser_options, plus RelaxNG and Schema.
# For the latter two, a reference is treated as a compiled schema object and
# kept alive in the corresponding pool; a plain value is treated as a file
# name.
sub new {
    my ($class, @spec) = @_;
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @spec;

    my $encoding = $args{encoding};
    my $URI      = $args{URI};
    $URI = "$URI" if defined $URI;    # stringify in case it is an URI object
    my $options = XML::LibXML->_parser_options(\%args);

    my $self;
    if ( defined $args{location} ) {
        $self = $class->_newForFile( $args{location}, $encoding, $options );
    }
    elsif ( defined $args{string} ) {
        $self = $class->_newForString( $args{string}, $URI, $encoding, $options );
        # keep the source string alive for as long as the reader exists
        $string_pool{$self} = \$args{string};
    }
    elsif ( defined $args{IO} ) {
        $self = $class->_newForIO( $args{IO}, $URI, $encoding, $options );
    }
    elsif ( defined $args{DOM} ) {
        UNIVERSAL::isa($args{DOM}, 'XML::LibXML::Document')
            or croak("DOM must be a XML::LibXML::Document node");
        $self = $class->_newForDOM( $args{DOM} );
    }
    elsif ( defined $args{FD} ) {
        $self = $class->_newForFd( fileno($args{FD}), $URI, $encoding, $options );
    }
    else {
        croak("XML::LibXML::Reader->new: specify location, string, IO, DOM, or FD");
    }

    if ( $args{RelaxNG} ) {
        if ( ref $args{RelaxNG} ) {
            $rng_pool{$self} = \$args{RelaxNG};
            $self->_setRelaxNG( $args{RelaxNG} );
        }
        else {
            $self->_setRelaxNGFile( $args{RelaxNG} );
        }
    }

    if ( $args{Schema} ) {
        if ( ref $args{Schema} ) {
            $xsd_pool{$self} = \$args{Schema};
            $self->_setXSD( $args{Schema} );
        }
        else {
            $self->_setXSDFile( $args{Schema} );
        }
    }

    return $self;
}
# Destructor: forget everything that was kept alive for this reader (source
# string, RelaxNG and Schema objects), then release the underlying reader
# through _DESTROY.
sub DESTROY {
    my ($self) = @_;

    foreach my $pool ( \%string_pool, \%rng_pool, \%xsd_pool ) {
        delete $pool->{$self};
    }

    $self->_DESTROY;
}
}
# Close the reader, translating the result to Perl's close() convention:
# _close yields 0 on success and -1 on failure, whereas this method returns
# 1 on success and 0 on failure.
sub close {
    my ($reader) = @_;
    return 1 if $reader->_close == 0;
    return 0;
}
# Register a pattern of nodes to preserve.
# The namespace map may be given as a { prefix => URI } hash reference, in
# which case it is converted to the flat (URI, prefix, ...) array reference
# that _preservePattern is called with. Any other argument list is passed
# through untouched.
sub preservePattern {
    my ($reader, @args) = @_;
    my ($pattern, $ns_map) = @args;

    return $reader->_preservePattern(@args)
        unless ref($ns_map) eq 'HASH';

    # reversing the flattened hash turns prefix=>URI pairs into (URI, prefix)
    return $reader->_preservePattern( $pattern, [ reverse %$ns_map ] );
}
# Return a simplified path of the current node, e.g. "/foo/bar".
#
# The raw path from _nodePath carries positional predicates such as
# /foo[1]/bar[1]. They are stripped because the sibling count in the buffered
# fragment is basically random and generally misleading.
#
# Returns undef when _nodePath yields no path. The explicit check avoids an
# "uninitialized value in substitution" warning in that case; the returned
# value is the same as before.
sub nodePath {
    my $reader = shift;
    my $path   = $reader->_nodePath;
    return $path unless defined $path;
    $path =~ s/\[\d+\]//g;    # make /foo[1]/bar[1] just /foo/bar
    return $path;
}
1;
__END__
PK @[l3 l3 LibXML/Element.podnu W+A =head1 NAME
XML::LibXML::Element - XML::LibXML Class for Element Nodes
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Element nodes are listed here,
# see XML::LibXML::Node manpage for other methods
$node = XML::LibXML::Element->new( $name );
$node->setAttribute( $aname, $avalue );
$node->setAttributeNS( $nsURI, $aname, $avalue );
$avalue = $node->getAttribute( $aname );
$avalue = $node->getAttributeNS( $nsURI, $aname );
$attrnode = $node->getAttributeNode( $aname );
$attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname );
$node->removeAttribute( $aname );
$node->removeAttributeNS( $nsURI, $aname );
$boolean = $node->hasAttribute( $aname );
$boolean = $node->hasAttributeNS( $nsURI, $aname );
@nodes = $node->getChildrenByTagName($tagname);
@nodes = $node->getChildrenByTagNameNS($nsURI,$tagname);
@nodes = $node->getChildrenByLocalName($localname);
@nodes = $node->getElementsByTagName($tagname);
@nodes = $node->getElementsByTagNameNS($nsURI,$localname);
@nodes = $node->getElementsByLocalName($localname);
$node->appendWellBalancedChunk( $chunk );
$node->appendText( $PCDATA );
$node->appendTextNode( $PCDATA );
$node->appendTextChild( $childname , $PCDATA );
$node->setNamespace( $nsURI , $nsPrefix, $activate );
$node->setNamespaceDeclURI( $nsPrefix, $newURI );
$node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix );
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::Element->new( $name );
This function creates a new node unbound to any DOM.
=item setAttribute
$node->setAttribute( $aname, $avalue );
This method sets or replaces the node's attribute C<<<<<< $aname >>>>>> to the value C<<<<<< $avalue >>>>>>
=item setAttributeNS
$node->setAttributeNS( $nsURI, $aname, $avalue );
Namespace-aware version of C<<<<<< setAttribute >>>>>>, where C<<<<<< $nsURI >>>>>> is a namespace URI, C<<<<<< $aname >>>>>> is a qualified name, and C<<<<<< $avalue >>>>>> is the value. The namespace URI may be null (empty or undefined) in order to
create an attribute which has no namespace.
The current implementation differs from DOM in the following aspects
If an attribute with the same local name and namespace URI already exists on
the element, but its prefix differs from the prefix of C<<<<<< $aname >>>>>>, then this function is supposed to change the prefix (regardless of namespace
declarations and possible collisions). However, the current implementation does
rather the opposite. If a prefix is declared for the namespace URI in the scope
of the attribute, then the already declared prefix is used, disregarding the
prefix specified in C<<<<<< $aname >>>>>>. If no prefix is declared for the namespace, the function tries to declare the
prefix specified in C<<<<<< $aname >>>>>> and dies if the prefix is already taken by some other namespace.
According to DOM Level 2 specification, this method can also be used to create
or modify special attributes used for declaring XML namespaces (which belong to
the namespace "http://www.w3.org/2000/xmlns/" and have prefix or name "xmlns").
This should work since version 1.61, but again the implementation differs from
DOM specification in the following: if a declaration of the same namespace
prefix already exists on the element, then changing its value via this method
automatically changes the namespace of all elements and attributes in its
scope. This is because in libxml2 the namespace URI of an element is not static
but is computed from a pointer to a namespace declaration attribute.
=item getAttribute
$avalue = $node->getAttribute( $aname );
If C<<<<<< $node >>>>>> has an attribute with the name C<<<<<< $aname >>>>>>, the value of this attribute will get returned.
=item getAttributeNS
$avalue = $node->getAttributeNS( $nsURI, $aname );
Retrieves an attribute value by local name and namespace URI.
=item getAttributeNode
$attrnode = $node->getAttributeNode( $aname );
Retrieve an attribute node by name. If no attribute with a given name exists, C<<<<<< undef >>>>>> is returned.
=item getAttributeNodeNS
$attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname );
Retrieves an attribute node by local name and namespace URI. If no attribute
with a given localname and namespace exists, C<<<<<< undef >>>>>> is returned.
=item removeAttribute
$node->removeAttribute( $aname );
The method removes the attribute C<<<<<< $aname >>>>>> from the node's attribute list, if the attribute can be found.
=item removeAttributeNS
$node->removeAttributeNS( $nsURI, $aname );
Namespace version of C<<<<<< removeAttribute >>>>>>
=item hasAttribute
$boolean = $node->hasAttribute( $aname );
This function tests if the named attribute is set for the node. If the
attribute is specified, TRUE (1) will be returned, otherwise the return value
is FALSE (0).
=item hasAttributeNS
$boolean = $node->hasAttributeNS( $nsURI, $aname );
namespace version of C<<<<<< hasAttribute >>>>>>
=item getChildrenByTagName
@nodes = $node->getChildrenByTagName($tagname);
The function gives direct access to all child elements of the current node with
a given tagname, where tagname is a qualified name, that is, in case of
namespace usage it may consist of a prefix and local name. This function makes
things a lot easier if one needs to handle big data sets. A special tagname '*'
can be used to match any name.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getChildrenByTagNameNS
@nodes = $node->getChildrenByTagNameNS($nsURI,$tagname);
Namespace version of C<<<<<< getChildrenByTagName >>>>>>. A special nsURI '*' matches any namespace URI, in which case the function
behaves just like C<<<<<< getChildrenByLocalName >>>>>>.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getChildrenByLocalName
@nodes = $node->getChildrenByLocalName($localname);
The function gives direct access to all child elements of the current node with
a given local name. It makes things a lot easier if one needs to handle big
data sets. A special C<<<<<< localname >>>>>> '*' can be used to match any local name.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getElementsByTagName
@nodes = $node->getElementsByTagName($tagname);
This function is part of the spec. It fetches all descendants of a node with a
given tagname, where C<<<<<< tagname >>>>>> is a qualified name, that is, in case of namespace usage it may consist of a
prefix and local name. A special C<<<<<< tagname >>>>>> '*' can be used to match any tag name.
In SCALAR context this function returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByTagNameNS
@nodes = $node->getElementsByTagNameNS($nsURI,$localname);
Namespace version of C<<<<<< getElementsByTagName >>>>>> as found in the DOM spec. A special C<<<<<< localname >>>>>> '*' can be used to match any local name and C<<<<<< nsURI >>>>>> '*' can be used to match any namespace URI.
In SCALAR context this function returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByLocalName
@nodes = $node->getElementsByLocalName($localname);
This function is not found in the DOM specification. It is a mix of
getElementsByTagName and getElementsByTagNameNS. It will fetch all tags
matching the given local-name. This allows one to select tags with the same
local name across namespace borders.
In SCALAR context this function returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item appendWellBalancedChunk
$node->appendWellBalancedChunk( $chunk );
Sometimes it is necessary to append a string coded XML Tree to a node. I<<<<<< appendWellBalancedChunk >>>>>> will do the trick for you. But this is only done if the String is C<<<<<< well-balanced >>>>>>.
I<<<<<< Note that appendWellBalancedChunk() is only left for compatibility reasons >>>>>>. Implicitly it uses
my $fragment = $parser->parse_xml_chunk( $chunk );
$node->appendChild( $fragment );
This form is more explicit and makes it easier to control the flow of a script.
=item appendText
$node->appendText( $PCDATA );
alias for appendTextNode().
=item appendTextNode
$node->appendTextNode( $PCDATA );
This wrapper function lets you add a string directly to an element node.
=item appendTextChild
$node->appendTextChild( $childname , $PCDATA );
Somewhat similar to C<<<<<< appendTextNode >>>>>>: It lets you set an Element, that contains only a C<<<<<< text node >>>>>> directly by specifying the name and the text content.
=item setNamespace
$node->setNamespace( $nsURI , $nsPrefix, $activate );
setNamespace() allows one to apply a namespace to an element. The function
takes three parameters: 1. the namespace URI, which is required and the two
optional values prefix, which is the namespace prefix, as it should be used in
child elements or attributes as well as the additional activate parameter. If
prefix is not given, undefined or empty, this function tries to create a
declaration of the default namespace.
The activate parameter is most useful: If this parameter is set to FALSE (0), a
new namespace declaration is simply added to the element while the element's
namespace itself is not altered. Nevertheless, activate is set to TRUE (1) by
default. In this case the namespace is used as the node's effective namespace.
This means the namespace prefix is added to the node name and if there was a
namespace already active for the node, it will be replaced (but its declaration
is not removed from the document). A new namespace declaration is only created
if necessary (that is, if the element is already in the scope of a namespace
declaration associating the prefix with the namespace URI, then this
declaration is reused).
The following example may clarify this:
my $e1 = $doc->createElement("bar");
$e1->setNamespace("http://foobar.org", "foo")
results
<foo:bar xmlns:foo="http://foobar.org"/>
while
my $e2 = $doc->createElement("bar");
$e2->setNamespace("http://foobar.org", "foo",0)
results only
<bar xmlns:foo="http://foobar.org"/>
By using $activate == 0 it is possible to create multiple namespace
declarations on a single element.
The function fails if it is required to create a declaration associating the
prefix with the namespace URI but the element already carries a declaration
with the same prefix but different namespace URI.
=item setNamespaceDeclURI
$node->setNamespaceDeclURI( $nsPrefix, $newURI );
EXPERIMENTAL IN 1.61 !
This function directly manipulates an existing namespace declaration on an
element. It takes two parameters: the prefix by which it looks up the namespace
declaration and a new namespace URI which replaces its previous value.
It returns 1 if the namespace declaration was found and changed, 0 otherwise.
All elements and attributes (even those previously unbound from the document)
for which the namespace declaration determines their namespace belong to the
new namespace after the change.
If the new URI is undef or empty, the nodes have no namespace and no prefix
after the change. Namespace declarations once nulled in this way do not further
appear in the serialized output (but do remain in the document for internal
integrity of libxml2 data structures).
This function is NOT part of any DOM API.
=item setNamespaceDeclPrefix
$node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix );
EXPERIMENTAL IN 1.61 !
This function directly manipulates an existing namespace declaration on an
element. It takes two parameters: the old prefix by which it looks up the
namespace declaration and a new prefix which is to replace the old one.
The function dies with an error if the element is in the scope of another
declaration whose prefix equals to the new prefix, or if the change should
result in a declaration with a non-empty prefix but empty namespace URI.
Otherwise, it returns 1 if the namespace declaration was found and changed and
0 if not found.
All elements and attributes (even those previously unbound from the document)
for which the namespace declaration determines their namespace change their
prefix to the new value.
If the new prefix is undef or empty, the namespace declaration becomes a
declaration of a default namespace. The corresponding nodes drop their
namespace prefix (but remain in the, now default, namespace). In this case the
function fails, if the containing element is in the scope of another default
namespace declaration.
This function is NOT part of any DOM API.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[=a a LibXML/DOM.podnu W+A =head1 NAME
XML::LibXML::DOM - XML::LibXML DOM Implementation
=head1 DESCRIPTION
XML::LibXML provides a lightweight interface to I<<<<<< modify >>>>>> a node of the document tree generated by the XML::LibXML parser. This interface
follows as far as possible the DOM Level 3 specification. Additionally to the
specified functions the XML::LibXML supports some functions that are more handy
to use in the perl environment.
One also has to remember, that XML::LibXML is an interface to libxml2 nodes
which actually reside on the C-Level of XML::LibXML. This means each node is a
reference to a structure different than a perl hash or array. The only way to
access these structure's values is through the DOM interface provided by
XML::LibXML. This also means, that one I<<<<<< can't >>>>>> simply inherit a XML::LibXML node and add new member variables as they were
hash keys.
The DOM interface of XML::LibXML does not intend to implement a full DOM
interface as it is done by XML::GDOME and used for full featured application.
Moreover, it offers a simple way to build or modify documents that are created
by XML::LibXML's parser.
Another target of the XML::LibXML interface is to make the interfaces of
libxml2 available to the perl community. This includes also some workarounds to
some features where libxml2 assumes more control over the C-Level that most
perl users don't have.
One of the most important parts of the XML::LibXML DOM interface is, that the
interfaces try to follow the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>) rather strictly. This means the interface functions are named as the DOM
specification says and not what widespread Java interfaces claim to be
standard. Although there are several functions that have only a singular
interface that conforms to the DOM spec XML::LibXML provides an additional Java
style alias interface.
Also there are some function interfaces left over from early stages of
XML::LibXML for compatibility reasons. These interfaces are for compatibility
reasons I<<<<<< only >>>>>>. They might disappear in one of the future versions of XML::LibXML, so a user
is requested to switch over to the official functions.
=head2 Encodings and XML::LibXML's DOM implementation
See the section on Encodings in the I<<<<<< XML::LibXML >>>>>> manual page.
=head2 Namespaces and XML::LibXML's DOM implementation
XML::LibXML's DOM implementation is limited by the DOM implementation of
libxml2 which treats namespaces slightly differently than required by the DOM
Level 2 specification.
According to the DOM Level 2 specification, namespaces of elements and
attributes should be persistent, and nodes should be permanently bound to
namespace URIs as they get created; it should be possible to manipulate the
special attributes used for declaring XML namespaces just as other attributes
without affecting the namespaces of other nodes. In DOM Level 2, the
application is responsible for creating the special attributes consistently
and/or for correct serialization of the document.
This is both inconvenient, causes problems in serialization of DOM to XML, and
most importantly, seems almost impossible to implement over libxml2.
In libxml2, namespace URI and prefix of a node is provided by a pointer to a
namespace declaration (appearing as a special xmlns attribute in the XML
document). If the prefix or namespace URI of the declaration changes, the
prefix and namespace URI of all nodes that point to it changes as well.
Moreover, in contrast to DOM, a node (element or attribute) can only be bound
to a namespace URI if there is some namespace declaration in the document to
point to.
Therefore current DOM implementation in XML::LibXML tries to treat namespace
declarations in a compromise between reason, common sense, limitations of
libxml2, and the DOM Level 2 specification.
In XML::LibXML, special attributes declaring XML namespaces are often created
automatically, usually when a namespaced node is attached to a document and no
existing declaration of the namespace and prefix is in the scope to be reused.
In this respect, XML::LibXML DOM implementation differs from the DOM Level 2
specification according to which special attributes for declaring the
appropriate XML namespaces should not be added when a node with a namespace
prefix and namespace URI is created.
Namespace declarations are also created when L<<<<<< XML::LibXML::Document >>>>>>'s createElementNS() or createAttributeNS() functions are used. If the
namespace is not declared on the documentElement, the namespace will be locally
declared for the newly created node. In case of Attributes this may look a bit
confusing, since these nodes cannot have namespace declarations itself. In this
case the namespace is internally applied to the attribute and later declared on
the node the attribute is appended to (if required).
The following example may explain this a bit:
my $doc = XML::LibXML->createDocument;
my $root = $doc->createElementNS( "", "foo" );
$doc->setDocumentElement( $root );
my $attr = $doc->createAttributeNS( "bar", "bar:foo", "test" );
$root->setAttributeNodeNS( $attr );
This piece of code will result in the following document:
<?xml version="1.0"?>
<foo bar:foo="test" xmlns:bar="bar"/>
The namespace is declared on the document element during the
setAttributeNodeNS() call.
Namespaces can also be declared explicitly by the use of XML::LibXML::Element's
setNamespace() function. Since 1.61, they can also be manipulated with
functions setNamespaceDeclPrefix() and setNamespaceDeclURI() (not available in
DOM). Changing an URI or prefix of an existing namespace declaration affects
the namespace URI and prefix of all nodes which point to it (that is the nodes
in its scope).
It is also important to repeat the specification: While working with namespaces
you should use the namespace aware functions instead of the simplified
versions. For example you should I<<<<<< never >>>>>> use setAttribute() but setAttributeNS().
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[:~ LibXML/Error.pmnu W+A # $Id: Error.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Error;
use strict;
use vars qw($AUTOLOAD @error_domains $VERSION $WARNINGS);
use Carp;
# Operator overloading for error objects:
#   ""  - stringification goes through as_string()
#   eq  - both operands are stringified and compared with eq
#   cmp - both operands are stringified and compared with cmp
# fallback => 1 lets Perl derive the remaining operators automatically.
use overload
'""' => \&as_string,
'eq' => sub {
("$_[0]" eq "$_[1]")
},
'cmp' => sub {
("$_[0]" cmp "$_[1]")
},
fallback => 1;
# How warning-level messages are handled (consulted by _callback_error)
$WARNINGS = 0; # 0: suppress, 1: report via warn, 2: report via die
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
# Error levels (stored in the 'level' field of an error object)
use constant XML_ERR_NONE => 0;
use constant XML_ERR_WARNING => 1; # A simple warning
use constant XML_ERR_ERROR => 2; # A recoverable error
use constant XML_ERR_FATAL => 3; # A fatal error
# Error domains (stored in the 'domain' field); the numeric value is also the
# index of the human-readable name in @error_domains below
use constant XML_ERR_FROM_NONE => 0;
use constant XML_ERR_FROM_PARSER => 1; # The XML parser
use constant XML_ERR_FROM_TREE => 2; # The tree module
use constant XML_ERR_FROM_NAMESPACE => 3; # The XML Namespace module
use constant XML_ERR_FROM_DTD => 4; # The XML DTD validation
use constant XML_ERR_FROM_HTML => 5; # The HTML parser
use constant XML_ERR_FROM_MEMORY => 6; # The memory allocator
use constant XML_ERR_FROM_OUTPUT => 7; # The serialization code
use constant XML_ERR_FROM_IO => 8; # The Input/Output stack
use constant XML_ERR_FROM_FTP => 9; # The FTP module
use constant XML_ERR_FROM_HTTP => 10; # The HTTP module
use constant XML_ERR_FROM_XINCLUDE => 11; # The XInclude processing
use constant XML_ERR_FROM_XPATH => 12; # The XPath module
use constant XML_ERR_FROM_XPOINTER => 13; # The XPointer module
use constant XML_ERR_FROM_REGEXP => 14; # The regular expressions module
use constant XML_ERR_FROM_DATATYPE => 15; # The W3C XML Schemas Datatype module
use constant XML_ERR_FROM_SCHEMASP => 16; # The W3C XML Schemas parser module
use constant XML_ERR_FROM_SCHEMASV => 17; # The W3C XML Schemas validation module
use constant XML_ERR_FROM_RELAXNGP => 18; # The Relax-NG parser module
use constant XML_ERR_FROM_RELAXNGV => 19; # The Relax-NG validator module
use constant XML_ERR_FROM_CATALOG => 20; # The Catalog module
use constant XML_ERR_FROM_C14N => 21; # The Canonicalization module
use constant XML_ERR_FROM_XSLT => 22; # The XSLT engine from libxslt
use constant XML_ERR_FROM_VALID => 23; # The validation module
# Display names for the domains above, indexed by the XML_ERR_FROM_* value
@error_domains = ("", "parser", "tree", "namespace", "validity",
"HTML parser", "memory", "output", "I/O", "ftp",
"http", "XInclude", "XPath", "xpointer", "regexp",
"Schemas datatype", "Schemas parser", "Schemas validity",
"Relax-NG parser", "Relax-NG validity",
"Catalog", "C14N", "XSLT", "validity");
{
# Build an error object.
#
# $xE is either a structured error object (anything that is a reference and
# answers domain/level/code/message/file/line/str1..3/num1..2 and
# context_and_column), or a plain error string ("flat" error).
#
# For a structured error all fields are copied; 'context' and 'column' are
# only stored when a context is available. A flat error becomes an object
# with domain 0, level 2, code -1, the string as message and all other
# fields undefined.
sub new {
    my ($class, $xE) = @_;

    # Flat error: only the message text is known.
    unless ( ref($xE) ) {
        return bless {
            domain  => 0,
            level   => 2,
            code    => -1,
            message => $xE,
            file    => undef,
            line    => undef,
            str1    => undef,
            str2    => undef,
            str3    => undef,
            num1    => undef,
            num2    => undef,
        }, $class;
    }

    my ($context, $column) = $xE->context_and_column();
    my %fields = (
        domain  => $xE->domain(),
        level   => $xE->level(),
        code    => $xE->code(),
        message => $xE->message(),
        file    => $xE->file(),
        line    => $xE->line(),
        str1    => $xE->str1(),
        str2    => $xE->str2(),
        str3    => $xE->str3(),
        num1    => $xE->num1(),
        num2    => $xE->num2(),
    );
    if ( defined $context ) {
        $fields{context} = $context;
        $fields{column}  = $column;
    }

    return bless \%fields, $class;
}
# Error callback: wraps the raw error $xE in an XML::LibXML::Error object and
# chains it to the previously collected error $prev.
#
# Warning-level messages are not collected unless $WARNINGS is 2: they are
# reported via warn when $WARNINGS is true and $prev is returned unchanged.
# Otherwise the new error is returned with its _prev field set to $prev
# (wrapped in an error object first when it is a non-empty plain string, or
# undef when there is no previous error).
sub _callback_error {
    my ($xE, $prev) = @_;
    my $terr = XML::LibXML::Error->new($xE);

    if ( $terr->{level} == XML_ERR_WARNING and $WARNINGS != 2 ) {
        warn $terr if $WARNINGS;
        return $prev;
    }

    my $previous;
    if ( ref($prev) ) {
        $previous = $prev;
    }
    elsif ( defined($prev) && length($prev) ) {
        $previous = XML::LibXML::Error->new($prev);
    }
    $terr->{_prev} = $previous;

    return $terr;
}
sub _instant_error_callback {
    # Report an error immediately: wrap it in an error object, print a dump
    # of that object to the currently selected output handle, then die with
    # the object as the exception.
    my ($xE) = @_;
    my $terr = XML::LibXML::Error->new($xE);
    print "Reporting an instanteous error ", $terr->dump;
    die $terr;
}
sub _report_warning {
    # Emit a previously saved error through warn(); does nothing when no
    # error was saved (undef).
    my $saved_error = $_[0];
    warn $saved_error if defined $saved_error;
}
sub _report_error {
    # Throw a previously saved error with die(); does nothing when no error
    # was saved (undef).
    my $saved_error = $_[0];
    die $saved_error if defined $saved_error;
}
}
sub AUTOLOAD {
    # Generic read accessor for the known error fields.  The requested
    # method name (without its package prefix) must be one of the field
    # names accepted by the pattern below; the value stored under that key
    # is returned.  Any other name croaks.  Calls on a non-reference
    # (e.g. class-method calls) return undef.
    my $self = shift;
    return undef unless ref($self);

    (my $field = $AUTOLOAD) =~ s/.*:://;

    croak("Unknown error field $field")
        unless $field =~ /^(?:code|_prev|level|file|line|domain|nodename|message|column|context|str[123]|num[12])$/;

    return $self->{$field};
}
# Backward-compatible aliases: int1/int2 forward to the num1/num2 accessors.
sub int1 {
    my ($self) = @_;
    return $self->num1;
}
sub int2 {
    my ($self) = @_;
    return $self->num2;
}
# Explicit no-op destructor, so that object destruction is not routed
# through AUTOLOAD.
sub DESTROY {
}
sub domain {
    # Return the human-readable name of this error's domain, looked up in
    # @error_domains by the numeric code stored in the object.  Returns
    # undef when not called on a reference.
    my $self = shift;
    return undef unless ref($self);

    my $code = $self->{domain};
    return $error_domains[$code];
}
sub as_string {
    # Render this error, preceded by any chained earlier error, as text.
    #
    # Layout of the part produced for this error:
    #   [<where>: ]<domain name> <level> : <message>\n
    # followed by either the XPath expression with a caret under position
    # num1, or the stored context line with a caret under the stored column.
    my $self = shift;

    # Earlier errors come first.
    my $text = defined($self->{_prev}) ? $self->{_prev}->as_string : "";

    # Severity word; stays undefined for a level outside the known set.
    my $severity;
    if ($self->{level} == XML_ERR_NONE) {
        $severity = "";
    }
    elsif ($self->{level} == XML_ERR_WARNING) {
        $severity = "warning";
    }
    elsif ($self->{level} == XML_ERR_ERROR ||
           $self->{level} == XML_ERR_FATAL) {
        $severity = "error";
    }

    # Location prefix: "file:line" when a file is known, otherwise
    # "Entity: line N" for parser errors that carry a line number.
    my $location = "";
    if (defined($self->{file})) {
        $location = "$self->{file}:$self->{line}";
    }
    elsif (($self->{domain} == XML_ERR_FROM_PARSER)
           and
           $self->{line}) {
        $location = "Entity: line $self->{line}";
    }
    $location .= ": element " . $self->{nodename} if $self->{nodename};
    $text .= $location . ": " if $location ne "";

    # Headline: domain name, severity, and the message without its newline.
    my $message = $self->{message} || "";
    chomp($message);
    $text .= $error_domains[$self->{domain}] . " " . $severity . " :";
    $text .= " " . $message . "\n";

    if (($self->{domain} == XML_ERR_FROM_XPATH) and
        defined($self->{str1})) {
        # XPath errors: str1 is printed, and a caret is placed num1
        # columns in on the next line.
        $text .= $self->{str1} . "\n";
        $text .= (" " x $self->{num1}) . "^\n";
    }
    elsif (defined $self->{context}) {
        # Other errors with context: blank out everything before the column
        # except tabs, so the caret lines up with the context line above it.
        my $context = $self->{context};
        $text .= $context . "\n";
        (my $padding = substr($context, 0, $self->{column})) =~ s/[^\t]/ /g;
        $text .= $padding . "^\n";
    }

    return $text;
}
sub dump {
    # Return a Data::Dumper rendering of this object, with the dumped
    # variable named "error".
    use Data::Dumper;
    my $self = shift;
    my $dumper = Data::Dumper->new([$self], ['error']);
    return $dumper->Dump;
}
1;
PK @[M Y
Y
LibXML/SAX.pmnu W+A # $Id: SAX.pm 785 2009-07-16 14:17:46Z pajas $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX;
use strict;
use vars qw($VERSION @ISA);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
use XML::LibXML;
use XML::SAX::Base;
use base qw(XML::SAX::Base);
use Carp;
use IO::File;
sub CLONE_SKIP {
    # Returns 0 when $XML::LibXML::__threads_shared is true, and 1 otherwise.
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
sub _parse_characterstream {
    # Character-stream parsing is not available: this always croaks.
    #
    # Original design note: an implementation may catch the XML declaration
    # here, so the parser won't get confused about a possibly wrong encoding.
    my ( $self, $fh ) = @_;
    croak( "not implemented yet" );
}
sub _parse_bytestream {
    # Parse from a file handle: install a fresh XML::LibXML parser together
    # with parse_fh and the handle in {ParserOptions}, run _parse, and return
    # the result of end_document.
    my ( $self, $fh ) = @_;

    @{ $self->{ParserOptions} }{qw(LibParser ParseFunc ParseFuncParam)} = (
        scalar( XML::LibXML->new ),
        \&XML::LibXML::parse_fh,
        $fh,
    );

    $self->_parse;
    return $self->end_document({});
}
sub _parse_string {
    # Parse from an in-memory string: store parse_string and the string in
    # {ParserOptions}, run _parse, and return the result of end_document.
    # Unlike the other entry points, a parser already present in
    # {ParserOptions}{LibParser} is kept; one is created only when missing.
    my ( $self, $string ) = @_;

    if ( !defined $self->{ParserOptions}{LibParser} ) {
        $self->{ParserOptions}{LibParser} = XML::LibXML->new();
    }

    @{ $self->{ParserOptions} }{qw(ParseFunc ParseFuncParam)} = (
        \&XML::LibXML::parse_string,
        $string,
    );

    $self->_parse;
    return $self->end_document({});
}
sub _parse_systemid {
    # Parse from a system identifier: install a fresh XML::LibXML parser
    # together with parse_file and the identifier in {ParserOptions}, run
    # _parse, and return the result of end_document.
    my ( $self, $systemid ) = @_;

    @{ $self->{ParserOptions} }{qw(LibParser ParseFunc ParseFuncParam)} = (
        scalar( XML::LibXML->new ),
        \&XML::LibXML::parse_file,
        $systemid,
    );

    $self->_parse;
    return $self->end_document({});
}
sub parse_chunk {
    # Parse a chunk: install a fresh XML::LibXML parser together with
    # parse_xml_chunk and the chunk in {ParserOptions}, then run _parse.
    # Returns nothing.
    my ( $self, $chunk ) = @_;

    my $libparser = XML::LibXML->new;
    # a hack to prevent parse_xml_chunk from issuing end_document
    $libparser->{IS_FILTER} = 1;

    @{ $self->{ParserOptions} }{qw(LibParser ParseFunc ParseFuncParam)} = (
        $libparser,
        \&XML::LibXML::parse_xml_chunk,
        $chunk,
    );

    $self->_parse;
    return;
}
sub _parse {
    # Run the configured parse function with this object installed as the
    # parser's SAX handler.
    #
    # Reads from $self->{ParserOptions}:
    #   LibParser      - parser object (must provide set_handler)
    #   ParseFunc      - code reference, called as ParseFunc->(LibParser, ParseFuncParam)
    #   ParseFuncParam - the input handed to ParseFunc
    #
    # Croaks if the parser's {SAX}{State} is 1 afterwards, or with the
    # exception raised by the parse function.  Returns nothing on success.
    my $self = shift;
    my $args = bless $self->{ParserOptions}, ref($self);

    $args->{LibParser}->set_handler( $self );
    eval {
        $args->{ParseFunc}->($args->{LibParser}, $args->{ParseFuncParam});
    };
    # Capture the exception right away: $@ is a global that any eval or
    # destructor executed by the calls below may reset, which would silently
    # swallow the parse error.
    my $parse_error = $@;

    if ( $args->{LibParser}->{SAX}->{State} == 1 ) {
        croak( "SAX Exception not implemented, yet; Data ended before document ended\n" );
    }

    # break a possible circular reference
    $args->{LibParser}->set_handler( undef );

    if ( $parse_error ) {
        croak $parse_error;
    }
    return;
}
1;
PK @[P>b LibXML/Common.pmnu W+A #-------------------------------------------------------------------------#
# $Id: Common.pm,v 1.5 2003/02/27 18:32:59 phish108 Exp $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
#-------------------------------------------------------------------------#
package XML::LibXML::Common;
#-------------------------------------------------------------------------#
# global blur #
#-------------------------------------------------------------------------#
use strict;
require Exporter;
require DynaLoader;
use vars qw( @ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
use XML::LibXML qw(:libxml);
#-------------------------------------------------------------------------#
# export information #
#-------------------------------------------------------------------------#
# The export tags are assembled from four name lists:
#   w3c      - W3C-style node type names plus the XInclude markers
#   libxml   - libxml2-style XML_* names
#   gdome    - the same set under GDOME_* names
#   encoding - the two encoding helper functions
# ":all" is the w3c list followed by the encoding helpers.  Everything except
# ":all" is exportable on request; the encoding helpers and the w3c names are
# exported by default.
{
    my @encoding_names = qw( encodeToUTF8 decodeFromUTF8 );

    my @w3c_names = qw(
        ELEMENT_NODE                ATTRIBUTE_NODE
        TEXT_NODE                   CDATA_SECTION_NODE
        ENTITY_REFERENCE_NODE       ENTITY_NODE
        PI_NODE                     PROCESSING_INSTRUCTION_NODE
        COMMENT_NODE                DOCUMENT_NODE
        DOCUMENT_TYPE_NODE          DOCUMENT_FRAG_NODE
        DOCUMENT_FRAGMENT_NODE      NOTATION_NODE
        HTML_DOCUMENT_NODE          DTD_NODE
        ELEMENT_DECLARATION         ATTRIBUTE_DECLARATION
        ENTITY_DECLARATION          NAMESPACE_DECLARATION
        XINCLUDE_END                XINCLUDE_START
    );

    my @libxml_names = qw(
        XML_ELEMENT_NODE            XML_ATTRIBUTE_NODE
        XML_TEXT_NODE               XML_CDATA_SECTION_NODE
        XML_ENTITY_REF_NODE         XML_ENTITY_NODE
        XML_PI_NODE                 XML_COMMENT_NODE
        XML_DOCUMENT_NODE           XML_DOCUMENT_TYPE_NODE
        XML_DOCUMENT_FRAG_NODE      XML_NOTATION_NODE
        XML_HTML_DOCUMENT_NODE      XML_DTD_NODE
        XML_ELEMENT_DECL            XML_ATTRIBUTE_DECL
        XML_ENTITY_DECL             XML_NAMESPACE_DECL
        XML_XINCLUDE_END            XML_XINCLUDE_START
    );

    my @gdome_names = qw(
        GDOME_ELEMENT_NODE          GDOME_ATTRIBUTE_NODE
        GDOME_TEXT_NODE             GDOME_CDATA_SECTION_NODE
        GDOME_ENTITY_REF_NODE       GDOME_ENTITY_NODE
        GDOME_PI_NODE               GDOME_COMMENT_NODE
        GDOME_DOCUMENT_NODE         GDOME_DOCUMENT_TYPE_NODE
        GDOME_DOCUMENT_FRAG_NODE    GDOME_NOTATION_NODE
        GDOME_HTML_DOCUMENT_NODE    GDOME_DTD_NODE
        GDOME_ELEMENT_DECL          GDOME_ATTRIBUTE_DECL
        GDOME_ENTITY_DECL           GDOME_NAMESPACE_DECL
        GDOME_XINCLUDE_END          GDOME_XINCLUDE_START
    );

    # Each tag gets its own array so that the tags stay independent.
    %EXPORT_TAGS = (
        all      => [ @w3c_names, @encoding_names ],
        w3c      => [ @w3c_names ],
        libxml   => [ @libxml_names ],
        gdome    => [ @gdome_names ],
        encoding => [ @encoding_names ],
    );

    @EXPORT_OK = ( @encoding_names, @w3c_names, @libxml_names, @gdome_names );
    @EXPORT    = ( @encoding_names, @w3c_names );
}
#-------------------------------------------------------------------------#
# W3 conform node types #
#-------------------------------------------------------------------------#
# Numeric node type codes under W3C-style names.
use constant ELEMENT_NODE                => 1;
use constant ATTRIBUTE_NODE              => 2;
use constant TEXT_NODE                   => 3;
use constant CDATA_SECTION_NODE          => 4;
use constant ENTITY_REFERENCE_NODE       => 5;
use constant ENTITY_NODE                 => 6;
use constant PROCESSING_INSTRUCTION_NODE => 7;
use constant COMMENT_NODE                => 8;
use constant DOCUMENT_NODE               => 9;
use constant DOCUMENT_TYPE_NODE          => 10;
use constant DOCUMENT_FRAGMENT_NODE      => 11;
use constant NOTATION_NODE               => 12;
use constant HTML_DOCUMENT_NODE          => 13;
use constant DTD_NODE                    => 14;
use constant ELEMENT_DECLARATION         => 15;
use constant ATTRIBUTE_DECLARATION       => 16;
use constant ENTITY_DECLARATION          => 17;
use constant NAMESPACE_DECLARATION       => 18;
#-------------------------------------------------------------------------#
# some extras for the W3 spec
#-------------------------------------------------------------------------#
# Short aliases for two of the names above.
use constant PI_NODE                     => 7;
use constant DOCUMENT_FRAG_NODE          => 11;
# XInclude marker nodes.  libxml2 numbers the start marker 19 and the end
# marker 20; these two values used to be swapped here, which also made them
# disagree with the GDOME_XINCLUDE_* constants below.
use constant XINCLUDE_START              => 19;
use constant XINCLUDE_END                => 20;
#-------------------------------------------------------------------------#
# libgdome compat names                                                   #
#-------------------------------------------------------------------------#
use constant GDOME_ELEMENT_NODE          => 1;
use constant GDOME_ATTRIBUTE_NODE        => 2;
use constant GDOME_TEXT_NODE             => 3;
use constant GDOME_CDATA_SECTION_NODE    => 4;
use constant GDOME_ENTITY_REF_NODE       => 5;
use constant GDOME_ENTITY_NODE           => 6;
use constant GDOME_PI_NODE               => 7;
use constant GDOME_COMMENT_NODE          => 8;
use constant GDOME_DOCUMENT_NODE         => 9;
use constant GDOME_DOCUMENT_TYPE_NODE    => 10;
use constant GDOME_DOCUMENT_FRAG_NODE    => 11;
use constant GDOME_NOTATION_NODE         => 12;
use constant GDOME_HTML_DOCUMENT_NODE    => 13;
use constant GDOME_DTD_NODE              => 14;
use constant GDOME_ELEMENT_DECL          => 15;
use constant GDOME_ATTRIBUTE_DECL        => 16;
use constant GDOME_ENTITY_DECL           => 17;
use constant GDOME_NAMESPACE_DECL        => 18;
use constant GDOME_XINCLUDE_START        => 19;
use constant GDOME_XINCLUDE_END          => 20;
1;
#-------------------------------------------------------------------------#
__END__
PK @[ceK K LibXML/Literal.pmnu W+A # $Id: Literal.pm 785 2009-07-16 14:17:46Z pajas $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Literal;
use XML::LibXML::Boolean;
use XML::LibXML::Number;
use strict;
use vars qw ($VERSION);
$VERSION = "1.70"; # VERSION TEMPLATE: DO NOT CHANGE
use overload
'""' => \&value,
'cmp' => \&cmp;
sub new {
    # Create a literal holding $string.  The object is a blessed reference
    # to a private copy of the string, which is stored verbatim: the entity
    # conversion that once lived here is disabled.
    my ($class, $string) = @_;
    return bless \$string, $class;
}
sub as_string {
    # Return the literal as a single-quoted string.
    #
    # Apostrophes inside the value are written as &apos; so that they cannot
    # terminate the surrounding quotes.  (The substitution used to replace an
    # apostrophe with itself, which left the quoted result malformed for any
    # value containing one.)  The stored value itself is not modified.
    my $self = shift;
    my $string = $$self;
    $string =~ s/'/&apos;/g;
    return "'$string'";
}
sub as_xml {
    # Return the literal's value followed by a newline.
    # NOTE(review): no element markup is added around the value here; confirm
    # against the upstream module whether wrapping tags were lost from this
    # copy of the source.
    my ($self) = @_;
    return ${$self} . "\n";
}
sub value {
    # Return the stored string (this is also the stringification overload).
    return ${ $_[0] };
}
sub cmp {
    # String comparison (this is also the "cmp" overload).  $other is the
    # value compared against; a true $swapped means the operands were given
    # in reverse order, so the comparison is performed the other way round.
    my ($self, $other, $swapped) = @_;
    return $swapped ? ($other cmp $$self)
                    : ($$self cmp $other);
}
sub evaluate {
    # A literal evaluates to itself.
    return $_[0];
}
sub to_boolean {
    # Convert to a boolean object: true for a non-empty string, false for
    # the empty string.
    my ($self) = @_;
    return XML::LibXML::Boolean->True if length($$self) > 0;
    return XML::LibXML::Boolean->False;
}
# Conversions to the other value types.
sub to_number {
    # Build a number object from the stored string.
    my ($self) = @_;
    return XML::LibXML::Number->new($self->value);
}
sub to_literal {
    # Already a literal: return the object itself.
    my ($self) = @_;
    return $self;
}
sub string_value {
    # The string value of a literal is its stored string.
    my ($self) = @_;
    return $self->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Literal - Simple string values.
=head1 DESCRIPTION
In XPath terms a Literal is what we know as a string.
=head1 API
=head2 new($string)
Create a new Literal object with the value in $string. Note that &quot; and
&apos; will be converted to " and ' respectively. That is not part of the XPath
specification, but I consider it useful. Note though that you have to go
to extraordinary lengths in an XML template file (be it XSLT or whatever) to
make use of this:
  <xsl:value-of select="&quot;I&apos;m feeling &amp;quot;sad&amp;quot;&quot;"/>
Which produces a Literal of:
I'm feeling "sad"
=head2 value()
Also overloaded as stringification, simply returns the literal string value.
=head2 cmp($literal)
Returns the equivalent of perl's cmp operator against the given $literal.
=cut
PK @[fjj j LibXML/Parser.podnu W+A =head1 NAME
XML::LibXML::Parser - Parsing XML Data with XML::LibXML
=head1 SYNOPSIS
use XML::LibXML 1.70;
# Parser constructor
$parser = XML::LibXML->new();
$parser = XML::LibXML->new(option=>value, ...);
$parser = XML::LibXML->new({option=>value, ...});
# Parsing XML
$dom = XML::LibXML->load_xml(
location => $file_or_url
# parser options ...
);
$dom = XML::LibXML->load_xml(
string => $xml_string
# parser options ...
);
$dom = XML::LibXML->load_xml({
IO => $perl_file_handle
# parser options ...
});
$dom = $parser->load_xml(...);
# Parsing HTML
$dom = XML::LibXML->load_html(...);
$dom = $parser->load_html(...);
# Parsing well-balanced XML chunks
$fragment = $parser->parse_balanced_chunk( $wbxmlstring, $encoding );
# Processing XInclude
$parser->process_xincludes( $doc );
$parser->processXIncludes( $doc );
# Old-style parser interfaces
$doc = $parser->parse_file( $xmlfilename );
$doc = $parser->parse_fh( $io_fh );
$doc = $parser->parse_string( $xmlstring);
$doc = $parser->parse_html_file( $htmlfile, \%opts );
$doc = $parser->parse_html_fh( $io_fh, \%opts );
$doc = $parser->parse_html_string( $htmlstring, \%opts );
# Push parser
$parser->parse_chunk($string, $terminate);
$parser->init_push();
$parser->push(@data);
$doc = $parser->finish_push( $recover );
# Set/query parser options
$parser->option_exists($name);
$parser->get_option($name);
$parser->set_option($name,$value);
$parser->set_options({$name=>$value,...});
# XML catalogs
$parser->load_catalog( $catalog_file );
=head1 PARSING
A XML document is read into a data structure such as a DOM tree by a piece of
software, called a parser. XML::LibXML currently provides four different parser
interfaces:
=over 4
=item *
A DOM Pull-Parser
=item *
A DOM Push-Parser
=item *
A SAX Parser
=item *
A DOM based SAX Parser.
=back
=head2 Creating a Parser Instance
XML::LibXML provides an OO interface to the libxml2 parser functions. Thus you
have to create a parser instance before you can parse any XML data.
=over 4
=item new
$parser = XML::LibXML->new();
$parser = XML::LibXML->new(option=>value, ...);
$parser = XML::LibXML->new({option=>value, ...});
Create a new XML and HTML parser instance. Each parser instance holds default
values for various parser options. Optionally, one can pass a hash reference or
a list of option => value pairs to set a different default set of options.
Unless specified otherwise, the options C<<<<<< load_ext_dtd >>>>>>, C<<<<<< expand_entities >>>>>>, and C<<<<<< huge >>>>>> are set to 1. See L<<<<<< Parser Options >>>>>> for a list of libxml2 parser's options.
=back
=head2 DOM Parser
One of the common parser interfaces of XML::LibXML is the DOM parser. This
parser reads XML data into a DOM like data structure, so each tag can get
accessed and transformed.
XML::LibXML's DOM parser is not only capable to parse XML data, but also
(strict) HTML files. There are three ways to parse documents - as a string, as
a Perl filehandle, or as a filename/URL. The return value from each is a L<<<<<< XML::LibXML::Document >>>>>> object, which is a DOM object.
All of the functions listed below will throw an exception if the document is
invalid. To prevent this causing your program exiting, wrap the call in an
eval{} block
=over 4
=item load_xml
$dom = XML::LibXML->load_xml(
location => $file_or_url
# parser options ...
);
$dom = XML::LibXML->load_xml(
string => $xml_string
# parser options ...
);
$dom = XML::LibXML->load_xml({
IO => $perl_file_handle
# parser options ...
});
$dom = $parser->load_xml(...);
This function is available since XML::LibXML 1.70. It provides easy to use
interface to the XML parser that parses given file (or URL), string, or input
stream to a DOM tree. The arguments can be passed in a HASH reference or as
name => value pairs. The function can be called as a class method or an object
method. In both cases it internally creates a new parser instance passing the
specified parser options; if called as an object method, it clones the original
parser (preserving its settings) and additionally applies the specified options
to the new parser. See the constructor C<<<<<< new >>>>>> and L<<<<<< Parser Options >>>>>> for more information.
=item load_html
$dom = XML::LibXML->load_html(...);
$dom = $parser->load_html(...);
This function is available since XML::LibXML 1.70. It has the same usage as C<<<<<< load_xml >>>>>>, providing interface to the HTML parser. See C<<<<<< load_xml >>>>>> for more information.
Parsing HTML may cause problems, especially if the ampersand ('&') is used.
This is a common problem if HTML code is parsed that contains links to
CGI-scripts. Such links cause the parser to throw errors. In such cases libxml2
still parses the entire document as if there was no error, but the error causes
XML::LibXML to stop the parsing process. However, the document is not lost.
Such HTML documents should be parsed using the I<<<<<< recover >>>>>> flag. By default recovering is deactivated.
The functions described above are implemented to parse well formed documents.
In some cases a program gets well balanced XML instead of well formed documents
(e.g. a XML fragment from a Database). With XML::LibXML it is not required to
wrap such fragments in the code, because XML::LibXML is capable even to parse
well balanced XML fragments.
=over 4
=item parse_balanced_chunk
$fragment = $parser->parse_balanced_chunk( $wbxmlstring, $encoding );
This function parses a well balanced XML string into a L<<<<<< XML::LibXML::DocumentFragment >>>>>>. The first argument contains the input string, the optional second argument
can be used to specify character encoding of the input (UTF-8 is assumed by
default).
=item parse_xml_chunk
This is the old name of parse_balanced_chunk(). Because it may cause confusion
with the push parser interface, this function should not be used anymore.
=back
By default XML::LibXML does not process XInclude tags within a XML Document
(see options section below). XML::LibXML allows to post process a document to
expand XInclude tags.
=over 4
=item process_xincludes
$parser->process_xincludes( $doc );
After a document is parsed into a DOM structure, you may want to expand the
documents XInclude tags. This function processes the given document structure
and expands all XInclude tags (or throws an error) by using the flags and
callbacks of the given parser instance.
Note that the resulting Tree contains some extra nodes (of type
XML_XINCLUDE_START and XML_XINCLUDE_END) after successfully processing the
document. These nodes indicate where data was included into the original tree.
If the document is serialized, these extra nodes will not show up.
Remember: A Document with processed XIncludes differs from the original
document after serialization, because the original XInclude tags will not get
restored!
If the parser flag "expand_xincludes" is set to 1, you need not to post process
the parsed document.
=item processXIncludes
$parser->processXIncludes( $doc );
This is an alias to process_xincludes, but through a JAVA like function name.
=item parse_file
$doc = $parser->parse_file( $xmlfilename );
This function parses an XML document from a file or network; $xmlfilename can
be either a filename or an URL. Note that for parsing files, this function is
the fastest choice, about 6-8 times faster than parse_fh().
=item parse_fh
$doc = $parser->parse_fh( $io_fh );
parse_fh() parses a IOREF or a subclass of IO::Handle.
Because the data comes from an open handle, libxml2's parser does not know
about the base URI of the document. To set the base URI one should use
parse_fh() as follows:
my $doc = $parser->parse_fh( $io_fh, $baseuri );
=item parse_string
$doc = $parser->parse_string( $xmlstring);
This function is similar to parse_fh(), but it parses a XML document that is
available as a single string in memory. Again, you can pass an optional base
URI to the function.
my $doc = $parser->parse_string( $xmlstring, $baseuri );
=item parse_html_file
$doc = $parser->parse_html_file( $htmlfile, \%opts );
Similar to parse_file() but parses HTML (strict) documents; $htmlfile can be
filename or URL.
An optional second argument can be used to pass some options to the HTML parser
as a HASH reference. See options labeled with HTML in L<<<<<< Parser Options >>>>>>.
=item parse_html_fh
$doc = $parser->parse_html_fh( $io_fh, \%opts );
Similar to parse_fh() but parses HTML (strict) streams.
An optional second argument can be used to pass some options to the HTML parser
as a HASH reference. See options labeled with HTML in L<<<<<< Parser Options >>>>>>.
Note: encoding option may not work correctly with this function in libxml2 <
2.6.27 if the HTML file declares charset using a META tag.
=item parse_html_string
$doc = $parser->parse_html_string( $htmlstring, \%opts );
Similar to parse_string() but parses HTML (strict) strings.
An optional second argument can be used to pass some options to the HTML parser
as a HASH reference. See options labeled with HTML in L<<<<<< Parser Options >>>>>>.
=back
=back
=head2 Push Parser
XML::LibXML provides a push parser interface. Rather than pulling the data from
a given source the push parser waits for the data to be pushed into it.
This allows one to parse large documents without waiting for the parser to
finish. The interface is especially useful if a program needs to pre-process
the incoming pieces of XML (e.g. to detect document boundaries).
While XML::LibXML parse_*() functions force the data to be a well-formed XML,
the push parser will take any arbitrary string that contains some XML data. The
only requirement is that all the pushed strings are together a well formed
document. With the push parser interface a program can interrupt the parsing
process as required, where the parse_*() functions give not enough flexibility.
Different to the pull parser implemented in parse_fh() or parse_file(), the
push parser is not able to find out about the documents end itself. Thus the
calling program needs to indicate explicitly when the parsing is done.
In XML::LibXML this is done by a single function:
=over 4
=item parse_chunk
$parser->parse_chunk($string, $terminate);
parse_chunk() tries to parse a given chunk of data, which isn't necessarily
well balanced data. The function takes two parameters: The chunk of data as a
string and optional a termination flag. If the termination flag is set to a
true value (e.g. 1), the parsing will be stopped and the resulting document
will be returned as the following example describes:
my $parser = XML::LibXML->new;
for my $string ( "<", "foo", ' bar="hello world"', "/>") {
$parser->parse_chunk( $string );
}
my $doc = $parser->parse_chunk("", 1); # terminate the parsing
=back
Internally XML::LibXML provides three functions that control the push parser
process:
=over 4
=item init_push
$parser->init_push();
Initializes the push parser.
=item push
$parser->push(@data);
This function pushes the data stored inside the array to libxml2's parser. Each
entry in @data must be a normal scalar! This method can be called repeatedly.
=item finish_push
$doc = $parser->finish_push( $recover );
This function returns the result of the parsing process. If this function is
called without a parameter it will complain about non well-formed documents. If
$recover is 1, the push parser can be used to restore broken or non well formed
(XML) documents as the following example shows:
eval {
$parser->push( "<foo>", "bar" );
$doc = $parser->finish_push(); # will report broken XML
};
if ( $@ ) {
# ...
}
This can be annoying if the closing tag is missed by accident. The following
code will restore the document:
eval {
$parser->push( "<foo>", "bar" );
$doc = $parser->finish_push(1); # will return the data parsed
# unless an error happened
};
print $doc->toString(); # returns "<foo>bar</foo>"
Of course finish_push() will return nothing if there was no data pushed to the
parser before.
=back
=head2 Pull Parser (Reader)
XML::LibXML also provides a pull-parser interface similar to the XmlReader
interface in .NET. This interface is almost streaming, and is usually faster
and simpler to use than SAX. See L<<<<<< XML::LibXML::Reader >>>>>>.
=head2 Direct SAX Parser
XML::LibXML provides a direct SAX parser in the L<<<<<< XML::LibXML::SAX >>>>>> module.
=head2 DOM based SAX Parser
XML::LibXML also provides a DOM based SAX parser. The SAX parser is defined in
the module XML::LibXML::SAX::Parser. As it is not a stream based parser, it
parses documents into a DOM and traverses the DOM tree instead.
The API of this parser is exactly the same as any other Perl SAX2 parser. See
XML::SAX::Intro for details.
Aside from the regular parsing methods, you can access the DOM tree traverser
directly, using the generate() method:
my $doc = build_yourself_a_document();
my $saxparser = XML::LibXML::SAX::Parser->new( ... );
$saxparser->generate( $doc );
This is useful for serializing DOM trees, for example that you might have done
prior processing on, or that you have as a result of XSLT processing.
I<<<<<< WARNING >>>>>>
This is NOT a streaming SAX parser. As I said above, this parser reads the
entire document into a DOM and serialises it. Some people couldn't read that in
the paragraph above so I've added this warning. If you want a streaming SAX
parser look at the L<<<<<< XML::LibXML::SAX >>>>>> man page
=head1 SERIALIZATION
XML::LibXML provides some functions to serialize nodes and documents. The
serialization functions are described on the L<<<<<< XML::LibXML::Node >>>>>> manpage or the L<<<<<< XML::LibXML::Document >>>>>> manpage. XML::LibXML checks three global flags that alter the serialization
process:
=over 4
=item *
skipXMLDeclaration
=item *
skipDTD
=item *
setTagCompression
=back
Of these three flags, only setTagCompression is available for all
serialization functions.
Because XML::LibXML does not set these flags itself, one has to define them
locally as the following example shows:
local $XML::LibXML::skipXMLDeclaration = 1;
local $XML::LibXML::skipDTD = 1;
local $XML::LibXML::setTagCompression = 1;
If skipXMLDeclaration is defined and not '0', the XML declaration is omitted
during serialization.
If skipDTD is defined and not '0', an existing DTD would not be serialized with
the document.
If setTagCompression is defined and not '0' empty tags are displayed as open
and closing tags rather than the shortcut. For example the empty tag I<<<<<< foo >>>>>> will be rendered as I<<<<<< <foo></foo> >>>>>> rather than I<<<<<< <foo/> >>>>>>.
=head1 PARSER OPTIONS
Handling of libxml2 parser options has been unified and improved in XML::LibXML
1.70. You can now set default options for a particular parser instance by
passing them to the constructor as C<<<<<< XML::LibXML->new({name=>value, ...}) >>>>>> or C<<<<<< XML::LibXML->new(name=>value,...) >>>>>>. The options can be queried and changed using the following methods (pre-1.70
interfaces such as C<<<<<< $parser->load_ext_dtd(0) >>>>>> also exist, see below):
=over 4
=item option_exists
$parser->option_exists($name);
Returns 1 if the current XML::LibXML version supports the option C<<<<<< $name >>>>>>, otherwise returns 0 (note that this does not necessarily mean that the option
is supported by the underlying libxml2 library).
=item get_option
$parser->get_option($name);
Returns the current value of the parser option C<<<<<< $name >>>>>>.
=item set_option
$parser->set_option($name,$value);
Sets option C<<<<<< $name >>>>>> to value C<<<<<< $value >>>>>>.
=item set_options
$parser->set_options({$name=>$value,...});
Sets multiple parsing options at once.
=back
IMPORTANT NOTE: This documentation reflects the parser flags available in
libxml2 2.7.3. Some options have no effect if an older version of libxml2 is
used.
Each of the flags listed below is labeled
=over 4
=item /parser/
if it can be used with a C<<<<<< XML::LibXML >>>>>> parser object (i.e. passed to C<<<<<< XML::LibXML->new >>>>>>, C<<<<<< XML::LibXML->set_option >>>>>>, etc.)
=item /html/
if it can be passed to the C<<<<<< parse_html_* >>>>>> methods
=item /reader/
if it can be used with the C<<<<<< XML::LibXML::Reader >>>>>>.
=back
Unless specified otherwise, the default for boolean valued options is 0
(false).
The available options are:
=over 4
=item URI
/parser, html, reader/
In case of parsing strings or file handles, XML::LibXML doesn't know about the
base uri of the document. To make relative references such as XIncludes work,
one has to set a base URI, that is then used for the parsed document.
=item line_numbers
/parser, html, reader/
If this option is activated, libxml2 will store the line number of each element
node in the parsed document. The line number can be obtained using the C<<<<<< line_number() >>>>>> method of the C<<<<<< XML::LibXML::Node >>>>>> class (for non-element nodes this may report the line number of the containing
element). The line numbers are also used for reporting positions of validation
errors.
IMPORTANT: Due to limitations in the libxml2 library line numbers greater than
65535 will be returned as 65535. Unfortunately, this is a long and sad story,
please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details.
=item encoding
/html/
character encoding of the input
=item recover
/parser, html, reader/
recover from errors; possible values are 0, 1, and 2
A true value turns on recovery mode which allows one to parse broken XML or
HTML data. The recovery mode allows the parser to return the successfully
parsed portion of the input document. This is useful for almost well-formed
documents, where for example a closing tag is missing somewhere. Still,
XML::LibXML will only parse until the first fatal (non-recoverable) error
occurs, reporting recoverable parsing errors as warnings. To suppress even
these warnings, use recover=>2.
Note that validation is switched off automatically in recovery mode.
=item expand_entities
/parser, reader/
substitute entities; possible values are 0 and 1; default is 1
Note that although this flag disables entity substitution, it does not prevent
the parser from loading external entities; when substitution of an external
entity is disabled, the entity will be represented in the document tree by a
XML_ENTITY_REF_NODE node whose subtree will be the content obtained by parsing
the external resource. Although this level of nesting is visible from the
DOM it is transparent to XPath data model, so it is possible to match nodes in
an unexpanded entity by the same XPath expression as if the entity was
expanded. See also ext_ent_handler.
=item ext_ent_handler
/parser/
Provide a custom external entity handler to be used when expand_entities is set
to 1. Possible value is a subroutine reference.
This feature does not work properly in libxml2 < 2.6.27!
The subroutine provided is called whenever the parser needs to retrieve the
content of an external entity. It is called with two arguments: the system ID
(URI) and the public ID. The value returned by the subroutine is parsed as the
content of the entity.
This method can be used to completely disable entity loading, e.g. to prevent
exploits of the type described at (L<<<<<< http://searchsecuritychannel.techtarget.com/generic/0,295582,sid97_gci1304703,00.html >>>>>>), where a service is tricked to expose its private data by letting it parse a
remote file (RSS feed) that contains an entity reference to a local file (e.g. C<<<<<< /etc/fstab >>>>>>).
A more granular solution to this problem, however, is provided by custom URL
resolvers, as in
my $c = XML::LibXML::InputCallback->new();
sub match { # accept file:/ URIs except for XML catalogs in /etc/xml/
my ($uri) = @_;
return ($uri=~m{^file:/}
and $uri !~ m{^file:///etc/xml/})
? 1 : 0;
}
$c->register_callbacks([ \&match, sub{}, sub{}, sub{} ]);
$parser->input_callbacks($c);
=item load_ext_dtd
/parser, reader/
load the external DTD subset while parsing; possible values are 0 and 1. Unless
specified, XML::LibXML sets this option to 1.
This flag is also required for DTD validation, to provide complete attributes,
and to expand entities, regardless of whether the document has an internal subset. Thus
switching off external DTD loading will disable entity expansion, validation,
and complete attributes on internal subsets as well.
=item complete_attributes
/parser, reader/
create default DTD attributes; possible values are 0 and 1
=item validation
/parser, reader/
validate with the DTD; possible values are 0 and 1
=item suppress_errors
/parser, html, reader/
suppress error reports; possible values are 0 and 1
=item suppress_warnings
/parser, html, reader/
suppress warning reports; possible values are 0 and 1
=item pedantic_parser
/parser, html, reader/
pedantic error reporting; possible values are 0 and 1
=item no_blanks
/parser, html, reader/
remove blank nodes; possible values are 0 and 1
=item expand_xinclude or xinclude
/parser, reader/
Implement XInclude substitution; possible values are 0 and 1
Expands XInclude tags immediately while parsing the document. Note that the
parser will use the URI resolvers installed via C<<<<<< XML::LibXML::InputCallback >>>>>> to parse the included document (if any).
=item no_xinclude_nodes
/parser, reader/
do not generate XINCLUDE START/END nodes; possible values are 0 and 1
=item no_network
/parser, html, reader/
Forbid network access; possible values are 0 and 1
If set to true, all attempts to fetch non-local resources (such as DTD or
external entities) will fail (unless custom callbacks are defined).
It may be necessary to use the flag C<<<<<< recover >>>>>> for processing documents requiring such resources while networking is off.
=item clean_namespaces
/parser, reader/
remove redundant namespace declarations during parsing; possible values are 0
and 1.
=item no_cdata
/parser, html, reader/
merge CDATA as text nodes; possible values are 0 and 1
=item no_basefix
/parser, reader/
do not fix up XINCLUDE xml:base URIs; possible values are 0 and 1
=item huge
/parser, html, reader/
relax any hardcoded limit from the parser; possible values are 0 and 1. Unless
specified, XML::LibXML sets this option to 1.
=item gdome
/parser/
THIS OPTION IS EXPERIMENTAL!
Although quite powerful, XML::LibXML's DOM implementation is incomplete with
respect to the DOM level 2 or level 3 specifications. XML::GDOME is based on
libxml2 as well and provides a rather complete DOM implementation by
wrapping libgdome. This flag allows you to make use of XML::LibXML's full
parser options and XML::GDOME's DOM implementation at the same time.
To make use of this function, one has to install libgdome and configure
XML::LibXML to use this library. For this you need to rebuild XML::LibXML!
Note: this feature was not seriously tested in recent XML::LibXML releases.
=back
For compatibility with XML::LibXML versions prior to 1.70, the following
methods are also supported for querying and setting the corresponding parser
options (if called without arguments, the methods return the current value of
the corresponding parser options; with an argument sets the option to a given
value):
$parser->validation();
$parser->recover();
$parser->pedantic_parser();
$parser->line_numbers();
$parser->load_ext_dtd();
$parser->complete_attributes();
$parser->expand_xinclude();
$parser->gdome_dom();
$parser->clean_namespaces();
$parser->no_network();
The following obsolete methods trigger parser options in some special way:
=over 4
=item recover_silently
$parser->recover_silently(1);
If called without an argument, returns true if the current value of the C<<<<<< recover >>>>>> parser option is 2 and returns false otherwise. With a true argument sets the C<<<<<< recover >>>>>> parser option to 2; with a false argument sets the C<<<<<< recover >>>>>> parser option to 0.
=item expand_entities
$parser->expand_entities(0);
Get/set the C<<<<<< expand_entities >>>>>> option. If called with a true argument, also turns the C<<<<<< load_ext_dtd >>>>>> option to 1.
=item keep_blanks
$parser->keep_blanks(0);
This is actually the opposite of the C<<<<<< no_blanks >>>>>> parser option. If used without an argument, retrieves the negated value of C<<<<<< no_blanks >>>>>>. If used with an argument, sets C<<<<<< no_blanks >>>>>> to the opposite value.
=item base_uri
$parser->base_uri( $your_base_uri );
Get/set the C<<<<<< URI >>>>>> option.
=back
=head1 XML CATALOGS
C<<<<<< libxml2 >>>>>> supports XML catalogs. Catalogs are used to map remote resources to their local
copies. Using catalogs can speed up parsing processes if many external
resources from remote addresses are loaded into the parsed documents (such as
DTDs or XIncludes).
Note that libxml2 has a global pool of loaded catalogs, so if you apply the
method C<<<<<< load_catalog >>>>>> to one parser instance, all parser instances will start using the catalog (in
addition to other previously loaded catalogs).
Note also that catalogs are not used when a custom external entity handler is
specified. At the current state it is not possible to make use of both types of
resolving systems at the same time.
=over 4
=item load_catalog
$parser->load_catalog( $catalog_file );
Loads the XML catalog file $catalog_file.
=back
=head1 ERROR REPORTING
XML::LibXML throws exceptions during parsing, validation or XPath processing
(and some other occasions). These errors can be caught by using I<<<<<< eval >>>>>> blocks. The error is stored in I<<<<<< $@ >>>>>>. There are two implementations: the old one throws $@ which is just a message
string, in the new one $@ is an object from the class XML::LibXML::Error; this
class overrides the operator "" so that when printed, the object flattens to
the usual error message.
XML::LibXML throws errors as they occur. This is a very common misunderstanding
in the use of XML::LibXML. If the eval is omitted, XML::LibXML will always halt
your script by "croaking" (see Carp man page for details).
Also note that an increasing number of functions throw errors if bad data is
passed as arguments. If you cannot assure valid data passed to XML::LibXML you
should eval these functions.
Note: since version 1.59, get_last_error() is no longer available in
XML::LibXML for thread-safety reasons.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[նa a LibXML/Node.podnu W+A =head1 NAME
XML::LibXML::Node - Abstract Base Class of XML::LibXML Nodes
=head1 SYNOPSIS
use XML::LibXML;
$name = $node->nodeName;
$node->setNodeName( $newName );
$bool = $node->isSameNode( $other_node );
$bool = $node->isEqual( $other_node );
$content = $node->nodeValue;
$content = $node->textContent;
$type = $node->nodeType;
$node->unbindNode();
$childnode = $node->removeChild( $childnode );
$oldnode = $node->replaceChild( $newNode, $oldNode );
$node->replaceNode($newNode);
$childnode = $node->appendChild( $childnode );
$childnode = $node->addChild( $childnode );
$node = $parent->addNewChild( $nsURI, $name );
$node->addSibling($newNode);
$newnode =$node->cloneNode( $deep );
$parentnode = $node->parentNode;
$nextnode = $node->nextSibling();
$nextnode = $node->nextNonBlankSibling();
$prevnode = $node->previousSibling();
$prevnode = $node->previousNonBlankSibling();
$boolean = $node->hasChildNodes();
$childnode = $node->firstChild;
$childnode = $node->lastChild;
$documentnode = $node->ownerDocument;
$node = $node->getOwner;
$node->setOwnerDocument( $doc );
$node->insertBefore( $newNode, $refNode );
$node->insertAfter( $newNode, $refNode );
@nodes = $node->findnodes( $xpath_expression );
$result = $node->find( $xpath );
print $node->findvalue( $xpath );
$bool = $node->exists( $xpath_expression );
@childnodes = $node->childNodes();
@childnodes = $node->nonBlankChildNodes();
$xmlstring = $node->toString($format,$docencoding);
$c14nstring = $node->toStringC14N();
$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context);
$ec14nstring = $node->toStringEC14N();
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list);
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list);
$str = $doc->serialize($format);
$localname = $node->localname;
$nameprefix = $node->prefix;
$uri = $node->namespaceURI();
$boolean = $node->hasAttributes();
@attributelist = $node->attributes();
$URI = $node->lookupNamespaceURI( $prefix );
$prefix = $node->lookupNamespacePrefix( $URI );
$node->normalize;
@nslist = $node->getNamespaces;
$node->removeChildNodes();
$strURI = $node->baseURI();
$node->setBaseURI($strURI);
$node->nodePath();
$lineno = $node->line_number();
=head1 DESCRIPTION
XML::LibXML::Node defines functions that are common to all Node Types. A
LibXML::Node should never be created standalone, but as an instance of a high
level class such as LibXML::Element or LibXML::Text. The class itself should
provide only common functionality. In XML::LibXML each node is part either of a
document or a document-fragment. Because of this there is no node without a
parent. This may cause confusion with "unbound" nodes.
=head1 METHODS
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item nodeName
$name = $node->nodeName;
Returns the node's name. This function is aware of namespaces and returns the
full name of the current node (C<<<<<< prefix:localname >>>>>>).
Since 1.62 this function also returns the correct DOM names for node types with
constant names, namely: #text, #cdata-section, #comment, #document,
#document-fragment.
=item setNodeName
$node->setNodeName( $newName );
In very limited situations, it is useful to change a node's name. In the DOM
specification this should throw an error. This Function is aware of namespaces.
=item isSameNode
$bool = $node->isSameNode( $other_node );
returns TRUE (1) if the given nodes refer to the same node structure, otherwise
FALSE (0) is returned.
=item isEqual
$bool = $node->isEqual( $other_node );
deprecated version of isSameNode().
I<<<<<< NOTE >>>>>> isEqual will change behaviour to follow the DOM specification
=item nodeValue
$content = $node->nodeValue;
If the node has any content (such as stored in a C<<<<<< text node >>>>>>) it can get requested through this function.
I<<<<<< NOTE: >>>>>> Element Nodes have no content per definition. To get the text value of an
Element use textContent() instead!
=item textContent
$content = $node->textContent;
this function returns the content of all text nodes in the descendants of the
given node as specified in DOM.
=item nodeType
$type = $node->nodeType;
Return a numeric value representing the node type of this node. The module
XML::LibXML by default exports constants for the node types (see the EXPORT
section in the L<<<<<< XML::LibXML >>>>>> manual page).
=item unbindNode
$node->unbindNode();
Unbinds the Node from its siblings and Parent, but not from the Document it
belongs to. If the node is not inserted into the DOM afterwards it will be lost
after the program terminates. From a low level view, the unbound node is
stripped from the context it is in and inserted into a (hidden) document-fragment.
=item removeChild
$childnode = $node->removeChild( $childnode );
This will unbind the Child Node from its parent C<<<<<< $node >>>>>>. The function returns the unbound node. If C<<<<<< $childnode >>>>>> is not a child of the given Node the function will fail.
=item replaceChild
$oldnode = $node->replaceChild( $newNode, $oldNode );
Replaces the C<<<<<< $oldNode >>>>>> with the C<<<<<< $newNode >>>>>>. The C<<<<<< $oldNode >>>>>> will be unbound from the Node. This function differs from the DOM L2
specification, in the case, if the new node is not part of the document, the
node will be imported first.
=item replaceNode
$node->replaceNode($newNode);
This function is very similar to replaceChild(), but it replaces the node
itself rather than a childnode. This is useful if a node found by any XPath
function, should be replaced.
=item appendChild
$childnode = $node->appendChild( $childnode );
The function will add the C<<<<<< $childnode >>>>>> to the end of C<<<<<< $node >>>>>>'s children. The function should fail, if the new childnode is already a child
of C<<<<<< $node >>>>>>. This function differs from the DOM L2 specification, in the case, if the new
node is not part of the document, the node will be imported first.
=item addChild
$childnode = $node->addChild( $childnode );
As an alternative to appendChild() one can use the addChild() function. This
function is a bit faster, because it avoids all DOM conformity checks.
Therefore this function is quite useful if one builds XML documents in memory
where the order and ownership (C<<<<<< ownerDocument >>>>>>) is assured.
addChild() uses libxml2's own xmlAddChild() function. Thus it has to be used
with extra care: If a text node is added to a node and the node itself or its
last childnode is as well a text node, the node to add will be merged with the
one already available. The current node will be removed from memory after this
action. Because perl is not aware of this action, the perl instance is still
available. XML::LibXML will catch the loss of a node and refuse to run any
function called on that node.
my $t1 = $doc->createTextNode( "foo" );
my $t2 = $doc->createTextNode( "bar" );
$t1->addChild( $t2 ); # is OK
my $val = $t2->nodeValue(); # will fail, script dies
Also addChild() will not check if the added node belongs to the same document
as the node it will be added to. This could lead to inconsistent documents and
in worse cases even to memory violations, if one does not keep track of
this issue.
Although this sounds like a lot of trouble, addChild() is useful if a document
is built from a stream, such as happens sometimes in SAX handlers or filters.
If you are not sure about the source of your nodes, you better stay with
appendChild(), because this function is more user friendly in the sense of
being more error tolerant.
=item addNewChild
$node = $parent->addNewChild( $nsURI, $name );
Similar to C<<<<<< addChild() >>>>>>, this function uses low level libxml2 functionality to provide a faster
interface for DOM building. I<<<<<< addNewChild() >>>>>> uses C<<<<<< xmlNewChild() >>>>>> to create a new node on a given parent element.
addNewChild() has two parameters $nsURI and $name, where $nsURI is an
(optional) namespace URI. $name is the fully qualified element name;
addNewChild() will determine the correct prefix if necessary.
The function returns the newly created node.
This function is very useful for DOM building, where a created node can be
directly associated with its parent. I<<<<<< NOTE >>>>>> this function is not part of the DOM specification and its use will limit your
code to XML::LibXML.
=item addSibling
$node->addSibling($newNode);
addSibling() allows adding an additional node to the end of a nodelist, defined
by the given node.
=item cloneNode
$newnode =$node->cloneNode( $deep );
I<<<<<< cloneNode >>>>>> creates a copy of C<<<<<< $node >>>>>>. When $deep is set to 1 (true) the function will copy all childnodes as well.
If $deep is 0 only the current node will be copied. Note that in case of
element, attributes are copied even if $deep is 0.
Note that the behavior of this function for $deep=0 has changed in 1.62 in
order to be consistent with the DOM spec (in older versions attributes and
namespace information was not copied for elements).
=item parentNode
$parentnode = $node->parentNode;
Returns simply the Parent Node of the current node.
=item nextSibling
$nextnode = $node->nextSibling();
Returns the next sibling, if any.
=item nextNonBlankSibling
$nextnode = $node->nextNonBlankSibling();
Returns the next non-blank sibling if any (a node is blank if it is a Text or
CDATA node consisting of whitespace only). This method is not defined by DOM.
=item previousSibling
$prevnode = $node->previousSibling();
Analogous to I<<<<<< getNextSibling >>>>>> the function returns the previous sibling if any.
=item previousNonBlankSibling
$prevnode = $node->previousNonBlankSibling();
Returns the previous non-blank sibling if any (a node is blank if it is a Text
or CDATA node consisting of whitespace only). This method is not defined by
DOM.
=item hasChildNodes
$boolean = $node->hasChildNodes();
If the current node has Childnodes this function returns TRUE (1), otherwise it
returns FALSE (0, not undef).
=item firstChild
$childnode = $node->firstChild;
If a node has childnodes this function will return the first node in the
childlist.
=item lastChild
$childnode = $node->lastChild;
If the C<<<<<< $node >>>>>> has childnodes this function returns the last child node.
=item ownerDocument
$documentnode = $node->ownerDocument;
Through this function it is always possible to access the document the current
node is bound to.
=item getOwner
$node = $node->getOwner;
This function returns the node the current node is associated with. In most
cases this will be a document node or a document fragment node.
=item setOwnerDocument
$node->setOwnerDocument( $doc );
This function binds a node to another DOM. This method unbinds the node first,
if it is already bound to another document.
This function is the opposite calling of L<<<<<< XML::LibXML::Document >>>>>>'s adoptNode() function. Because of this it has the same limitations with
Entity References as adoptNode().
=item insertBefore
$node->insertBefore( $newNode, $refNode );
The method inserts C<<<<<< $newNode >>>>>> before C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node.
This function differs from the DOM L2 specification, in the case, if the new
node is not part of the document, the node will be imported first,
automatically.
$refNode has to be passed to the function even if it is undefined:
$node->insertBefore( $newNode, undef ); # the same as $node->appendChild( $newNode );
$node->insertBefore( $newNode ); # wrong
Note, that the reference node has to be a direct child of the node the function
is called on. Also, $newNode is not allowed to be an ancestor of the new
parent node.
=item insertAfter
$node->insertAfter( $newNode, $refNode );
The method inserts C<<<<<< $newNode >>>>>> after C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node.
Note, that $refNode has to be passed explicitly even if it is undef.
=item findnodes
@nodes = $node->findnodes( $xpath_expression );
I<<<<<< findnodes >>>>>> evaluates the xpath expression (XPath 1.0) on the current node and returns the
resulting node set as an array. In scalar context returns a L<<<<<< XML::LibXML::NodeList >>>>>> object.
The xpath expression can be passed either as a string or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
I<<<<<< NOTE ON NAMESPACES AND XPATH >>>>>>:
A common mistake about XPath is to assume that node tests consisting of an
element name with no prefix match elements in the default namespace. This
assumption is wrong - by XPath specification, such node tests can only match
elements that are in no (i.e. null) namespace.
So, for example, one cannot match the root element of an XHTML document with C<<<<<< $node->find('/html') >>>>>> since C<<<<<< '/html' >>>>>> would only match if the root element C<<<<<< <html> >>>>>> had no namespace, but all XHTML elements belong to the namespace
http://www.w3.org/1999/xhtml. (Note that C<<<<<< xmlns="..." >>>>>> namespace declarations can also be specified in a DTD, which makes the
situation even worse, since the XML document looks as if there was no default
namespace).
There are several possible ways to deal with namespaces in XPath:
=over 4
=item *
The recommended way is to use the L<<<<<< XML::LibXML::XPathContext >>>>>> module to define an explicit context for XPath evaluation, in which a document
independent prefix-to-namespace mapping can be defined. For example:
my $xpc = XML::LibXML::XPathContext->new;
$xpc->registerNs('x', 'http://www.w3.org/1999/xhtml');
$xpc->find('/x:html',$node);
=item *
Another possibility is to use prefixes declared in the queried document (if
known). If the document declares a prefix for the namespace in question (and
the context node is in the scope of the declaration), C<<<<<< XML::LibXML >>>>>> allows you to use the prefix in the XPath expression, e.g.:
$node->find('/x:html');
=back
See also XML::LibXML::XPathContext->findnodes.
=item find
$result = $node->find( $xpath );
I<<<<<< find >>>>>> evaluates the XPath 1.0 expression using the current node as the context of the
expression, and returns the result depending on what type of result the XPath
expression had. For example, the XPath "1 * 3 + 52" results in a L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return a L<<<<<< XML::LibXML::Boolean >>>>>> object, or a L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to "do
the right thing" in different contexts.
The xpath expression can be passed either as a string or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
See also L<<<<<< XML::LibXML::XPathContext >>>>>>->find.
=item findvalue
print $node->findvalue( $xpath );
I<<<<<< findvalue >>>>>> is exactly equivalent to:
$node->find( $xpath )->to_literal;
That is, it returns the literal value of the results. This enables you to
ensure that you get a string back from your search, allowing certain shortcuts.
This could be used as the equivalent of XSLT's C<<<<<< <xsl:value-of select="some_xpath"/> >>>>>>.
See also L<<<<<< XML::LibXML::XPathContext >>>>>>->findvalue.
The xpath expression can be passed either as a string or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item exists
$bool = $node->exists( $xpath_expression );
This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a
node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true
for libxml2 >= 2.6.27).
For XPath expressions that do not return node-set, the method returns true if
the returned value is a non-zero number or a non-empty string.
=item childNodes
@childnodes = $node->childNodes();
I<<<<<< childNodes >>>>>> implements a more intuitive interface to the childnodes of the current node. It
enables you to pass all children directly to a C<<<<<< map >>>>>> or C<<<<<< grep >>>>>>. If this function is called in scalar context, a L<<<<<< XML::LibXML::NodeList >>>>>> object will be returned.
=item nonBlankChildNodes
@childnodes = $node->nonBlankChildNodes();
This is like I<<<<<< childNodes >>>>>>, but returns only non-blank nodes (where a node is blank if it is a Text or
CDATA node consisting of whitespace only). This method is not defined by DOM.
=item toString
$xmlstring = $node->toString($format,$docencoding);
This method is similar to the method C<<<<<< toString >>>>>> of a L<<<<<< XML::LibXML::Document >>>>>> but for a single node. It returns a string consisting of XML serialization of
the given node and all its descendants. Unlike C<<<<<< XML::LibXML::Document::toString >>>>>>, in this case the resulting string is by default a character string (UTF-8
encoded with UTF8 flag on). An optional flag $format controls indentation, as
in C<<<<<< XML::LibXML::Document::toString >>>>>>. If the second optional $docencoding flag is true, the result will be a byte
string in the document encoding (see C<<<<<< XML::LibXML::Document::actualEncoding >>>>>>).
=item toStringC14N
$c14nstring = $node->toStringC14N();
$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context);
The function is similar to toString(). Instead of simply serializing the
document tree, it transforms it as it is specified in the XML-C14N
Specification (see L<<<<<< http://www.w3.org/TR/xml-c14n >>>>>>). Such transformation is known as canonization.
If $with_comments is 0 or not defined, the result-document will not contain any
comments that exist in the original document. To include comments into the
canonized document, $with_comments has to be set to 1.
The parameter $xpath_expression defines the nodeset of nodes that should be
visible in the resulting document. This can be used to filter out some nodes.
One has to note, that only the nodes that are part of the nodeset, will be
included into the result-document. Their child-nodes will not exist in the
resulting document, unless they are part of the nodeset defined by the xpath
expression.
If $xpath_expression is omitted or empty, toStringC14N() will include all nodes
in the given sub-tree, using the following XPath expressions: with comments
(. | .//node() | .//@* | .//namespace::*)
and without comments
(. | .//node() | .//@* | .//namespace::*)[not(self::comment())]
An optional parameter $xpath_context can be used to pass an L<<<<<< XML::LibXML::XPathContext >>>>>> object defining the context for evaluation of $xpath_expression. This is useful
for mapping namespace prefixes used in the XPath expression to namespace URIs.
Note, however, that $node will be used as the context node for the evaluation,
not the context node of $xpath_context!
=item toStringEC14N
$ec14nstring = $node->toStringEC14N();
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list);
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list);
The function is similar to toStringC14N() but follows the XML-EXC-C14N
Specification (see L<<<<<< http://www.w3.org/TR/xml-exc-c14n >>>>>>) for exclusive canonization of XML.
The arguments $with_comments, $xpath_expression, $xpath_context are as in
toStringC14N(). An ARRAY reference can be passed as the last argument
$inclusive_prefix_list, listing namespace prefixes that are to be handled in
the manner described by the Canonical XML Recommendation (i.e. preserved in the
output even if the namespace is not used). C.f. the spec for details.
=item serialize
$str = $doc->serialize($format);
An alias for toString(). This function name was added to be more consistent
with libxml2.
=item serialize_c14n
An alias for toStringC14N().
=item serialize_exc_c14n
An alias for toStringEC14N().
=item localname
$localname = $node->localname;
Returns the local name of a tag. This is the part behind the colon.
=item prefix
$nameprefix = $node->prefix;
Returns the prefix of a tag. This is the part before the colon.
=item namespaceURI
$uri = $node->namespaceURI();
returns the URI of the current namespace.
=item hasAttributes
$boolean = $node->hasAttributes();
returns 1 (TRUE) if the current node has any attributes set, otherwise 0
(FALSE) is returned.
=item attributes
@attributelist = $node->attributes();
This function returns all attributes and namespace declarations assigned to the
given node.
Because XML::LibXML does not implement namespace declarations and attributes
the same way, it is required to test what kind of node is handled while
accessing the functions result.
If this function is called in array context the attribute nodes are returned as
an array. In scalar context the function will return a L<<<<<< XML::LibXML::NamedNodeMap >>>>>> object.
=item lookupNamespaceURI
$URI = $node->lookupNamespaceURI( $prefix );
Find a namespace URI by its prefix starting at the current node.
=item lookupNamespacePrefix
$prefix = $node->lookupNamespacePrefix( $URI );
Find a namespace prefix by its URI starting at the current node.
I<<<<<< NOTE >>>>>> Only the namespace URIs are meant to be unique. The prefix is only document
related. Also the document might have more than a single prefix defined for a
namespace.
=item normalize
$node->normalize;
This function normalizes adjacent text nodes. This function is not as strict as
libxml2's xmlTextMerge() function, since it will not free a node that is still
referenced by the perl layer.
=item getNamespaces
@nslist = $node->getNamespaces;
If a node has any namespaces defined, this function will return these
namespaces. Note, that this will not return all namespaces that are in scope,
but only the ones declared explicitly for that node.
Although getNamespaces is available for all nodes, it only makes sense if used
with element nodes.
=item removeChildNodes
$node->removeChildNodes();
This function is not specified for any DOM level: It removes all childnodes
from a node in a single step. Unlike the libxml2 function itself
(xmlFreeNodeList), this function will not immediately remove the nodes from the
memory. This saves one from getting memory violations, if there are nodes still
referred to from the Perl level.
=item baseURI ()
$strURI = $node->baseURI();
Searches for the base URL of the node. The method should work on both XML and
HTML documents even if base mechanisms for these are completely different. It
returns the base as defined in RFC 2396 sections "5.1.1. Base URI within
Document Content" and "5.1.2. Base URI from the Encapsulating Entity". However
it does not return the document base (5.1.3), use method C<<<<<< URI >>>>>> of C<<<<<< XML::LibXML::Document >>>>>> for this.
=item setBaseURI ($strURI)
$node->setBaseURI($strURI);
This method only does something useful for an element node in a XML document.
It sets the xml:base attribute on the node to $strURI, which effectively sets
the base URI of the node to the same value.
Note: For HTML documents this behaves as if the document was XML which may not
be desired, since it does not effectively set the base URI of the node. See RFC
2396 appendix D for an example of how base URI can be specified in HTML.
=item nodePath
$node->nodePath();
This function is not specified for any DOM level: It returns a canonical
structure based XPath for a given node.
=item line_number
$lineno = $node->line_number();
This function returns the line number where the tag was found during parsing.
If a node is added to the document the line number is 0. Problems may occur, if
a node from one document is passed to another one.
IMPORTANT: Due to limitations in the libxml2 library line numbers greater than
65535 will be returned as 65535. Please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details.
Note: line_number() is special to XML::LibXML and not part of the DOM
specification.
If the line_numbers flag of the parser was not activated before parsing,
line_number() will always return 0.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
1.70
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
PK @[