diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libxmllib.tex | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/libxmllib.tex')
-rw-r--r-- | sys/src/cmd/python/Doc/lib/libxmllib.tex | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/libxmllib.tex b/sys/src/cmd/python/Doc/lib/libxmllib.tex new file mode 100644 index 000000000..f7197cade --- /dev/null +++ b/sys/src/cmd/python/Doc/lib/libxmllib.tex @@ -0,0 +1,287 @@ +\section{\module{xmllib} --- + A parser for XML documents} + +\declaremodule{standard}{xmllib} +\modulesynopsis{A parser for XML documents.} +\moduleauthor{Sjoerd Mullender}{Sjoerd.Mullender@cwi.nl} +\sectionauthor{Sjoerd Mullender}{Sjoerd.Mullender@cwi.nl} + + +\index{XML} +\index{Extensible Markup Language} + +\deprecated{2.0}{Use \refmodule{xml.sax} instead. The newer XML + package includes full support for XML 1.0.} + +\versionchanged[Added namespace support]{1.5.2} + +This module defines a class \class{XMLParser} which serves as the basis +for parsing text files formatted in XML (Extensible Markup Language). + +\begin{classdesc}{XMLParser}{} +The \class{XMLParser} class must be instantiated without +arguments.\footnote{Actually, a number of keyword arguments are +recognized which influence the parser to accept certain non-standard +constructs. The following keyword arguments are currently +recognized. The default for all of these is \code{0} (false) except +for the last one for which the default is \code{1} (true). +\var{accept_unquoted_attributes} (accept certain attribute values +without requiring quotes), \var{accept_missing_endtag_name} (accept +end tags that look like \code{</>}), \var{map_case} (map upper case to +lower case in tags and attributes), \var{accept_utf8} (allow UTF-8 +characters in input; this is required according to the XML standard, +but Python does not as yet deal properly with these characters, so +this is not the default), \var{translate_attribute_references} +(attempt to translate character and entity references in attribute values).} +\end{classdesc} + +This class provides the following interface methods and instance variables: + +\begin{memberdesc}{attributes} +A mapping of element names to mappings.
The latter mapping maps +attribute names that are valid for the element to the default value of +the attribute, or, if there is no default, to \code{None}. The default +value is the empty dictionary. This variable is meant to be +overridden, not extended, since the default is shared by all instances +of \class{XMLParser}. +\end{memberdesc} + +\begin{memberdesc}{elements} +A mapping of element names to tuples. The tuples contain a function +for handling the start and end tag respectively of the element, or +\code{None} if the method \method{unknown_starttag()} or +\method{unknown_endtag()} is to be called. The default value is the +empty dictionary. This variable is meant to be overridden, not +extended, since the default is shared by all instances of +\class{XMLParser}. +\end{memberdesc} + +\begin{memberdesc}{entitydefs} +A mapping of entity names to their values. The default value contains +definitions for \code{'lt'}, \code{'gt'}, \code{'amp'}, \code{'quot'}, +and \code{'apos'}. +\end{memberdesc} + +\begin{methoddesc}{reset}{} +Reset the instance. Loses all unprocessed data. This is called +implicitly at instantiation time. +\end{methoddesc} + +\begin{methoddesc}{setnomoretags}{} +Stop processing tags. Treat all following input as literal input +(CDATA). +\end{methoddesc} + +\begin{methoddesc}{setliteral}{} +Enter literal mode (CDATA mode). This mode is automatically exited +when the close tag matching the last unclosed open tag is encountered. +\end{methoddesc} + +\begin{methoddesc}{feed}{data} +Feed some text to the parser. It is processed insofar as it consists +of complete tags; incomplete data is buffered until more data is +fed or \method{close()} is called. +\end{methoddesc} + +\begin{methoddesc}{close}{} +Force processing of all buffered data as if it were followed by an +end-of-file mark. This method may be redefined by a derived class to +define additional processing at the end of the input, but the +redefined version should always call the base class \method{close()}.
+\end{methoddesc} + +\begin{methoddesc}{translate_references}{data} +Translate all entity and character references in \var{data} and +return the translated string. +\end{methoddesc} + +\begin{methoddesc}{getnamespace}{} +Return a mapping of namespace abbreviations to namespace URIs that are +currently in effect. +\end{methoddesc} + +\begin{methoddesc}{handle_xml}{encoding, standalone} +This method is called when the \samp{<?xml ...?>} tag is processed. +The arguments are the values of the encoding and standalone attributes +in the tag. Both encoding and standalone are optional. The values +passed to \method{handle_xml()} default to \code{None} and the string +\code{'no'} respectively. +\end{methoddesc} + +\begin{methoddesc}{handle_doctype}{tag, pubid, syslit, data} +This\index{DOCTYPE declaration} method is called when the +\samp{<!DOCTYPE...>} declaration is processed. The arguments are the +tag name of the root element, the Formal Public\index{Formal Public +Identifier} Identifier (or \code{None} if not specified), the system +identifier, and the uninterpreted contents of the internal DTD subset +as a string (or \code{None} if not present). +\end{methoddesc} + +\begin{methoddesc}{handle_starttag}{tag, method, attributes} +This method is called to handle start tags for which a start tag +handler is defined in the instance variable \member{elements}. The +\var{tag} argument is the name of the tag, and the +\var{method} argument is the function (method) which should be used to +support semantic interpretation of the start tag. The +\var{attributes} argument is a dictionary of attributes, the key being +the \var{name} and the value being the \var{value} of the attribute +found inside the tag's \code{<>} brackets. Character and entity +references in the \var{value} have been interpreted. 
For instance, +for the start tag \code{<A HREF="http://www.cwi.nl/">}, this method +would be called as \code{handle_starttag('A', self.elements['A'][0], +\{'HREF': 'http://www.cwi.nl/'\})}. The base implementation simply +calls \var{method} with \var{attributes} as the only argument. +\end{methoddesc} + +\begin{methoddesc}{handle_endtag}{tag, method} +This method is called to handle end tags for which an end tag handler +is defined in the instance variable \member{elements}. The \var{tag} +argument is the name of the tag, and the \var{method} argument is the +function (method) which should be used to support semantic +interpretation of the end tag. For instance, for the end tag +\code{</A>}, this method would be called as \code{handle_endtag('A', +self.elements['A'][1])}. The base implementation simply calls +\var{method}. +\end{methoddesc} + +\begin{methoddesc}{handle_data}{data} +This method is called to process arbitrary data. It is intended to be +overridden by a derived class; the base class implementation does +nothing. +\end{methoddesc} + +\begin{methoddesc}{handle_charref}{ref} +This method is called to process a character reference of the form +\samp{\&\#\var{ref};}. \var{ref} can be either a decimal number, +or a hexadecimal number when preceded by an \character{x}. +In the base implementation, \var{ref} must be a number in the +range 0--255. It translates the character to \ASCII{} and calls the +method \method{handle_data()} with the character as argument. If +\var{ref} is invalid or out of range, the method +\code{unknown_charref(\var{ref})} is called to handle the error. A +subclass must override this method to provide support for character +references outside of the \ASCII{} range. +\end{methoddesc} + +\begin{methoddesc}{handle_comment}{comment} +This method is called when a comment is encountered. The +\var{comment} argument is a string containing the text between the +\samp{<!--} and \samp{-->} delimiters, but not the delimiters +themselves.
For example, the comment \samp{<!--text-->} will +cause this method to be called with the argument \code{'text'}. The +default method does nothing. +\end{methoddesc} + +\begin{methoddesc}{handle_cdata}{data} +This method is called when a CDATA section is encountered. The +\var{data} argument is a string containing the text between the +\samp{<![CDATA[} and \samp{]]>} delimiters, but not the delimiters +themselves. For example, the section \samp{<![CDATA[text]]>} will +cause this method to be called with the argument \code{'text'}. The +default method does nothing, and is intended to be overridden. +\end{methoddesc} + +\begin{methoddesc}{handle_proc}{name, data} +This method is called when a processing instruction (PI) is +encountered. The \var{name} is the PI target, and the \var{data} +argument is a string containing the text between the PI target and the +closing delimiter, but not the delimiter itself. For example, the +instruction \samp{<?XML text?>} will cause this method to be called +with the arguments \code{'XML'} and \code{'text'}. The default method +does nothing. Note that if a document starts with \samp{<?xml +...?>}, \method{handle_xml()} is called to handle it. +\end{methoddesc} + +\begin{methoddesc}{handle_special}{data} +This method is called when a declaration is encountered. The +\var{data} argument is a string containing the text between the +\samp{<!} and \samp{>} delimiters, but not the delimiters +themselves. For example, the \index{ENTITY declaration}entity +declaration \samp{<!ENTITY text>} will cause this method to be called +with the argument \code{'ENTITY text'}. The default method does +nothing. Note that \samp{<!DOCTYPE ...>} is handled separately if it +is located at the start of the document. +\end{methoddesc} + +\begin{methoddesc}{syntax_error}{message} +This method is called when a syntax error is encountered. The +\var{message} is a description of what was wrong. The default method +raises a \exception{RuntimeError} exception.
If this method is +overridden, it is permissible for it to return. This method is only +called when the error can be recovered from. Unrecoverable errors +raise a \exception{RuntimeError} without first calling +\method{syntax_error()}. +\end{methoddesc} + +\begin{methoddesc}{unknown_starttag}{tag, attributes} +This method is called to process an unknown start tag. It is intended +to be overridden by a derived class; the base class implementation +does nothing. +\end{methoddesc} + +\begin{methoddesc}{unknown_endtag}{tag} +This method is called to process an unknown end tag. It is intended +to be overridden by a derived class; the base class implementation +does nothing. +\end{methoddesc} + +\begin{methoddesc}{unknown_charref}{ref} +This method is called to process unresolvable numeric character +references. It is intended to be overridden by a derived class; the +base class implementation does nothing. +\end{methoddesc} + +\begin{methoddesc}{unknown_entityref}{ref} +This method is called to process an unknown entity reference. It is +intended to be overridden by a derived class; the base class +implementation calls \method{syntax_error()} to signal an error. +\end{methoddesc} + + +\begin{seealso} + \seetitle[http://www.w3.org/TR/REC-xml]{Extensible Markup Language + (XML) 1.0}{The XML specification, published by the World + Wide Web Consortium (W3C), defines the syntax and + processor requirements for XML. 
References to additional + material on XML, including translations of the + specification, are available at + \url{http://www.w3.org/XML/}.} + + \seetitle[http://www.python.org/topics/xml/]{Python and XML + Processing}{The Python XML Topic Guide provides a great + deal of information on using XML from Python and links to + other sources of information on XML.} + + \seetitle[http://www.python.org/sigs/xml-sig/]{SIG for XML + Processing in Python}{The Python XML Special Interest + Group is developing substantial support for processing XML + from Python.} +\end{seealso} + + +\subsection{XML Namespaces \label{xml-namespace}} + +This module has support for XML namespaces as defined in the XML +Namespaces proposed recommendation. +\indexii{XML}{namespaces} + +Tag and attribute names that are defined in an XML namespace are +handled as if the name of the tag or attribute consisted of the +namespace (the URL that defines the namespace) followed by a +space and the name of the tag or attribute. For instance, the tag +\code{<html xmlns='http://www.w3.org/TR/REC-html40'>} is treated as if +the tag name were \code{'http://www.w3.org/TR/REC-html40 html'}, and +the tag \code{<html:a href='http://frob.com'>} inside the above +mentioned element is treated as if the tag name were +\code{'http://www.w3.org/TR/REC-html40 a'} and the attribute name as +if it were \code{'http://www.w3.org/TR/REC-html40 href'}. + +An older draft of the XML Namespaces proposal is also recognized, but +triggers a warning. + +\begin{seealso} + \seetitle[http://www.w3.org/TR/REC-xml-names/]{Namespaces in XML}{ + This World Wide Web Consortium recommendation describes the + proper syntax and processing requirements for namespaces in + XML.} +\end{seealso} |