diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libmultifile.tex | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/libmultifile.tex')
-rw-r--r-- | sys/src/cmd/python/Doc/lib/libmultifile.tex | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/libmultifile.tex b/sys/src/cmd/python/Doc/lib/libmultifile.tex new file mode 100644 index 000000000..434832769 --- /dev/null +++ b/sys/src/cmd/python/Doc/lib/libmultifile.tex @@ -0,0 +1,175 @@ +\section{\module{multifile} --- + Support for files containing distinct parts} + +\declaremodule{standard}{multifile} +\modulesynopsis{Support for reading files which contain distinct + parts, such as some MIME data.} +\sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com} + +\deprecated{2.5}{The \refmodule{email} package should be used in + preference to the \module{multifile} module. + This module is present only to maintain backward + compatibility.} + +The \class{MultiFile} object enables you to treat sections of a text +file as file-like input objects, with \code{''} being returned by +\method{readline()} when a given delimiter pattern is encountered. The +defaults of this class are designed to make it useful for parsing +MIME multipart messages, but by subclassing it and overriding methods +it can be easily adapted for more general use. + +\begin{classdesc}{MultiFile}{fp\optional{, seekable}} +Create a multi-file. You must instantiate this class with an input +object argument for the \class{MultiFile} instance to get lines from, +such as a file object returned by \function{open()}. + +\class{MultiFile} only ever looks at the input object's +\method{readline()}, \method{seek()} and \method{tell()} methods, and +the latter two are only needed if you want random access to the +individual MIME parts. To use \class{MultiFile} on a non-seekable +stream object, set the optional \var{seekable} argument to false; this +will prevent using the input object's \method{seek()} and +\method{tell()} methods. +\end{classdesc} + +It will be useful to know that in \class{MultiFile}'s view of the world, text +is composed of three kinds of lines: data, section-dividers, and +end-markers. 
MultiFile is designed to support parsing of +messages that may have multiple nested message parts, each with its +own pattern for section-divider and end-marker lines. + +\begin{seealso} + \seemodule{email}{Comprehensive email handling package; supersedes + the \module{multifile} module.} +\end{seealso} + + +\subsection{MultiFile Objects \label{MultiFile-objects}} + +A \class{MultiFile} instance has the following methods: + +\begin{methoddesc}{readline}{str} +Read a line. If the line is data (not a section-divider or end-marker +or real EOF) return it. If the line matches the most-recently-stacked +boundary, return \code{''} and set \code{self.last} to 1 or 0 according as +the match is or is not an end-marker. If the line matches any other +stacked boundary, raise an error. On encountering end-of-file on the +underlying stream object, the method raises \exception{Error} unless +all boundaries have been popped. +\end{methoddesc} + +\begin{methoddesc}{readlines}{str} +Return all lines remaining in this part as a list of strings. +\end{methoddesc} + +\begin{methoddesc}{read}{} +Read all lines, up to the next section. Return them as a single +(multiline) string. Note that this doesn't take a size argument! +\end{methoddesc} + +\begin{methoddesc}{seek}{pos\optional{, whence}} +Seek. Seek indices are relative to the start of the current section. +The \var{pos} and \var{whence} arguments are interpreted as for a file +seek. +\end{methoddesc} + +\begin{methoddesc}{tell}{} +Return the file position relative to the start of the current section. +\end{methoddesc} + +\begin{methoddesc}{next}{} +Skip lines to the next section (that is, read lines until a +section-divider or end-marker has been consumed). Return true if +there is such a section, false if an end-marker is seen. Re-enable +the most-recently-pushed boundary. +\end{methoddesc} + +\begin{methoddesc}{is_data}{str} +Return true if \var{str} is data and false if it might be a section +boundary. 
As written, it tests for a prefix other than \code{'-}\code{-'} at +start of line (which all MIME boundaries have) but it is declared so +it can be overridden in derived classes. + +Note that this test is intended as a fast guard for the real +boundary tests; if it always returns false it will merely slow +processing, not cause it to fail. +\end{methoddesc} + +\begin{methoddesc}{push}{str} +Push a boundary string. When a decorated version of this boundary +is found as an input line, it will be interpreted as a section-divider +or end-marker (depending on the decoration, see \rfc{2045}). All subsequent +reads will return the empty string to indicate end-of-file, until a +call to \method{pop()} removes the boundary or a \method{next()} call +reenables it. + +It is possible to push more than one boundary. Encountering the +most-recently-pushed boundary will return EOF; encountering any other +boundary will raise an error. +\end{methoddesc} + +\begin{methoddesc}{pop}{} +Pop a section boundary. This boundary will no longer be interpreted +as EOF. +\end{methoddesc} + +\begin{methoddesc}{section_divider}{str} +Turn a boundary into a section-divider line. By default, this +method prepends \code{'-}\code{-'} (which MIME section boundaries have) but +it is declared so it can be overridden in derived classes. This +method need not append LF or CR-LF, as comparison with the result +ignores trailing whitespace. +\end{methoddesc} + +\begin{methoddesc}{end_marker}{str} +Turn a boundary string into an end-marker line. By default, this +method prepends \code{'-}\code{-'} and appends \code{'-}\code{-'} (like a +MIME-multipart end-of-message marker) but it is declared so it can be +overridden in derived classes. This method need not append LF or +CR-LF, as comparison with the result ignores trailing whitespace. +\end{methoddesc} + +Finally, \class{MultiFile} instances have two public instance variables: + +\begin{memberdesc}{level} +Nesting depth of the current part. 
+\end{memberdesc} + +\begin{memberdesc}{last} +True if the last end-of-file was for an end-of-message marker. +\end{memberdesc} + + +\subsection{\class{MultiFile} Example \label{multifile-example}} +\sectionauthor{Skip Montanaro}{skip@mojam.com} + +\begin{verbatim} +import mimetools +import multifile +import StringIO + +def extract_mime_part_matching(stream, mimetype): + """Return the first element in a multipart MIME message on stream + matching mimetype.""" + + msg = mimetools.Message(stream) + msgtype = msg.gettype() + params = msg.getplist() + + data = StringIO.StringIO() + if msgtype[:10] == "multipart/": + + file = multifile.MultiFile(stream) + file.push(msg.getparam("boundary")) + while file.next(): + submsg = mimetools.Message(file) + try: + data = StringIO.StringIO() + mimetools.decode(file, data, submsg.getencoding()) + except ValueError: + continue + if submsg.gettype() == mimetype: + break + file.pop() + return data.getvalue() +\end{verbatim} |