summaryrefslogtreecommitdiff
path: root/sys/src/cmd/python/Doc/lib/librfc822.tex
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
committercinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
commit458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/librfc822.tex
parent3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/librfc822.tex')
-rw-r--r--sys/src/cmd/python/Doc/lib/librfc822.tex336
1 files changed, 336 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/librfc822.tex b/sys/src/cmd/python/Doc/lib/librfc822.tex
new file mode 100644
index 000000000..4ca3734db
--- /dev/null
+++ b/sys/src/cmd/python/Doc/lib/librfc822.tex
@@ -0,0 +1,336 @@
+\section{\module{rfc822} ---
+ Parse RFC 2822 mail headers}
+
+\declaremodule{standard}{rfc822}
+\modulesynopsis{Parse \rfc{2822} style mail messages.}
+
+\deprecated{2.3}{The \refmodule{email} package should be used in
+ preference to the \module{rfc822} module. This
+ module is present only to maintain backward
+ compatibility.}
+
+This module defines a class, \class{Message}, which represents an
+``email message'' as defined by the Internet standard
+\rfc{2822}.\footnote{This module originally conformed to \rfc{822},
+hence the name. Since then, \rfc{2822} has been released as an
+update to \rfc{822}. This module should be considered
+\rfc{2822}-conformant, especially in cases where the
+syntax or semantics have changed since \rfc{822}.} Such messages
+consist of a collection of message headers, and a message body. This
+module also defines a helper class
+\class{AddressList} for parsing \rfc{2822} addresses. Please refer to
+the RFC for information on the specific syntax of \rfc{2822} messages.
+
+The \refmodule{mailbox}\refstmodindex{mailbox} module provides classes
+to read mailboxes produced by various end-user mail programs.
+
+\begin{classdesc}{Message}{file\optional{, seekable}}
+A \class{Message} instance is instantiated with an input object as
+parameter. Message relies only on the input object having a
+\method{readline()} method; in particular, ordinary file objects
+qualify. Instantiation reads headers from the input object up to a
+delimiter line (normally a blank line) and stores them in the
+instance. The message body, following the headers, is not consumed.
+
+This class can work with any input object that supports a
+\method{readline()} method. If the input object has seek and tell
+capability, the \method{rewindbody()} method will work; also, illegal
+lines will be pushed back onto the input stream. If the input object
+lacks seek but has an \method{unread()} method that can push back a
+line of input, \class{Message} will use that to push back illegal
+lines. Thus this class can be used to parse messages coming from a
+buffered stream.
+
+The optional \var{seekable} argument is provided as a workaround for
+certain stdio libraries in which \cfunction{tell()} discards buffered
+data before discovering that the \cfunction{lseek()} system call
+doesn't work. For maximum portability, you should set the seekable
+argument to zero to prevent that initial \method{tell()} when passing
+in an unseekable object such as a file object created from a socket
+object.
+
+Input lines as read from the file may either be terminated by CR-LF or
+by a single linefeed; a terminating CR-LF is replaced by a single
+linefeed before the line is stored.
+
+All header matching is done independent of upper or lower case;
+e.g.\ \code{\var{m}['From']}, \code{\var{m}['from']} and
+\code{\var{m}['FROM']} all yield the same result.
+\end{classdesc}
+
+\begin{classdesc}{AddressList}{field}
+You may instantiate the \class{AddressList} helper class using a single
+string parameter, a comma-separated list of \rfc{2822} addresses to be
+parsed. (The parameter \code{None} yields an empty list.)
+\end{classdesc}
+
+\begin{funcdesc}{quote}{str}
+Return a new string with backslashes in \var{str} replaced by two
+backslashes and double quotes replaced by backslash-double quote.
+\end{funcdesc}
+
+\begin{funcdesc}{unquote}{str}
+Return a new string which is an \emph{unquoted} version of \var{str}.
+If \var{str} ends and begins with double quotes, they are stripped
+off. Likewise if \var{str} ends and begins with angle brackets, they
+are stripped off.
+\end{funcdesc}
+
+\begin{funcdesc}{parseaddr}{address}
+Parse \var{address}, which should be the value of some
+address-containing field such as \mailheader{To} or \mailheader{Cc},
+into its constituent ``realname'' and ``email address'' parts.
+Returns a tuple of that information, unless the parse fails, in which
+case a 2-tuple \code{(None, None)} is returned.
+\end{funcdesc}
+
+\begin{funcdesc}{dump_address_pair}{pair}
+The inverse of \method{parseaddr()}, this takes a 2-tuple of the form
+\code{(\var{realname}, \var{email_address})} and returns the string
+value suitable for a \mailheader{To} or \mailheader{Cc} header. If
+the first element of \var{pair} is false, then the second element is
+returned unmodified.
+\end{funcdesc}
+
+\begin{funcdesc}{parsedate}{date}
+Attempts to parse a date according to the rules in \rfc{2822}.
+however, some mailers don't follow that format as specified, so
+\function{parsedate()} tries to guess correctly in such cases.
+\var{date} is a string containing an \rfc{2822} date, such as
+\code{'Mon, 20 Nov 1995 19:12:08 -0500'}. If it succeeds in parsing
+the date, \function{parsedate()} returns a 9-tuple that can be passed
+directly to \function{time.mktime()}; otherwise \code{None} will be
+returned. Note that fields 6, 7, and 8 of the result tuple are not
+usable.
+\end{funcdesc}
+
+\begin{funcdesc}{parsedate_tz}{date}
+Performs the same function as \function{parsedate()}, but returns
+either \code{None} or a 10-tuple; the first 9 elements make up a tuple
+that can be passed directly to \function{time.mktime()}, and the tenth
+is the offset of the date's timezone from UTC (which is the official
+term for Greenwich Mean Time). (Note that the sign of the timezone
+offset is the opposite of the sign of the \code{time.timezone}
+variable for the same timezone; the latter variable follows the
+\POSIX{} standard while this module follows \rfc{2822}.) If the input
+string has no timezone, the last element of the tuple returned is
+\code{None}. Note that fields 6, 7, and 8 of the result tuple are not
+usable.
+\end{funcdesc}
+
+\begin{funcdesc}{mktime_tz}{tuple}
+Turn a 10-tuple as returned by \function{parsedate_tz()} into a UTC
+timestamp. If the timezone item in the tuple is \code{None}, assume
+local time. Minor deficiency: this first interprets the first 8
+elements as a local time and then compensates for the timezone
+difference; this may yield a slight error around daylight savings time
+switch dates. Not enough to worry about for common use.
+\end{funcdesc}
+
+
+\begin{seealso}
+ \seemodule{email}{Comprehensive email handling package; supersedes
+ the \module{rfc822} module.}
+ \seemodule{mailbox}{Classes to read various mailbox formats produced
+ by end-user mail programs.}
+ \seemodule{mimetools}{Subclass of \class{rfc822.Message} that
+ handles MIME encoded messages.}
+\end{seealso}
+
+
+\subsection{Message Objects \label{message-objects}}
+
+A \class{Message} instance has the following methods:
+
+\begin{methoddesc}{rewindbody}{}
+Seek to the start of the message body. This only works if the file
+object is seekable.
+\end{methoddesc}
+
+\begin{methoddesc}{isheader}{line}
+Returns a line's canonicalized fieldname (the dictionary key that will
+be used to index it) if the line is a legal \rfc{2822} header; otherwise
+returns \code{None} (implying that parsing should stop here and the
+line be pushed back on the input stream). It is sometimes useful to
+override this method in a subclass.
+\end{methoddesc}
+
+\begin{methoddesc}{islast}{line}
+Return true if the given line is a delimiter on which Message should
+stop. The delimiter line is consumed, and the file object's read
+location positioned immediately after it. By default this method just
+checks that the line is blank, but you can override it in a subclass.
+\end{methoddesc}
+
+\begin{methoddesc}{iscomment}{line}
+Return \code{True} if the given line should be ignored entirely, just skipped.
+By default this is a stub that always returns \code{False}, but you can
+override it in a subclass.
+\end{methoddesc}
+
+\begin{methoddesc}{getallmatchingheaders}{name}
+Return a list of lines consisting of all headers matching
+\var{name}, if any. Each physical line, whether it is a continuation
+line or not, is a separate list item. Return the empty list if no
+header matches \var{name}.
+\end{methoddesc}
+
+\begin{methoddesc}{getfirstmatchingheader}{name}
+Return a list of lines comprising the first header matching
+\var{name}, and its continuation line(s), if any. Return
+\code{None} if there is no header matching \var{name}.
+\end{methoddesc}
+
+\begin{methoddesc}{getrawheader}{name}
+Return a single string consisting of the text after the colon in the
+first header matching \var{name}. This includes leading whitespace,
+the trailing linefeed, and internal linefeeds and whitespace if there
+any continuation line(s) were present. Return \code{None} if there is
+no header matching \var{name}.
+\end{methoddesc}
+
+\begin{methoddesc}{getheader}{name\optional{, default}}
+Like \code{getrawheader(\var{name})}, but strip leading and trailing
+whitespace. Internal whitespace is not stripped. The optional
+\var{default} argument can be used to specify a different default to
+be returned when there is no header matching \var{name}.
+\end{methoddesc}
+
+\begin{methoddesc}{get}{name\optional{, default}}
+An alias for \method{getheader()}, to make the interface more compatible
+with regular dictionaries.
+\end{methoddesc}
+
+\begin{methoddesc}{getaddr}{name}
+Return a pair \code{(\var{full name}, \var{email address})} parsed
+from the string returned by \code{getheader(\var{name})}. If no
+header matching \var{name} exists, return \code{(None, None)};
+otherwise both the full name and the address are (possibly empty)
+strings.
+
+Example: If \var{m}'s first \mailheader{From} header contains the
+string \code{'jack@cwi.nl (Jack Jansen)'}, then
+\code{m.getaddr('From')} will yield the pair
+\code{('Jack Jansen', 'jack@cwi.nl')}.
+If the header contained
+\code{'Jack Jansen <jack@cwi.nl>'} instead, it would yield the
+exact same result.
+\end{methoddesc}
+
+\begin{methoddesc}{getaddrlist}{name}
+This is similar to \code{getaddr(\var{list})}, but parses a header
+containing a list of email addresses (e.g.\ a \mailheader{To} header) and
+returns a list of \code{(\var{full name}, \var{email address})} pairs
+(even if there was only one address in the header). If there is no
+header matching \var{name}, return an empty list.
+
+If multiple headers exist that match the named header (e.g. if there
+are several \mailheader{Cc} headers), all are parsed for addresses.
+Any continuation lines the named headers contain are also parsed.
+\end{methoddesc}
+
+\begin{methoddesc}{getdate}{name}
+Retrieve a header using \method{getheader()} and parse it into a 9-tuple
+compatible with \function{time.mktime()}; note that fields 6, 7, and 8
+are not usable. If there is no header matching
+\var{name}, or it is unparsable, return \code{None}.
+
+Date parsing appears to be a black art, and not all mailers adhere to
+the standard. While it has been tested and found correct on a large
+collection of email from many sources, it is still possible that this
+function may occasionally yield an incorrect result.
+\end{methoddesc}
+
+\begin{methoddesc}{getdate_tz}{name}
+Retrieve a header using \method{getheader()} and parse it into a
+10-tuple; the first 9 elements will make a tuple compatible with
+\function{time.mktime()}, and the 10th is a number giving the offset
+of the date's timezone from UTC. Note that fields 6, 7, and 8
+are not usable. Similarly to \method{getdate()}, if
+there is no header matching \var{name}, or it is unparsable, return
+\code{None}.
+\end{methoddesc}
+
+\class{Message} instances also support a limited mapping interface.
+In particular: \code{\var{m}[name]} is like
+\code{\var{m}.getheader(name)} but raises \exception{KeyError} if
+there is no matching header; and \code{len(\var{m})},
+\code{\var{m}.get(\var{name}\optional{, \var{default}})},
+\code{\var{m}.has_key(\var{name})}, \code{\var{m}.keys()},
+\code{\var{m}.values()} \code{\var{m}.items()}, and
+\code{\var{m}.setdefault(\var{name}\optional{, \var{default}})} act as
+expected, with the one difference that \method{setdefault()} uses
+an empty string as the default value. \class{Message} instances
+also support the mapping writable interface \code{\var{m}[name] =
+value} and \code{del \var{m}[name]}. \class{Message} objects do not
+support the \method{clear()}, \method{copy()}, \method{popitem()}, or
+\method{update()} methods of the mapping interface. (Support for
+\method{get()} and \method{setdefault()} was only added in Python
+2.2.)
+
+Finally, \class{Message} instances have some public instance variables:
+
+\begin{memberdesc}{headers}
+A list containing the entire set of header lines, in the order in
+which they were read (except that setitem calls may disturb this
+order). Each line contains a trailing newline. The
+blank line terminating the headers is not contained in the list.
+\end{memberdesc}
+
+\begin{memberdesc}{fp}
+The file or file-like object passed at instantiation time. This can
+be used to read the message content.
+\end{memberdesc}
+
+\begin{memberdesc}{unixfrom}
+The \UNIX{} \samp{From~} line, if the message had one, or an empty
+string. This is needed to regenerate the message in some contexts,
+such as an \code{mbox}-style mailbox file.
+\end{memberdesc}
+
+
+\subsection{AddressList Objects \label{addresslist-objects}}
+
+An \class{AddressList} instance has the following methods:
+
+\begin{methoddesc}{__len__}{}
+Return the number of addresses in the address list.
+\end{methoddesc}
+
+\begin{methoddesc}{__str__}{}
+Return a canonicalized string representation of the address list.
+Addresses are rendered in "name" <host@domain> form, comma-separated.
+\end{methoddesc}
+
+\begin{methoddesc}{__add__}{alist}
+Return a new \class{AddressList} instance that contains all addresses
+in both \class{AddressList} operands, with duplicates removed (set
+union).
+\end{methoddesc}
+
+\begin{methoddesc}{__iadd__}{alist}
+In-place version of \method{__add__()}; turns this \class{AddressList}
+instance into the union of itself and the right-hand instance,
+\var{alist}.
+\end{methoddesc}
+
+\begin{methoddesc}{__sub__}{alist}
+Return a new \class{AddressList} instance that contains every address
+in the left-hand \class{AddressList} operand that is not present in
+the right-hand address operand (set difference).
+\end{methoddesc}
+
+\begin{methoddesc}{__isub__}{alist}
+In-place version of \method{__sub__()}, removing addresses in this
+list which are also in \var{alist}.
+\end{methoddesc}
+
+
+Finally, \class{AddressList} instances have one public instance variable:
+
+\begin{memberdesc}{addresslist}
+A list of tuple string pairs, one per address. In each member, the
+first is the canonicalized name part, the second is the
+actual route-address (\character{@}-separated username-host.domain
+pair).
+\end{memberdesc}