diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/librfc822.tex | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/librfc822.tex')
-rw-r--r-- | sys/src/cmd/python/Doc/lib/librfc822.tex | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/librfc822.tex b/sys/src/cmd/python/Doc/lib/librfc822.tex new file mode 100644 index 000000000..4ca3734db --- /dev/null +++ b/sys/src/cmd/python/Doc/lib/librfc822.tex @@ -0,0 +1,336 @@ +\section{\module{rfc822} --- + Parse RFC 2822 mail headers} + +\declaremodule{standard}{rfc822} +\modulesynopsis{Parse \rfc{2822} style mail messages.} + +\deprecated{2.3}{The \refmodule{email} package should be used in + preference to the \module{rfc822} module. This + module is present only to maintain backward + compatibility.} + +This module defines a class, \class{Message}, which represents an +``email message'' as defined by the Internet standard +\rfc{2822}.\footnote{This module originally conformed to \rfc{822}, +hence the name. Since then, \rfc{2822} has been released as an +update to \rfc{822}. This module should be considered +\rfc{2822}-conformant, especially in cases where the +syntax or semantics have changed since \rfc{822}.} Such messages +consist of a collection of message headers, and a message body. This +module also defines a helper class +\class{AddressList} for parsing \rfc{2822} addresses. Please refer to +the RFC for information on the specific syntax of \rfc{2822} messages. + +The \refmodule{mailbox}\refstmodindex{mailbox} module provides classes +to read mailboxes produced by various end-user mail programs. + +\begin{classdesc}{Message}{file\optional{, seekable}} +A \class{Message} instance is instantiated with an input object as +parameter. Message relies only on the input object having a +\method{readline()} method; in particular, ordinary file objects +qualify. Instantiation reads headers from the input object up to a +delimiter line (normally a blank line) and stores them in the +instance. The message body, following the headers, is not consumed. + +This class can work with any input object that supports a +\method{readline()} method. If the input object has seek and tell +capability, the \method{rewindbody()} method will work; also, illegal +lines will be pushed back onto the input stream. If the input object +lacks seek but has an \method{unread()} method that can push back a +line of input, \class{Message} will use that to push back illegal +lines. Thus this class can be used to parse messages coming from a +buffered stream. + +The optional \var{seekable} argument is provided as a workaround for +certain stdio libraries in which \cfunction{tell()} discards buffered +data before discovering that the \cfunction{lseek()} system call +doesn't work. For maximum portability, you should set the seekable +argument to zero to prevent that initial \method{tell()} when passing +in an unseekable object such as a file object created from a socket +object. + +Input lines as read from the file may either be terminated by CR-LF or +by a single linefeed; a terminating CR-LF is replaced by a single +linefeed before the line is stored. + +All header matching is done independent of upper or lower case; +e.g.\ \code{\var{m}['From']}, \code{\var{m}['from']} and +\code{\var{m}['FROM']} all yield the same result. +\end{classdesc} + +\begin{classdesc}{AddressList}{field} +You may instantiate the \class{AddressList} helper class using a single +string parameter, a comma-separated list of \rfc{2822} addresses to be +parsed. (The parameter \code{None} yields an empty list.) +\end{classdesc} + +\begin{funcdesc}{quote}{str} +Return a new string with backslashes in \var{str} replaced by two +backslashes and double quotes replaced by backslash-double quote. +\end{funcdesc} + +\begin{funcdesc}{unquote}{str} +Return a new string which is an \emph{unquoted} version of \var{str}. +If \var{str} ends and begins with double quotes, they are stripped +off. Likewise if \var{str} ends and begins with angle brackets, they +are stripped off. +\end{funcdesc} + +\begin{funcdesc}{parseaddr}{address} +Parse \var{address}, which should be the value of some +address-containing field such as \mailheader{To} or \mailheader{Cc}, +into its constituent ``realname'' and ``email address'' parts. +Returns a tuple of that information, unless the parse fails, in which +case a 2-tuple \code{(None, None)} is returned. +\end{funcdesc} + +\begin{funcdesc}{dump_address_pair}{pair} +The inverse of \method{parseaddr()}, this takes a 2-tuple of the form +\code{(\var{realname}, \var{email_address})} and returns the string +value suitable for a \mailheader{To} or \mailheader{Cc} header. If +the first element of \var{pair} is false, then the second element is +returned unmodified. +\end{funcdesc} + +\begin{funcdesc}{parsedate}{date} +Attempts to parse a date according to the rules in \rfc{2822}. +however, some mailers don't follow that format as specified, so +\function{parsedate()} tries to guess correctly in such cases. +\var{date} is a string containing an \rfc{2822} date, such as +\code{'Mon, 20 Nov 1995 19:12:08 -0500'}. If it succeeds in parsing +the date, \function{parsedate()} returns a 9-tuple that can be passed +directly to \function{time.mktime()}; otherwise \code{None} will be +returned. Note that fields 6, 7, and 8 of the result tuple are not +usable. +\end{funcdesc} + +\begin{funcdesc}{parsedate_tz}{date} +Performs the same function as \function{parsedate()}, but returns +either \code{None} or a 10-tuple; the first 9 elements make up a tuple +that can be passed directly to \function{time.mktime()}, and the tenth +is the offset of the date's timezone from UTC (which is the official +term for Greenwich Mean Time). (Note that the sign of the timezone +offset is the opposite of the sign of the \code{time.timezone} +variable for the same timezone; the latter variable follows the +\POSIX{} standard while this module follows \rfc{2822}.) If the input +string has no timezone, the last element of the tuple returned is +\code{None}. Note that fields 6, 7, and 8 of the result tuple are not +usable. +\end{funcdesc} + +\begin{funcdesc}{mktime_tz}{tuple} +Turn a 10-tuple as returned by \function{parsedate_tz()} into a UTC +timestamp. If the timezone item in the tuple is \code{None}, assume +local time. Minor deficiency: this first interprets the first 8 +elements as a local time and then compensates for the timezone +difference; this may yield a slight error around daylight savings time +switch dates. Not enough to worry about for common use. +\end{funcdesc} + + +\begin{seealso} + \seemodule{email}{Comprehensive email handling package; supersedes + the \module{rfc822} module.} + \seemodule{mailbox}{Classes to read various mailbox formats produced + by end-user mail programs.} + \seemodule{mimetools}{Subclass of \class{rfc822.Message} that + handles MIME encoded messages.} +\end{seealso} + + +\subsection{Message Objects \label{message-objects}} + +A \class{Message} instance has the following methods: + +\begin{methoddesc}{rewindbody}{} +Seek to the start of the message body. This only works if the file +object is seekable. +\end{methoddesc} + +\begin{methoddesc}{isheader}{line} +Returns a line's canonicalized fieldname (the dictionary key that will +be used to index it) if the line is a legal \rfc{2822} header; otherwise +returns \code{None} (implying that parsing should stop here and the +line be pushed back on the input stream). It is sometimes useful to +override this method in a subclass. +\end{methoddesc} + +\begin{methoddesc}{islast}{line} +Return true if the given line is a delimiter on which Message should +stop. The delimiter line is consumed, and the file object's read +location positioned immediately after it. By default this method just +checks that the line is blank, but you can override it in a subclass. +\end{methoddesc} + +\begin{methoddesc}{iscomment}{line} +Return \code{True} if the given line should be ignored entirely, just skipped. +By default this is a stub that always returns \code{False}, but you can +override it in a subclass. +\end{methoddesc} + +\begin{methoddesc}{getallmatchingheaders}{name} +Return a list of lines consisting of all headers matching +\var{name}, if any. Each physical line, whether it is a continuation +line or not, is a separate list item. Return the empty list if no +header matches \var{name}. +\end{methoddesc} + +\begin{methoddesc}{getfirstmatchingheader}{name} +Return a list of lines comprising the first header matching +\var{name}, and its continuation line(s), if any. Return +\code{None} if there is no header matching \var{name}. +\end{methoddesc} + +\begin{methoddesc}{getrawheader}{name} +Return a single string consisting of the text after the colon in the +first header matching \var{name}. This includes leading whitespace, +the trailing linefeed, and internal linefeeds and whitespace if there +any continuation line(s) were present. Return \code{None} if there is +no header matching \var{name}. +\end{methoddesc} + +\begin{methoddesc}{getheader}{name\optional{, default}} +Like \code{getrawheader(\var{name})}, but strip leading and trailing +whitespace. Internal whitespace is not stripped. The optional +\var{default} argument can be used to specify a different default to +be returned when there is no header matching \var{name}. +\end{methoddesc} + +\begin{methoddesc}{get}{name\optional{, default}} +An alias for \method{getheader()}, to make the interface more compatible +with regular dictionaries. +\end{methoddesc} + +\begin{methoddesc}{getaddr}{name} +Return a pair \code{(\var{full name}, \var{email address})} parsed +from the string returned by \code{getheader(\var{name})}. If no +header matching \var{name} exists, return \code{(None, None)}; +otherwise both the full name and the address are (possibly empty) +strings. + +Example: If \var{m}'s first \mailheader{From} header contains the +string \code{'jack@cwi.nl (Jack Jansen)'}, then +\code{m.getaddr('From')} will yield the pair +\code{('Jack Jansen', 'jack@cwi.nl')}. +If the header contained +\code{'Jack Jansen <jack@cwi.nl>'} instead, it would yield the +exact same result. +\end{methoddesc} + +\begin{methoddesc}{getaddrlist}{name} +This is similar to \code{getaddr(\var{list})}, but parses a header +containing a list of email addresses (e.g.\ a \mailheader{To} header) and +returns a list of \code{(\var{full name}, \var{email address})} pairs +(even if there was only one address in the header). If there is no +header matching \var{name}, return an empty list. + +If multiple headers exist that match the named header (e.g. if there +are several \mailheader{Cc} headers), all are parsed for addresses. +Any continuation lines the named headers contain are also parsed. +\end{methoddesc} + +\begin{methoddesc}{getdate}{name} +Retrieve a header using \method{getheader()} and parse it into a 9-tuple +compatible with \function{time.mktime()}; note that fields 6, 7, and 8 +are not usable. If there is no header matching +\var{name}, or it is unparsable, return \code{None}. + +Date parsing appears to be a black art, and not all mailers adhere to +the standard. While it has been tested and found correct on a large +collection of email from many sources, it is still possible that this +function may occasionally yield an incorrect result. +\end{methoddesc} + +\begin{methoddesc}{getdate_tz}{name} +Retrieve a header using \method{getheader()} and parse it into a +10-tuple; the first 9 elements will make a tuple compatible with +\function{time.mktime()}, and the 10th is a number giving the offset +of the date's timezone from UTC. Note that fields 6, 7, and 8 +are not usable. Similarly to \method{getdate()}, if +there is no header matching \var{name}, or it is unparsable, return +\code{None}. +\end{methoddesc} + +\class{Message} instances also support a limited mapping interface. +In particular: \code{\var{m}[name]} is like +\code{\var{m}.getheader(name)} but raises \exception{KeyError} if +there is no matching header; and \code{len(\var{m})}, +\code{\var{m}.get(\var{name}\optional{, \var{default}})}, +\code{\var{m}.has_key(\var{name})}, \code{\var{m}.keys()}, +\code{\var{m}.values()} \code{\var{m}.items()}, and +\code{\var{m}.setdefault(\var{name}\optional{, \var{default}})} act as +expected, with the one difference that \method{setdefault()} uses +an empty string as the default value. \class{Message} instances +also support the mapping writable interface \code{\var{m}[name] = +value} and \code{del \var{m}[name]}. \class{Message} objects do not +support the \method{clear()}, \method{copy()}, \method{popitem()}, or +\method{update()} methods of the mapping interface. (Support for +\method{get()} and \method{setdefault()} was only added in Python +2.2.) + +Finally, \class{Message} instances have some public instance variables: + +\begin{memberdesc}{headers} +A list containing the entire set of header lines, in the order in +which they were read (except that setitem calls may disturb this +order). Each line contains a trailing newline. The +blank line terminating the headers is not contained in the list. +\end{memberdesc} + +\begin{memberdesc}{fp} +The file or file-like object passed at instantiation time. This can +be used to read the message content. +\end{memberdesc} + +\begin{memberdesc}{unixfrom} +The \UNIX{} \samp{From~} line, if the message had one, or an empty +string. This is needed to regenerate the message in some contexts, +such as an \code{mbox}-style mailbox file. +\end{memberdesc} + + +\subsection{AddressList Objects \label{addresslist-objects}} + +An \class{AddressList} instance has the following methods: + +\begin{methoddesc}{__len__}{} +Return the number of addresses in the address list. +\end{methoddesc} + +\begin{methoddesc}{__str__}{} +Return a canonicalized string representation of the address list. +Addresses are rendered in "name" <host@domain> form, comma-separated. +\end{methoddesc} + +\begin{methoddesc}{__add__}{alist} +Return a new \class{AddressList} instance that contains all addresses +in both \class{AddressList} operands, with duplicates removed (set +union). +\end{methoddesc} + +\begin{methoddesc}{__iadd__}{alist} +In-place version of \method{__add__()}; turns this \class{AddressList} +instance into the union of itself and the right-hand instance, +\var{alist}. +\end{methoddesc} + +\begin{methoddesc}{__sub__}{alist} +Return a new \class{AddressList} instance that contains every address +in the left-hand \class{AddressList} operand that is not present in +the right-hand address operand (set difference). +\end{methoddesc} + +\begin{methoddesc}{__isub__}{alist} +In-place version of \method{__sub__()}, removing addresses in this +list which are also in \var{alist}. +\end{methoddesc} + + +Finally, \class{AddressList} instances have one public instance variable: + +\begin{memberdesc}{addresslist} +A list of tuple string pairs, one per address. In each member, the +first is the canonicalized name part, the second is the +actual route-address (\character{@}-separated username-host.domain +pair). +\end{memberdesc} |