add hg and python

author: cinap_lenrek <cinap_lenrek@localhost> 2011-05-03 11:25:13 +0000
committer: cinap_lenrek <cinap_lenrek@localhost> 2011-05-03 11:25:13 +0000
commit: 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree: 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libcsv.tex
parent: 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)
1 files changed, 538 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/libcsv.tex b/sys/src/cmd/python/Doc/lib/libcsv.tex
new file mode 100644
index 000000000..e965e316f
--- /dev/null
+++ b/sys/src/cmd/python/Doc/lib/libcsv.tex
@@ -0,0 +1,538 @@
+\section{\module{csv} --- CSV File Reading and Writing}
+
+\declaremodule{standard}{csv}
+\modulesynopsis{Write and read tabular data to and from delimited files.}
+\sectionauthor{Skip Montanaro}{skip@pobox.com}
+
+\versionadded{2.3}
+\index{csv}
+\indexii{data}{tabular}
+
+The so-called CSV (Comma Separated Values) format is the most common import
+and export format for spreadsheets and databases.  There is no ``CSV
+standard'', so the format is operationally defined by the many applications
+which read and write it.  The lack of a standard means that subtle
+differences often exist in the data produced and consumed by different
+applications.  These differences can make it annoying to process CSV files
+from multiple sources.  Still, while the delimiters and quoting characters
+vary, the overall format is similar enough that it is possible to write a
+single module which can efficiently manipulate such data, hiding the details
+of reading and writing the data from the programmer.
+
+The \module{csv} module implements classes to read and write tabular data in
+CSV format.  It allows programmers to say, ``write this data in the format
+preferred by Excel,'' or ``read data from this file which was generated by
+Excel,'' without knowing the precise details of the CSV format used by
+Excel.  Programmers can also describe the CSV formats understood by other
+applications or define their own special-purpose CSV formats.
+
+The \module{csv} module's \class{reader} and \class{writer} objects read and
+write sequences.  Programmers can also read and write data in dictionary
+form using the \class{DictReader} and \class{DictWriter} classes.
+
+\begin{notice}
+  This version of the \module{csv} module doesn't support Unicode
+  input.  Also, there are currently some issues regarding \ASCII{} NUL
+  characters.  Accordingly, all input should be UTF-8 or printable
+  \ASCII{} to be safe; see the examples in section~\ref{csv-examples}.
+  These restrictions will be removed in the future.
+\end{notice}
+
+\begin{seealso}
+%  \seemodule{array}{Arrays of uniformly types numeric values.}
+  \seepep{305}{CSV File API}
+         {The Python Enhancement Proposal which proposed this addition
+          to Python.}
+\end{seealso}
+
+
+\subsection{Module Contents \label{csv-contents}}
+
+The \module{csv} module defines the following functions:
+
+\begin{funcdesc}{reader}{csvfile\optional{,
+                         dialect=\code{'excel'}}\optional{, fmtparam}}
+Return a reader object which will iterate over lines in the given
+{}\var{csvfile}.  \var{csvfile} can be any object which supports the
+iterator protocol and returns a string each time its \method{next}
+method is called --- file objects and list objects are both suitable.  
+If \var{csvfile} is a file object, it must be opened with
+the 'b' flag on platforms where that makes a difference.  An optional
+{}\var{dialect} parameter can be given
+which is used to define a set of parameters specific to a particular CSV
+dialect.  It may be an instance of a subclass of the \class{Dialect}
+class or one of the strings returned by the \function{list_dialects}
+function.  The other optional {}\var{fmtparam} keyword arguments can be
+given to override individual formatting parameters in the current
+dialect.  For full details about the dialect and formatting
+parameters, see section~\ref{csv-fmt-params}, ``Dialects and Formatting
+Parameters''.
+
+All data read are returned as strings.  No automatic data type
+conversion is performed.
+
+\versionchanged[
+The parser is now stricter with respect to multi-line quoted
+fields. Previously, if a line ended within a quoted field without a
+terminating newline character, a newline would be inserted into the
+returned field. This behavior caused problems when reading files
+which contained carriage return characters within fields.  The
+behavior was changed to return the field without inserting newlines. As
+a consequence, if newlines embedded within fields are important, the
+input should be split into lines in a manner which preserves the newline
+characters]{2.5}
+
+\end{funcdesc}
+
+\begin{funcdesc}{writer}{csvfile\optional{,
+                         dialect=\code{'excel'}}\optional{, fmtparam}}
+Return a writer object responsible for converting the user's data into
+delimited strings on the given file-like object.  \var{csvfile} can be any
+object with a \function{write} method.  If \var{csvfile} is a file object,
+it must be opened with the 'b' flag on platforms where that makes a
+difference.  An optional
+{}\var{dialect} parameter can be given which is used to define a set of
+parameters specific to a particular CSV dialect.  It may be an instance
+of a subclass of the \class{Dialect} class or one of the strings
+returned by the \function{list_dialects} function.  The other optional
+{}\var{fmtparam} keyword arguments can be given to override individual
+formatting parameters in the current dialect.  For full details
+about the dialect and formatting parameters, see
+section~\ref{csv-fmt-params}, ``Dialects and Formatting Parameters''.
+To make it as easy as possible to
+interface with modules which implement the DB API, the value
+\constant{None} is written as the empty string.  While this isn't a
+reversible transformation, it makes it easier to dump SQL NULL data values
+to CSV files without preprocessing the data returned from a
+\code{cursor.fetch*()} call.  All other non-string data are stringified
+with \function{str()} before being written.
+\end{funcdesc}
+
+\begin{funcdesc}{register_dialect}{name\optional{, dialect}\optional{, fmtparam}}
+Associate \var{dialect} with \var{name}.  \var{name} must be a string
+or Unicode object. The dialect can be specified either by passing a
+sub-class of \class{Dialect}, or by \var{fmtparam} keyword arguments,
+or both, with keyword arguments overriding parameters of the dialect.
+For full details about the dialect and formatting parameters, see
+section~\ref{csv-fmt-params}, ``Dialects and Formatting Parameters''.
+\end{funcdesc}
+
+\begin{funcdesc}{unregister_dialect}{name}
+Delete the dialect associated with \var{name} from the dialect registry.  An
+\exception{Error} is raised if \var{name} is not a registered dialect
+name.
+\end{funcdesc}
+
+\begin{funcdesc}{get_dialect}{name}
+Return the dialect associated with \var{name}.  An \exception{Error} is
+raised if \var{name} is not a registered dialect name.
+\end{funcdesc}
+
+\begin{funcdesc}{list_dialects}{}
+Return the names of all registered dialects.
+\end{funcdesc}
+
+\begin{funcdesc}{field_size_limit}{\optional{new_limit}}
+  Returns the current maximum field size allowed by the parser. If
+  \var{new_limit} is given, this becomes the new limit.
+  \versionadded{2.5}
+\end{funcdesc}
+
+
+The \module{csv} module defines the following classes:
+
+\begin{classdesc}{DictReader}{csvfile\optional{,
+			      fieldnames=\constant{None},\optional{,
+                              restkey=\constant{None}\optional{,
+			      restval=\constant{None}\optional{,
+                              dialect=\code{'excel'}\optional{,
+			      *args, **kwds}}}}}}
+Create an object which operates like a regular reader but maps the
+information read into a dict whose keys are given by the optional
+{} \var{fieldnames}
+parameter.  If the \var{fieldnames} parameter is omitted, the values in
+the first row of the \var{csvfile} will be used as the fieldnames.
+If the row read has fewer fields than the fieldnames sequence,
+the value of \var{restval} will be used as the default value.  If the row
+read has more fields than the fieldnames sequence, the remaining data is
+added as a sequence keyed by the value of \var{restkey}.  If the row read
+has fewer fields than the fieldnames sequence, the remaining keys take the
+value of the optional \var{restval} parameter.  Any other optional or
+keyword arguments are passed to the underlying \class{reader} instance.
+\end{classdesc}
+
+
+\begin{classdesc}{DictWriter}{csvfile, fieldnames\optional{,
+                              restval=""\optional{,
+                              extrasaction=\code{'raise'}\optional{,
+                              dialect=\code{'excel'}\optional{,
+			      *args, **kwds}}}}}
+Create an object which operates like a regular writer but maps dictionaries
+onto output rows.  The \var{fieldnames} parameter identifies the order in
+which values in the dictionary passed to the \method{writerow()} method are
+written to the \var{csvfile}.  The optional \var{restval} parameter
+specifies the value to be written if the dictionary is missing a key in
+\var{fieldnames}.  If the dictionary passed to the \method{writerow()}
+method contains a key not found in \var{fieldnames}, the optional
+\var{extrasaction} parameter indicates what action to take.  If it is set
+to \code{'raise'} a \exception{ValueError} is raised.  If it is set to
+\code{'ignore'}, extra values in the dictionary are ignored.  Any other
+optional or keyword arguments are passed to the underlying \class{writer}
+instance.
+
+Note that unlike the \class{DictReader} class, the \var{fieldnames}
+parameter of the \class{DictWriter} is not optional.  Since Python's
+\class{dict} objects are not ordered, there is not enough information
+available to deduce the order in which the row should be written to the
+\var{csvfile}.
+
+\end{classdesc}
+
+\begin{classdesc*}{Dialect}{}
+The \class{Dialect} class is a container class relied on primarily for its
+attributes, which are used to define the parameters for a specific
+\class{reader} or \class{writer} instance.
+\end{classdesc*}
+
+\begin{classdesc}{excel}{}
+The \class{excel} class defines the usual properties of an Excel-generated
+CSV file.  It is registered with the dialect name \code{'excel'}.
+\end{classdesc}
+
+\begin{classdesc}{excel_tab}{}
+The \class{excel_tab} class defines the usual properties of an
+Excel-generated TAB-delimited file.  It is registered with the dialect name
+\code{'excel-tab'}.
+\end{classdesc}
+
+\begin{classdesc}{Sniffer}{}
+The \class{Sniffer} class is used to deduce the format of a CSV file.
+\end{classdesc}
+
+The \class{Sniffer} class provides two methods:
+
+\begin{methoddesc}{sniff}{sample\optional{,delimiters=None}}
+Analyze the given \var{sample} and return a \class{Dialect} subclass
+reflecting the parameters found.  If the optional \var{delimiters} parameter
+is given, it is interpreted as a string containing possible valid delimiter
+characters.
+\end{methoddesc}
+
+\begin{methoddesc}{has_header}{sample}
+Analyze the sample text (presumed to be in CSV format) and return
+\constant{True} if the first row appears to be a series of column
+headers.
+\end{methoddesc}
+
+
+The \module{csv} module defines the following constants:
+
+\begin{datadesc}{QUOTE_ALL}
+Instructs \class{writer} objects to quote all fields.
+\end{datadesc}
+
+\begin{datadesc}{QUOTE_MINIMAL}
+Instructs \class{writer} objects to only quote those fields which contain
+special characters such as \var{delimiter}, \var{quotechar} or any of the
+characters in \var{lineterminator}.
+\end{datadesc}
+
+\begin{datadesc}{QUOTE_NONNUMERIC}
+Instructs \class{writer} objects to quote all non-numeric
+fields. 
+
+Instructs the reader to convert all non-quoted fields to type \var{float}.
+\end{datadesc}
+
+\begin{datadesc}{QUOTE_NONE}
+Instructs \class{writer} objects to never quote fields.  When the current
+\var{delimiter} occurs in output data it is preceded by the current
+\var{escapechar} character.  If \var{escapechar} is not set, the writer
+will raise \exception{Error} if any characters that require escaping
+are encountered.
+
+Instructs \class{reader} to perform no special processing of quote characters.
+\end{datadesc}
+
+
+The \module{csv} module defines the following exception:
+
+\begin{excdesc}{Error}
+Raised by any of the functions when an error is detected.
+\end{excdesc}
+
+
+\subsection{Dialects and Formatting Parameters\label{csv-fmt-params}}
+
+To make it easier to specify the format of input and output records,
+specific formatting parameters are grouped together into dialects.  A
+dialect is a subclass of the \class{Dialect} class having a set of specific
+methods and a single \method{validate()} method.  When creating \class{reader}
+or \class{writer} objects, the programmer can specify a string or a subclass
+of the \class{Dialect} class as the dialect parameter.  In addition to, or
+instead of, the \var{dialect} parameter, the programmer can also specify
+individual formatting parameters, which have the same names as the
+attributes defined below for the \class{Dialect} class.
+
+Dialects support the following attributes:
+
+\begin{memberdesc}[Dialect]{delimiter}
+A one-character string used to separate fields.  It defaults to \code{','}.
+\end{memberdesc}
+
+\begin{memberdesc}[Dialect]{doublequote}
+Controls how instances of \var{quotechar} appearing inside a field should
+be themselves be quoted.  When \constant{True}, the character is doubled.
+When \constant{False}, the \var{escapechar} is used as a prefix to the
+\var{quotechar}.  It defaults to \constant{True}.
+
+On output, if \var{doublequote} is \constant{False} and no
+\var{escapechar} is set, \exception{Error} is raised if a \var{quotechar}
+is found in a field.
+\end{memberdesc}
+
+\begin{memberdesc}[Dialect]{escapechar}
+A one-character string used by the writer to escape the \var{delimiter} if
+\var{quoting} is set to \constant{QUOTE_NONE} and the \var{quotechar}
+if \var{doublequote} is \constant{False}. On reading, the \var{escapechar}
+removes any special meaning from the following character. It defaults
+to \constant{None}, which disables escaping.
+\end{memberdesc}
+
+\begin{memberdesc}[Dialect]{lineterminator}
+The string used to terminate lines produced by the \class{writer}.
+It defaults to \code{'\e r\e n'}. 
+
+\note{The \class{reader} is hard-coded to recognise either \code{'\e r'}
+or \code{'\e n'} as end-of-line, and ignores \var{lineterminator}. This
+behavior may change in the future.}
+\end{memberdesc}
+
+\begin{memberdesc}[Dialect]{quotechar}
+A one-character string used to quote fields containing special characters,
+such as the \var{delimiter} or \var{quotechar}, or which contain new-line
+characters.  It defaults to \code{'"'}.
+\end{memberdesc}
+
+\begin{memberdesc}[Dialect]{quoting}
+Controls when quotes should be generated by the writer and recognised
+by the reader.  It can take on any of the \constant{QUOTE_*} constants
+(see section~\ref{csv-contents}) and defaults to \constant{QUOTE_MINIMAL}.
+\end{memberdesc}
+
+\begin{memberdesc}[Dialect]{skipinitialspace}
+When \constant{True}, whitespace immediately following the \var{delimiter}
+is ignored.  The default is \constant{False}.
+\end{memberdesc}
+
+
+\subsection{Reader Objects}
+
+Reader objects (\class{DictReader} instances and objects returned by
+the \function{reader()} function) have the following public methods:
+
+\begin{methoddesc}[csv reader]{next}{}
+Return the next row of the reader's iterable object as a list, parsed
+according to the current dialect.
+\end{methoddesc}
+
+Reader objects have the following public attributes:
+
+\begin{memberdesc}[csv reader]{dialect}
+A read-only description of the dialect in use by the parser.
+\end{memberdesc}
+
+\begin{memberdesc}[csv reader]{line_num}
+ The number of lines read from the source iterator. This is not the same
+ as the number of records returned, as records can span multiple lines.
+ \versionadded{2.5}
+\end{memberdesc}
+
+
+\subsection{Writer Objects}
+
+\class{Writer} objects (\class{DictWriter} instances and objects returned by
+the \function{writer()} function) have the following public methods.  A
+{}\var{row} must be a sequence of strings or numbers for \class{Writer}
+objects and a dictionary mapping fieldnames to strings or numbers (by
+passing them through \function{str()} first) for {}\class{DictWriter}
+objects.  Note that complex numbers are written out surrounded by parens.
+This may cause some problems for other programs which read CSV files
+(assuming they support complex numbers at all).
+
+\begin{methoddesc}[csv writer]{writerow}{row}
+Write the \var{row} parameter to the writer's file object, formatted
+according to the current dialect.
+\end{methoddesc}
+
+\begin{methoddesc}[csv writer]{writerows}{rows}
+Write all the \var{rows} parameters (a list of \var{row} objects as
+described above) to the writer's file object, formatted
+according to the current dialect.
+\end{methoddesc}
+
+Writer objects have the following public attribute:
+
+\begin{memberdesc}[csv writer]{dialect}
+A read-only description of the dialect in use by the writer.
+\end{memberdesc}
+
+
+
+\subsection{Examples\label{csv-examples}}
+
+The simplest example of reading a CSV file:
+
+\begin{verbatim}
+import csv
+reader = csv.reader(open("some.csv", "rb"))
+for row in reader:
+    print row
+\end{verbatim}
+
+Reading a file with an alternate format:
+
+\begin{verbatim}
+import csv
+reader = csv.reader(open("passwd", "rb"), delimiter=':', quoting=csv.QUOTE_NONE)
+for row in reader:
+    print row
+\end{verbatim}
+
+The corresponding simplest possible writing example is:
+
+\begin{verbatim}
+import csv
+writer = csv.writer(open("some.csv", "wb"))
+writer.writerows(someiterable)
+\end{verbatim}
+
+Registering a new dialect:
+
+\begin{verbatim}
+import csv
+
+csv.register_dialect('unixpwd', delimiter=':', quoting=csv.QUOTE_NONE)
+
+reader = csv.reader(open("passwd", "rb"), 'unixpwd')
+\end{verbatim}
+
+A slightly more advanced use of the reader --- catching and reporting errors:
+
+\begin{verbatim}
+import csv, sys
+filename = "some.csv"
+reader = csv.reader(open(filename, "rb"))
+try:
+    for row in reader:
+        print row
+except csv.Error, e:
+    sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))
+\end{verbatim}
+
+And while the module doesn't directly support parsing strings, it can
+easily be done:
+
+\begin{verbatim}
+import csv
+for row in csv.reader(['one,two,three']):
+    print row
+\end{verbatim}
+
+The \module{csv} module doesn't directly support reading and writing
+Unicode, but it is 8-bit-clean save for some problems with \ASCII{} NUL
+characters.  So you can write functions or classes that handle the
+encoding and decoding for you as long as you avoid encodings like
+UTF-16 that use NULs.  UTF-8 is recommended.
+
+\function{unicode_csv_reader} below is a generator that wraps
+\class{csv.reader} to handle Unicode CSV data (a list of Unicode
+strings).  \function{utf_8_encoder} is a generator that encodes the
+Unicode strings as UTF-8, one string (or row) at a time.  The encoded
+strings are parsed by the CSV reader, and
+\function{unicode_csv_reader} decodes the UTF-8-encoded cells back
+into Unicode:
+
+\begin{verbatim}
+import csv
+
+def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
+    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
+    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
+                            dialect=dialect, **kwargs)
+    for row in csv_reader:
+        # decode UTF-8 back to Unicode, cell by cell:
+        yield [unicode(cell, 'utf-8') for cell in row]
+
+def utf_8_encoder(unicode_csv_data):
+    for line in unicode_csv_data:
+        yield line.encode('utf-8')
+\end{verbatim}
+
+For all other encodings the following \class{UnicodeReader} and
+\class{UnicodeWriter} classes can be used. They take an additional
+\var{encoding} parameter in their constructor and make sure that the data
+passes the real reader or writer encoded as UTF-8:
+
+\begin{verbatim}
+import csv, codecs, cStringIO
+
+class UTF8Recoder:
+    """
+    Iterator that reads an encoded stream and reencodes the input to UTF-8
+    """
+    def __init__(self, f, encoding):
+        self.reader = codecs.getreader(encoding)(f)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        return self.reader.next().encode("utf-8")
+
+class UnicodeReader:
+    """
+    A CSV reader which will iterate over lines in the CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        f = UTF8Recoder(f, encoding)
+        self.reader = csv.reader(f, dialect=dialect, **kwds)
+
+    def next(self):
+        row = self.reader.next()
+        return [unicode(s, "utf-8") for s in row]
+
+    def __iter__(self):
+        return self
+
+class UnicodeWriter:
+    """
+    A CSV writer which will write rows to CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # Redirect output to a queue
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
+        self.stream = f
+        self.encoder = codecs.getincrementalencoder(encoding)()
+
+    def writerow(self, row):
+        self.writer.writerow([s.encode("utf-8") for s in row])
+        # Fetch UTF-8 output from the queue ...
+        data = self.queue.getvalue()
+        data = data.decode("utf-8")
+        # ... and reencode it into the target encoding
+        data = self.encoder.encode(data)
+        # write to the target stream
+        self.stream.write(data)
+        # empty queue
+        self.queue.truncate(0)
+
+    def writerows(self, rows):
+        for row in rows:
+            self.writerow(row)
+\end{verbatim}
author	cinap_lenrek <cinap_lenrek@localhost>	2011-05-03 11:25:13 +0000
committer	cinap_lenrek <cinap_lenrek@localhost>	2011-05-03 11:25:13 +0000
commit	458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree	8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libcsv.tex
parent	3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)