summaryrefslogtreecommitdiff
path: root/sys/src/cmd/python/Doc/lib/libshlex.tex
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
committercinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
commit458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libshlex.tex
parent3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/libshlex.tex')
-rw-r--r--sys/src/cmd/python/Doc/lib/libshlex.tex274
1 files changed, 274 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/libshlex.tex b/sys/src/cmd/python/Doc/lib/libshlex.tex
new file mode 100644
index 000000000..3a4364889
--- /dev/null
+++ b/sys/src/cmd/python/Doc/lib/libshlex.tex
@@ -0,0 +1,274 @@
+\section{\module{shlex} ---
+ Simple lexical analysis}
+
+\declaremodule{standard}{shlex}
+\modulesynopsis{Simple lexical analysis for \UNIX\ shell-like languages.}
+\moduleauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
+\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
+\sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
+\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
+
+\versionadded{1.5.2}
+
+The \class{shlex} class makes it easy to write lexical analyzers for
+simple syntaxes resembling that of the \UNIX{} shell. This will often
+be useful for writing minilanguages, (for example, in run control
+files for Python applications) or for parsing quoted strings.
+
+\note{The \module{shlex} module currently does not support Unicode input.}
+
+The \module{shlex} module defines the following functions:
+
+\begin{funcdesc}{split}{s\optional{, comments}}
+Split the string \var{s} using shell-like syntax. If \var{comments} is
+\constant{False} (the default), the parsing of comments in the given
+string will be disabled (setting the \member{commenters} member of the
+\class{shlex} instance to the empty string). This function operates
+in \POSIX{} mode.
+\versionadded{2.3}
+\end{funcdesc}
+
+The \module{shlex} module defines the following class:
+
+\begin{classdesc}{shlex}{\optional{instream\optional{,
+ infile\optional{, posix}}}}
+A \class{shlex} instance or subclass instance is a lexical analyzer
+object. The initialization argument, if present, specifies where to
+read characters from. It must be a file-/stream-like object with
+\method{read()} and \method{readline()} methods, or a string (strings
+are accepted since Python 2.3). If no argument is given, input will be
+taken from \code{sys.stdin}. The second optional argument is a filename
+string, which sets the initial value of the \member{infile} member. If
+the \var{instream} argument is omitted or equal to \code{sys.stdin},
+this second argument defaults to ``stdin''. The \var{posix} argument
+was introduced in Python 2.3, and defines the operational mode. When
+\var{posix} is not true (default), the \class{shlex} instance will
+operate in compatibility mode. When operating in \POSIX{} mode,
+\class{shlex} will try to be as close as possible to the \POSIX{} shell
+parsing rules. See section~\ref{shlex-objects}.
+\end{classdesc}
+
+\begin{seealso}
+ \seemodule{ConfigParser}{Parser for configuration files similar to the
+ Windows \file{.ini} files.}
+\end{seealso}
+
+
+\subsection{shlex Objects \label{shlex-objects}}
+
+A \class{shlex} instance has the following methods:
+
+\begin{methoddesc}{get_token}{}
+Return a token. If tokens have been stacked using
+\method{push_token()}, pop a token off the stack. Otherwise, read one
+from the input stream. If reading encounters an immediate
+end-of-file, \member{self.eof} is returned (the empty string (\code{''})
+in non-\POSIX{} mode, and \code{None} in \POSIX{} mode).
+\end{methoddesc}
+
+\begin{methoddesc}{push_token}{str}
+Push the argument onto the token stack.
+\end{methoddesc}
+
+\begin{methoddesc}{read_token}{}
+Read a raw token. Ignore the pushback stack, and do not interpret source
+requests. (This is not ordinarily a useful entry point, and is
+documented here only for the sake of completeness.)
+\end{methoddesc}
+
+\begin{methoddesc}{sourcehook}{filename}
+When \class{shlex} detects a source request (see
+\member{source} below) this method is given the following token as
+argument, and expected to return a tuple consisting of a filename and
+an open file-like object.
+
+Normally, this method first strips any quotes off the argument. If
+the result is an absolute pathname, or there was no previous source
+request in effect, or the previous source was a stream
+(such as \code{sys.stdin}), the result is left alone. Otherwise, if the
+result is a relative pathname, the directory part of the name of the
+file immediately before it on the source inclusion stack is prepended
+(this behavior is like the way the C preprocessor handles
+\code{\#include "file.h"}).
+
+The result of the manipulations is treated as a filename, and returned
+as the first component of the tuple, with
+\function{open()} called on it to yield the second component. (Note:
+this is the reverse of the order of arguments in instance initialization!)
+
+This hook is exposed so that you can use it to implement directory
+search paths, addition of file extensions, and other namespace hacks.
+There is no corresponding `close' hook, but a shlex instance will call
+the \method{close()} method of the sourced input stream when it
+returns \EOF.
+
+For more explicit control of source stacking, use the
+\method{push_source()} and \method{pop_source()} methods.
+\end{methoddesc}
+
+\begin{methoddesc}{push_source}{stream\optional{, filename}}
+Push an input source stream onto the input stack. If the filename
+argument is specified it will later be available for use in error
+messages. This is the same method used internally by the
+\method{sourcehook} method.
+\versionadded{2.1}
+\end{methoddesc}
+
+\begin{methoddesc}{pop_source}{}
+Pop the last-pushed input source from the input stack.
+This is the same method used internally when the lexer reaches
+\EOF{} on a stacked input stream.
+\versionadded{2.1}
+\end{methoddesc}
+
+\begin{methoddesc}{error_leader}{\optional{file\optional{, line}}}
+This method generates an error message leader in the format of a
+\UNIX{} C compiler error label; the format is \code{'"\%s", line \%d: '},
+where the \samp{\%s} is replaced with the name of the current source
+file and the \samp{\%d} with the current input line number (the
+optional arguments can be used to override these).
+
+This convenience is provided to encourage \module{shlex} users to
+generate error messages in the standard, parseable format understood
+by Emacs and other \UNIX{} tools.
+\end{methoddesc}
+
+Instances of \class{shlex} subclasses have some public instance
+variables which either control lexical analysis or can be used for
+debugging:
+
+\begin{memberdesc}{commenters}
+The string of characters that are recognized as comment beginners.
+All characters from the comment beginner to end of line are ignored.
+Includes just \character{\#} by default.
+\end{memberdesc}
+
+\begin{memberdesc}{wordchars}
+The string of characters that will accumulate into multi-character
+tokens. By default, includes all \ASCII{} alphanumerics and
+underscore.
+\end{memberdesc}
+
+\begin{memberdesc}{whitespace}
+Characters that will be considered whitespace and skipped. Whitespace
+bounds tokens. By default, includes space, tab, linefeed and
+carriage-return.
+\end{memberdesc}
+
+\begin{memberdesc}{escape}
+Characters that will be considered as escape. This will be only used
+in \POSIX{} mode, and includes just \character{\textbackslash} by default.
+\versionadded{2.3}
+\end{memberdesc}
+
+\begin{memberdesc}{quotes}
+Characters that will be considered string quotes. The token
+accumulates until the same quote is encountered again (thus, different
+quote types protect each other as in the shell.) By default, includes
+\ASCII{} single and double quotes.
+\end{memberdesc}
+
+\begin{memberdesc}{escapedquotes}
+Characters in \member{quotes} that will interpret escape characters
+defined in \member{escape}. This is only used in \POSIX{} mode, and
+includes just \character{"} by default.
+\versionadded{2.3}
+\end{memberdesc}
+
+\begin{memberdesc}{whitespace_split}
+If \code{True}, tokens will only be split in whitespaces. This is useful, for
+example, for parsing command lines with \class{shlex}, getting tokens
+in a similar way to shell arguments.
+\versionadded{2.3}
+\end{memberdesc}
+
+\begin{memberdesc}{infile}
+The name of the current input file, as initially set at class
+instantiation time or stacked by later source requests. It may
+be useful to examine this when constructing error messages.
+\end{memberdesc}
+
+\begin{memberdesc}{instream}
+The input stream from which this \class{shlex} instance is reading
+characters.
+\end{memberdesc}
+
+\begin{memberdesc}{source}
+This member is \code{None} by default. If you assign a string to it,
+that string will be recognized as a lexical-level inclusion request
+similar to the \samp{source} keyword in various shells. That is, the
+immediately following token will opened as a filename and input taken
+from that stream until \EOF, at which point the \method{close()}
+method of that stream will be called and the input source will again
+become the original input stream. Source requests may be stacked any
+number of levels deep.
+\end{memberdesc}
+
+\begin{memberdesc}{debug}
+If this member is numeric and \code{1} or more, a \class{shlex}
+instance will print verbose progress output on its behavior. If you
+need to use this, you can read the module source code to learn the
+details.
+\end{memberdesc}
+
+\begin{memberdesc}{lineno}
+Source line number (count of newlines seen so far plus one).
+\end{memberdesc}
+
+\begin{memberdesc}{token}
+The token buffer. It may be useful to examine this when catching
+exceptions.
+\end{memberdesc}
+
+\begin{memberdesc}{eof}
+Token used to determine end of file. This will be set to the empty
+string (\code{''}), in non-\POSIX{} mode, and to \code{None} in
+\POSIX{} mode.
+\versionadded{2.3}
+\end{memberdesc}
+
+\subsection{Parsing Rules\label{shlex-parsing-rules}}
+
+When operating in non-\POSIX{} mode, \class{shlex} will try to obey to
+the following rules.
+
+\begin{itemize}
+\item Quote characters are not recognized within words
+ (\code{Do"Not"Separate} is parsed as the single word
+ \code{Do"Not"Separate});
+\item Escape characters are not recognized;
+\item Enclosing characters in quotes preserve the literal value of
+ all characters within the quotes;
+\item Closing quotes separate words (\code{"Do"Separate} is parsed
+ as \code{"Do"} and \code{Separate});
+\item If \member{whitespace_split} is \code{False}, any character not
+ declared to be a word character, whitespace, or a quote will be
+ returned as a single-character token. If it is \code{True},
+ \class{shlex} will only split words in whitespaces;
+\item EOF is signaled with an empty string (\code{''});
+\item It's not possible to parse empty strings, even if quoted.
+\end{itemize}
+
+When operating in \POSIX{} mode, \class{shlex} will try to obey to the
+following parsing rules.
+
+\begin{itemize}
+\item Quotes are stripped out, and do not separate words
+ (\code{"Do"Not"Separate"} is parsed as the single word
+ \code{DoNotSeparate});
+\item Non-quoted escape characters (e.g. \character{\textbackslash})
+ preserve the literal value of the next character that follows;
+\item Enclosing characters in quotes which are not part of
+ \member{escapedquotes} (e.g. \character{'}) preserve the literal
+ value of all characters within the quotes;
+\item Enclosing characters in quotes which are part of
+ \member{escapedquotes} (e.g. \character{"}) preserves the literal
+ value of all characters within the quotes, with the exception of
+ the characters mentioned in \member{escape}. The escape characters
+ retain its special meaning only when followed by the quote in use,
+ or the escape character itself. Otherwise the escape character
+ will be considered a normal character.
+\item EOF is signaled with a \constant{None} value;
+\item Quoted empty strings (\code{''}) are allowed;
+\end{itemize}
+