diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libshlex.tex | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/libshlex.tex')
-rw-r--r-- | sys/src/cmd/python/Doc/lib/libshlex.tex | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/libshlex.tex b/sys/src/cmd/python/Doc/lib/libshlex.tex new file mode 100644 index 000000000..3a4364889 --- /dev/null +++ b/sys/src/cmd/python/Doc/lib/libshlex.tex @@ -0,0 +1,274 @@ +\section{\module{shlex} --- + Simple lexical analysis} + +\declaremodule{standard}{shlex} +\modulesynopsis{Simple lexical analysis for \UNIX\ shell-like languages.} +\moduleauthor{Eric S. Raymond}{esr@snark.thyrsus.com} +\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com} +\sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com} +\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com} + +\versionadded{1.5.2} + +The \class{shlex} class makes it easy to write lexical analyzers for +simple syntaxes resembling that of the \UNIX{} shell. This will often +be useful for writing minilanguages, (for example, in run control +files for Python applications) or for parsing quoted strings. + +\note{The \module{shlex} module currently does not support Unicode input.} + +The \module{shlex} module defines the following functions: + +\begin{funcdesc}{split}{s\optional{, comments}} +Split the string \var{s} using shell-like syntax. If \var{comments} is +\constant{False} (the default), the parsing of comments in the given +string will be disabled (setting the \member{commenters} member of the +\class{shlex} instance to the empty string). This function operates +in \POSIX{} mode. +\versionadded{2.3} +\end{funcdesc} + +The \module{shlex} module defines the following class: + +\begin{classdesc}{shlex}{\optional{instream\optional{, + infile\optional{, posix}}}} +A \class{shlex} instance or subclass instance is a lexical analyzer +object. The initialization argument, if present, specifies where to +read characters from. It must be a file-/stream-like object with +\method{read()} and \method{readline()} methods, or a string (strings +are accepted since Python 2.3). If no argument is given, input will be +taken from \code{sys.stdin}. The second optional argument is a filename +string, which sets the initial value of the \member{infile} member. If +the \var{instream} argument is omitted or equal to \code{sys.stdin}, +this second argument defaults to ``stdin''. The \var{posix} argument +was introduced in Python 2.3, and defines the operational mode. When +\var{posix} is not true (default), the \class{shlex} instance will +operate in compatibility mode. When operating in \POSIX{} mode, +\class{shlex} will try to be as close as possible to the \POSIX{} shell +parsing rules. See section~\ref{shlex-objects}. +\end{classdesc} + +\begin{seealso} + \seemodule{ConfigParser}{Parser for configuration files similar to the + Windows \file{.ini} files.} +\end{seealso} + + +\subsection{shlex Objects \label{shlex-objects}} + +A \class{shlex} instance has the following methods: + +\begin{methoddesc}{get_token}{} +Return a token. If tokens have been stacked using +\method{push_token()}, pop a token off the stack. Otherwise, read one +from the input stream. If reading encounters an immediate +end-of-file, \member{self.eof} is returned (the empty string (\code{''}) +in non-\POSIX{} mode, and \code{None} in \POSIX{} mode). +\end{methoddesc} + +\begin{methoddesc}{push_token}{str} +Push the argument onto the token stack. +\end{methoddesc} + +\begin{methoddesc}{read_token}{} +Read a raw token. Ignore the pushback stack, and do not interpret source +requests. (This is not ordinarily a useful entry point, and is +documented here only for the sake of completeness.) +\end{methoddesc} + +\begin{methoddesc}{sourcehook}{filename} +When \class{shlex} detects a source request (see +\member{source} below) this method is given the following token as +argument, and expected to return a tuple consisting of a filename and +an open file-like object. + +Normally, this method first strips any quotes off the argument. If +the result is an absolute pathname, or there was no previous source +request in effect, or the previous source was a stream +(such as \code{sys.stdin}), the result is left alone. Otherwise, if the +result is a relative pathname, the directory part of the name of the +file immediately before it on the source inclusion stack is prepended +(this behavior is like the way the C preprocessor handles +\code{\#include "file.h"}). + +The result of the manipulations is treated as a filename, and returned +as the first component of the tuple, with +\function{open()} called on it to yield the second component. (Note: +this is the reverse of the order of arguments in instance initialization!) + +This hook is exposed so that you can use it to implement directory +search paths, addition of file extensions, and other namespace hacks. +There is no corresponding `close' hook, but a shlex instance will call +the \method{close()} method of the sourced input stream when it +returns \EOF. + +For more explicit control of source stacking, use the +\method{push_source()} and \method{pop_source()} methods. +\end{methoddesc} + +\begin{methoddesc}{push_source}{stream\optional{, filename}} +Push an input source stream onto the input stack. If the filename +argument is specified it will later be available for use in error +messages. This is the same method used internally by the +\method{sourcehook} method. +\versionadded{2.1} +\end{methoddesc} + +\begin{methoddesc}{pop_source}{} +Pop the last-pushed input source from the input stack. +This is the same method used internally when the lexer reaches +\EOF{} on a stacked input stream. +\versionadded{2.1} +\end{methoddesc} + +\begin{methoddesc}{error_leader}{\optional{file\optional{, line}}} +This method generates an error message leader in the format of a +\UNIX{} C compiler error label; the format is \code{'"\%s", line \%d: '}, +where the \samp{\%s} is replaced with the name of the current source +file and the \samp{\%d} with the current input line number (the +optional arguments can be used to override these). + +This convenience is provided to encourage \module{shlex} users to +generate error messages in the standard, parseable format understood +by Emacs and other \UNIX{} tools. +\end{methoddesc} + +Instances of \class{shlex} subclasses have some public instance +variables which either control lexical analysis or can be used for +debugging: + +\begin{memberdesc}{commenters} +The string of characters that are recognized as comment beginners. +All characters from the comment beginner to end of line are ignored. +Includes just \character{\#} by default. +\end{memberdesc} + +\begin{memberdesc}{wordchars} +The string of characters that will accumulate into multi-character +tokens. By default, includes all \ASCII{} alphanumerics and +underscore. +\end{memberdesc} + +\begin{memberdesc}{whitespace} +Characters that will be considered whitespace and skipped. Whitespace +bounds tokens. By default, includes space, tab, linefeed and +carriage-return. +\end{memberdesc} + +\begin{memberdesc}{escape} +Characters that will be considered as escape. This will be only used +in \POSIX{} mode, and includes just \character{\textbackslash} by default. +\versionadded{2.3} +\end{memberdesc} + +\begin{memberdesc}{quotes} +Characters that will be considered string quotes. The token +accumulates until the same quote is encountered again (thus, different +quote types protect each other as in the shell.) By default, includes +\ASCII{} single and double quotes. +\end{memberdesc} + +\begin{memberdesc}{escapedquotes} +Characters in \member{quotes} that will interpret escape characters +defined in \member{escape}. This is only used in \POSIX{} mode, and +includes just \character{"} by default. +\versionadded{2.3} +\end{memberdesc} + +\begin{memberdesc}{whitespace_split} +If \code{True}, tokens will only be split in whitespaces. This is useful, for +example, for parsing command lines with \class{shlex}, getting tokens +in a similar way to shell arguments. +\versionadded{2.3} +\end{memberdesc} + +\begin{memberdesc}{infile} +The name of the current input file, as initially set at class +instantiation time or stacked by later source requests. It may +be useful to examine this when constructing error messages. +\end{memberdesc} + +\begin{memberdesc}{instream} +The input stream from which this \class{shlex} instance is reading +characters. +\end{memberdesc} + +\begin{memberdesc}{source} +This member is \code{None} by default. If you assign a string to it, +that string will be recognized as a lexical-level inclusion request +similar to the \samp{source} keyword in various shells. That is, the +immediately following token will opened as a filename and input taken +from that stream until \EOF, at which point the \method{close()} +method of that stream will be called and the input source will again +become the original input stream. Source requests may be stacked any +number of levels deep. +\end{memberdesc} + +\begin{memberdesc}{debug} +If this member is numeric and \code{1} or more, a \class{shlex} +instance will print verbose progress output on its behavior. If you +need to use this, you can read the module source code to learn the +details. +\end{memberdesc} + +\begin{memberdesc}{lineno} +Source line number (count of newlines seen so far plus one). +\end{memberdesc} + +\begin{memberdesc}{token} +The token buffer. It may be useful to examine this when catching +exceptions. +\end{memberdesc} + +\begin{memberdesc}{eof} +Token used to determine end of file. This will be set to the empty +string (\code{''}), in non-\POSIX{} mode, and to \code{None} in +\POSIX{} mode. +\versionadded{2.3} +\end{memberdesc} + +\subsection{Parsing Rules\label{shlex-parsing-rules}} + +When operating in non-\POSIX{} mode, \class{shlex} will try to obey to +the following rules. + +\begin{itemize} +\item Quote characters are not recognized within words + (\code{Do"Not"Separate} is parsed as the single word + \code{Do"Not"Separate}); +\item Escape characters are not recognized; +\item Enclosing characters in quotes preserve the literal value of + all characters within the quotes; +\item Closing quotes separate words (\code{"Do"Separate} is parsed + as \code{"Do"} and \code{Separate}); +\item If \member{whitespace_split} is \code{False}, any character not + declared to be a word character, whitespace, or a quote will be + returned as a single-character token. If it is \code{True}, + \class{shlex} will only split words in whitespaces; +\item EOF is signaled with an empty string (\code{''}); +\item It's not possible to parse empty strings, even if quoted. +\end{itemize} + +When operating in \POSIX{} mode, \class{shlex} will try to obey to the +following parsing rules. + +\begin{itemize} +\item Quotes are stripped out, and do not separate words + (\code{"Do"Not"Separate"} is parsed as the single word + \code{DoNotSeparate}); +\item Non-quoted escape characters (e.g. \character{\textbackslash}) + preserve the literal value of the next character that follows; +\item Enclosing characters in quotes which are not part of + \member{escapedquotes} (e.g. \character{'}) preserve the literal + value of all characters within the quotes; +\item Enclosing characters in quotes which are part of + \member{escapedquotes} (e.g. \character{"}) preserves the literal + value of all characters within the quotes, with the exception of + the characters mentioned in \member{escape}. The escape characters + retain its special meaning only when followed by the quote in use, + or the escape character itself. Otherwise the escape character + will be considered a normal character. +\item EOF is signaled with a \constant{None} value; +\item Quoted empty strings (\code{''}) are allowed; +\end{itemize} + |