diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/libstruct.tex | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/libstruct.tex')
-rw-r--r-- | sys/src/cmd/python/Doc/lib/libstruct.tex | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/libstruct.tex b/sys/src/cmd/python/Doc/lib/libstruct.tex new file mode 100644 index 000000000..2c10be753 --- /dev/null +++ b/sys/src/cmd/python/Doc/lib/libstruct.tex @@ -0,0 +1,256 @@ +\section{\module{struct} --- + Interpret strings as packed binary data} +\declaremodule{builtin}{struct} + +\modulesynopsis{Interpret strings as packed binary data.} + +\indexii{C}{structures} +\indexiii{packing}{binary}{data} + +This module performs conversions between Python values and C +structs represented as Python strings. It uses \dfn{format strings} +(explained below) as compact descriptions of the lay-out of the C +structs and the intended conversion to/from Python values. This can +be used in handling binary data stored in files or from network +connections, among other sources. + +The module defines the following exception and functions: + + +\begin{excdesc}{error} + Exception raised on various occasions; argument is a string + describing what is wrong. +\end{excdesc} + +\begin{funcdesc}{pack}{fmt, v1, v2, \textrm{\ldots}} + Return a string containing the values + \code{\var{v1}, \var{v2}, \textrm{\ldots}} packed according to the given + format. The arguments must match the values required by the format + exactly. +\end{funcdesc} + +\begin{funcdesc}{pack_into}{fmt, buffer, offset, v1, v2, \moreargs} + Pack the values \code{\var{v1}, \var{v2}, \textrm{\ldots}} according to the given + format, write the packed bytes into the writable \var{buffer} starting at + \var{offset}. + Note that the offset is not an optional argument. + + \versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{unpack}{fmt, string} + Unpack the string (presumably packed by \code{pack(\var{fmt}, + \textrm{\ldots})}) according to the given format. The result is a + tuple even if it contains exactly one item. The string must contain + exactly the amount of data required by the format + (\code{len(\var{string})} must equal \code{calcsize(\var{fmt})}). 
+\end{funcdesc} + +\begin{funcdesc}{unpack_from}{fmt, buffer\optional{,offset \code{= 0}}} + Unpack the \var{buffer} according to the given format. + The result is a tuple even if it contains exactly one item. The + \var{buffer} must contain at least the amount of data required by the + format (\code{len(buffer[offset:])} must be at least + \code{calcsize(\var{fmt})}). + + \versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{calcsize}{fmt} + Return the size of the struct (and hence of the string) + corresponding to the given format. +\end{funcdesc} + +Format characters have the following meaning; the conversion between +C and Python values should be obvious given their types: + +\begin{tableiv}{c|l|l|c}{samp}{Format}{C Type}{Python}{Notes} + \lineiv{x}{pad byte}{no value}{} + \lineiv{c}{\ctype{char}}{string of length 1}{} + \lineiv{b}{\ctype{signed char}}{integer}{} + \lineiv{B}{\ctype{unsigned char}}{integer}{} + \lineiv{h}{\ctype{short}}{integer}{} + \lineiv{H}{\ctype{unsigned short}}{integer}{} + \lineiv{i}{\ctype{int}}{integer}{} + \lineiv{I}{\ctype{unsigned int}}{long}{} + \lineiv{l}{\ctype{long}}{integer}{} + \lineiv{L}{\ctype{unsigned long}}{long}{} + \lineiv{q}{\ctype{long long}}{long}{(1)} + \lineiv{Q}{\ctype{unsigned long long}}{long}{(1)} + \lineiv{f}{\ctype{float}}{float}{} + \lineiv{d}{\ctype{double}}{float}{} + \lineiv{s}{\ctype{char[]}}{string}{} + \lineiv{p}{\ctype{char[]}}{string}{} + \lineiv{P}{\ctype{void *}}{integer}{} +\end{tableiv} + +\noindent +Notes: + +\begin{description} +\item[(1)] + The \character{q} and \character{Q} conversion codes are available in + native mode only if the platform C compiler supports C \ctype{long long}, + or, on Windows, \ctype{__int64}. They are always available in standard + modes. + \versionadded{2.2} +\end{description} + + +A format character may be preceded by an integral repeat count. For +example, the format string \code{'4h'} means exactly the same as +\code{'hhhh'}. 
+ +Whitespace characters between formats are ignored; a count and its +format must not contain whitespace though. + +For the \character{s} format character, the count is interpreted as the +size of the string, not a repeat count like for the other format +characters; for example, \code{'10s'} means a single 10-byte string, while +\code{'10c'} means 10 characters. For packing, the string is +truncated or padded with null bytes as appropriate to make it fit. +For unpacking, the resulting string always has exactly the specified +number of bytes. As a special case, \code{'0s'} means a single, empty +string (while \code{'0c'} means 0 characters). + +The \character{p} format character encodes a ``Pascal string'', meaning +a short variable-length string stored in a fixed number of bytes. +The count is the total number of bytes stored. The first byte stored is +the length of the string, or 255, whichever is smaller. The bytes +of the string follow. If the string passed in to \function{pack()} is too +long (longer than the count minus 1), only the leading count-1 bytes of the +string are stored. If the string is shorter than count-1, it is padded +with null bytes so that exactly count bytes in all are used. Note that +for \function{unpack()}, the \character{p} format character consumes count +bytes, but that the string returned can never contain more than 255 +characters. + +For the \character{I}, \character{L}, \character{q} and \character{Q} +format characters, the return value is a Python long integer. + +For the \character{P} format character, the return value is a Python +integer or long integer, depending on the size needed to hold a +pointer when it has been cast to an integer type. A \NULL{} pointer will +always be returned as the Python integer \code{0}. When packing pointer-sized +values, Python integer or long integer objects may be used. 
For +example, the Alpha and Merced processors use 64-bit pointer values, +meaning a Python long integer will be used to hold the pointer; other +platforms use 32-bit pointers and will use a Python integer. + +By default, C numbers are represented in the machine's native format +and byte order, and properly aligned by skipping pad bytes if +necessary (according to the rules used by the C compiler). + +Alternatively, the first character of the format string can be used to +indicate the byte order, size and alignment of the packed data, +according to the following table: + +\begin{tableiii}{c|l|l}{samp}{Character}{Byte order}{Size and alignment} + \lineiii{@}{native}{native} + \lineiii{=}{native}{standard} + \lineiii{<}{little-endian}{standard} + \lineiii{>}{big-endian}{standard} + \lineiii{!}{network (= big-endian)}{standard} +\end{tableiii} + +If the first character is not one of these, \character{@} is assumed. + +Native byte order is big-endian or little-endian, depending on the +host system. For example, Motorola and Sun processors are big-endian; +Intel and DEC processors are little-endian. + +Native size and alignment are determined using the C compiler's +\keyword{sizeof} expression. This is always combined with native byte +order. + +Standard size and alignment are as follows: no alignment is required +for any type (so you have to use pad bytes); +\ctype{short} is 2 bytes; +\ctype{int} and \ctype{long} are 4 bytes; +\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes; +\ctype{float} and \ctype{double} are 32-bit and 64-bit +IEEE floating point numbers, respectively. + +Note the difference between \character{@} and \character{=}: both use +native byte order, but the size and alignment of the latter is +standardized. + +The form \character{!} is available for those poor souls who claim they +can't remember whether network byte order is big-endian or +little-endian. 
+ +There is no way to indicate non-native byte order (force +byte-swapping); use the appropriate choice of \character{<} or +\character{>}. + +The \character{P} format character is only available for the native +byte ordering (selected as the default or with the \character{@} byte +order character). The byte order character \character{=} chooses to +use little- or big-endian ordering based on the host system. The +struct module does not interpret this as native ordering, so the +\character{P} format is not available. + +Examples (all using native byte order, size and alignment, on a +big-endian machine): + +\begin{verbatim} +>>> from struct import * +>>> pack('hhl', 1, 2, 3) +'\x00\x01\x00\x02\x00\x00\x00\x03' +>>> unpack('hhl', '\x00\x01\x00\x02\x00\x00\x00\x03') +(1, 2, 3) +>>> calcsize('hhl') +8 +\end{verbatim} + +Hint: to align the end of a structure to the alignment requirement of +a particular type, end the format with the code for that type with a +repeat count of zero. For example, the format \code{'llh0l'} +specifies two pad bytes at the end, assuming longs are aligned on +4-byte boundaries. This only works when native size and alignment are +in effect; standard size and alignment does not enforce any alignment. + +\begin{seealso} + \seemodule{array}{Packed binary storage of homogeneous data.} + \seemodule{xdrlib}{Packing and unpacking of XDR data.} +\end{seealso} + +\subsection{Struct Objects \label{struct-objects}} + +The \module{struct} module also defines the following type: + +\begin{classdesc}{Struct}{format} + Return a new Struct object which writes and reads binary data according to + the format string \var{format}. Creating a Struct object once and calling + its methods is more efficient than calling the \module{struct} functions + with the same format since the format string only needs to be compiled once. 
+ + \versionadded{2.5} +\end{classdesc} + +Compiled Struct objects support the following methods and attributes: + +\begin{methoddesc}[Struct]{pack}{v1, v2, \moreargs} + Identical to the \function{pack()} function, using the compiled format. + (\code{len(result)} will equal \member{self.size}.) +\end{methoddesc} + +\begin{methoddesc}[Struct]{pack_into}{buffer, offset, v1, v2, \moreargs} + Identical to the \function{pack_into()} function, using the compiled format. +\end{methoddesc} + +\begin{methoddesc}[Struct]{unpack}{string} + Identical to the \function{unpack()} function, using the compiled format. + (\code{len(string)} must equal \member{self.size}). +\end{methoddesc} + +\begin{methoddesc}[Struct]{unpack_from}{buffer\optional{,offset + \code{= 0}}} + Identical to the \function{unpack_from()} function, using the compiled format. + (\code{len(buffer[offset:])} must be at least \member{self.size}). +\end{methoddesc} + +\begin{memberdesc}[Struct]{format} + The format string used to construct this Struct object. +\end{memberdesc} + |