summaryrefslogtreecommitdiff
path: root/sys/src/cmd/python/Doc/lib/liblocale.tex
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
committercinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
commit458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/lib/liblocale.tex
parent3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/liblocale.tex')
-rw-r--r--sys/src/cmd/python/Doc/lib/liblocale.tex527
1 files changed, 527 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/lib/liblocale.tex b/sys/src/cmd/python/Doc/lib/liblocale.tex
new file mode 100644
index 000000000..319d893ec
--- /dev/null
+++ b/sys/src/cmd/python/Doc/lib/liblocale.tex
@@ -0,0 +1,527 @@
+\section{\module{locale} ---
+ Internationalization services}
+
+\declaremodule{standard}{locale}
+\modulesynopsis{Internationalization services.}
+\moduleauthor{Martin von L\"owis}{martin@v.loewis.de}
+\sectionauthor{Martin von L\"owis}{martin@v.loewis.de}
+
+
+The \module{locale} module opens access to the \POSIX{} locale
+database and functionality. The \POSIX{} locale mechanism allows
+programmers to deal with certain cultural issues in an application,
+without requiring the programmer to know all the specifics of each
+country where the software is executed.
+
+The \module{locale} module is implemented on top of the
+\module{_locale}\refbimodindex{_locale} module, which in turn uses an
+ANSI C locale implementation if available.
+
+The \module{locale} module defines the following exception and
+functions:
+
+
+\begin{excdesc}{Error}
+ Exception raised when \function{setlocale()} fails.
+\end{excdesc}
+
+\begin{funcdesc}{setlocale}{category\optional{, locale}}
+ If \var{locale} is specified, it may be a string, a tuple of the
+ form \code{(\var{language code}, \var{encoding})}, or \code{None}.
+ If it is a tuple, it is converted to a string using the locale
+ aliasing engine. If \var{locale} is given and not \code{None},
+ \function{setlocale()} modifies the locale setting for the
+ \var{category}. The available categories are listed in the data
+ description below. The value is the name of a locale. An empty
+ string specifies the user's default settings. If the modification of
+ the locale fails, the exception \exception{Error} is raised. If
+ successful, the new locale setting is returned.
+
+ If \var{locale} is omitted or \code{None}, the current setting for
+ \var{category} is returned.
+
+ \function{setlocale()} is not thread safe on most systems.
+ Applications typically start with a call of
+
+\begin{verbatim}
+import locale
+locale.setlocale(locale.LC_ALL, '')
+\end{verbatim}
+
+ This sets the locale for all categories to the user's default
+ setting (typically specified in the \envvar{LANG} environment
+ variable). If the locale is not changed thereafter, using
+ multithreading should not cause problems.
+
+ \versionchanged[Added support for tuple values of the \var{locale}
+ parameter]{2.0}
+\end{funcdesc}
+
+\begin{funcdesc}{localeconv}{}
+ Returns the database of the local conventions as a dictionary.
+ This dictionary has the following strings as keys:
+
+ \begin{tableiii}{l|l|p{3in}}{constant}{Category}{Key}{Meaning}
+ \lineiii{LC_NUMERIC}{\code{'decimal_point'}}
+ {Decimal point character.}
+ \lineiii{}{\code{'grouping'}}
+ {Sequence of numbers specifying which relative positions
+ the \code{'thousands_sep'} is expected. If the sequence is
+ terminated with \constant{CHAR_MAX}, no further grouping
+ is performed. If the sequence terminates with a \code{0},
+ the last group size is repeatedly used.}
+ \lineiii{}{\code{'thousands_sep'}}
+ {Character used between groups.}\hline
+ \lineiii{LC_MONETARY}{\code{'int_curr_symbol'}}
+ {International currency symbol.}
+ \lineiii{}{\code{'currency_symbol'}}
+ {Local currency symbol.}
+ \lineiii{}{\code{'p_cs_precedes/n_cs_precedes'}}
+ {Whether the currency symbol precedes the value (for positive resp.
+ negative values).}
+ \lineiii{}{\code{'p_sep_by_space/n_sep_by_space'}}
+ {Whether the currency symbol is separated from the value
+ by a space (for positive resp. negative values).}
+ \lineiii{}{\code{'mon_decimal_point'}}
+ {Decimal point used for monetary values.}
+ \lineiii{}{\code{'frac_digits'}}
+ {Number of fractional digits used in local formatting
+ of monetary values.}
+ \lineiii{}{\code{'int_frac_digits'}}
+ {Number of fractional digits used in international
+ formatting of monetary values.}
+ \lineiii{}{\code{'mon_thousands_sep'}}
+ {Group separator used for monetary values.}
+ \lineiii{}{\code{'mon_grouping'}}
+ {Equivalent to \code{'grouping'}, used for monetary
+ values.}
+ \lineiii{}{\code{'positive_sign'}}
+ {Symbol used to annotate a positive monetary value.}
+ \lineiii{}{\code{'negative_sign'}}
+ {Symbol used to annotate a negative monetary value.}
+ \lineiii{}{\code{'p_sign_posn/n_sign_posn'}}
+ {The position of the sign (for positive resp. negative values), see below.}
+ \end{tableiii}
+
+ All numeric values can be set to \constant{CHAR_MAX} to indicate that
+ there is no value specified in this locale.
+
+ The possible values for \code{'p_sign_posn'} and
+ \code{'n_sign_posn'} are given below.
+
+ \begin{tableii}{c|l}{code}{Value}{Explanation}
+ \lineii{0}{Currency and value are surrounded by parentheses.}
+ \lineii{1}{The sign should precede the value and currency symbol.}
+ \lineii{2}{The sign should follow the value and currency symbol.}
+ \lineii{3}{The sign should immediately precede the value.}
+ \lineii{4}{The sign should immediately follow the value.}
+ \lineii{\constant{CHAR_MAX}}{Nothing is specified in this locale.}
+ \end{tableii}
+\end{funcdesc}
+
+\begin{funcdesc}{nl_langinfo}{option}
+
+Return some locale-specific information as a string. This function is
+not available on all systems, and the set of possible options might
+also vary across platforms. The possible argument values are numbers,
+for which symbolic constants are available in the locale module.
+
+\end{funcdesc}
+
+\begin{funcdesc}{getdefaultlocale}{\optional{envvars}}
+ Tries to determine the default locale settings and returns
+ them as a tuple of the form \code{(\var{language code},
+ \var{encoding})}.
+
+ According to \POSIX, a program which has not called
+ \code{setlocale(LC_ALL, '')} runs using the portable \code{'C'}
+ locale. Calling \code{setlocale(LC_ALL, '')} lets it use the
+ default locale as defined by the \envvar{LANG} variable. Since we
+ do not want to interfere with the current locale setting we thus
+ emulate the behavior in the way described above.
+
+ To maintain compatibility with other platforms, not only the
+ \envvar{LANG} variable is tested, but a list of variables given as
+ envvars parameter. The first found to be defined will be
+ used. \var{envvars} defaults to the search path used in GNU gettext;
+ it must always contain the variable name \samp{LANG}. The GNU
+ gettext search path contains \code{'LANGUAGE'}, \code{'LC_ALL'},
+ \code{'LC_CTYPE'}, and \code{'LANG'}, in that order.
+
+ Except for the code \code{'C'}, the language code corresponds to
+ \rfc{1766}. \var{language code} and \var{encoding} may be
+ \code{None} if their values cannot be determined.
+ \versionadded{2.0}
+\end{funcdesc}
+
+\begin{funcdesc}{getlocale}{\optional{category}}
+ Returns the current setting for the given locale category as
+ sequence containing \var{language code}, \var{encoding}.
+ \var{category} may be one of the \constant{LC_*} values except
+ \constant{LC_ALL}. It defaults to \constant{LC_CTYPE}.
+
+ Except for the code \code{'C'}, the language code corresponds to
+ \rfc{1766}. \var{language code} and \var{encoding} may be
+ \code{None} if their values cannot be determined.
+ \versionadded{2.0}
+\end{funcdesc}
+
+\begin{funcdesc}{getpreferredencoding}{\optional{do_setlocale}}
+ Return the encoding used for text data, according to user
+ preferences. User preferences are expressed differently on
+ different systems, and might not be available programmatically on
+ some systems, so this function only returns a guess.
+
+ On some systems, it is necessary to invoke \function{setlocale}
+ to obtain the user preferences, so this function is not thread-safe.
+ If invoking setlocale is not necessary or desired, \var{do_setlocale}
+ should be set to \code{False}.
+
+ \versionadded{2.3}
+\end{funcdesc}
+
+\begin{funcdesc}{normalize}{localename}
+ Returns a normalized locale code for the given locale name. The
+ returned locale code is formatted for use with
+ \function{setlocale()}. If normalization fails, the original name
+ is returned unchanged.
+
+ If the given encoding is not known, the function defaults to
+ the default encoding for the locale code just like
+ \function{setlocale()}.
+ \versionadded{2.0}
+\end{funcdesc}
+
+\begin{funcdesc}{resetlocale}{\optional{category}}
+ Sets the locale for \var{category} to the default setting.
+
+ The default setting is determined by calling
+ \function{getdefaultlocale()}. \var{category} defaults to
+ \constant{LC_ALL}.
+ \versionadded{2.0}
+\end{funcdesc}
+
+\begin{funcdesc}{strcoll}{string1, string2}
+ Compares two strings according to the current
+ \constant{LC_COLLATE} setting. As any other compare function,
+ returns a negative, or a positive value, or \code{0}, depending on
+ whether \var{string1} collates before or after \var{string2} or is
+ equal to it.
+\end{funcdesc}
+
+\begin{funcdesc}{strxfrm}{string}
+ Transforms a string to one that can be used for the built-in
+ function \function{cmp()}\bifuncindex{cmp}, and still returns
+ locale-aware results. This function can be used when the same
+ string is compared repeatedly, e.g. when collating a sequence of
+ strings.
+\end{funcdesc}
+
+\begin{funcdesc}{format}{format, val\optional{, grouping\optional{, monetary}}}
+ Formats a number \var{val} according to the current
+ \constant{LC_NUMERIC} setting. The format follows the conventions
+ of the \code{\%} operator. For floating point values, the decimal
+ point is modified if appropriate. If \var{grouping} is true, also
+ takes the grouping into account.
+
+ If \var{monetary} is true, the conversion uses monetary thousands
+ separator and grouping strings.
+
+ Please note that this function will only work for exactly one \%char
+ specifier. For whole format strings, use \function{format_string()}.
+
+ \versionchanged[Added the \var{monetary} parameter]{2.5}
+\end{funcdesc}
+
+\begin{funcdesc}{format_string}{format, val\optional{, grouping}}
+ Processes formatting specifiers as in \code{format \% val},
+ but takes the current locale settings into account.
+
+ \versionadded{2.5}
+\end{funcdesc}
+
+\begin{funcdesc}{currency}{val\optional{, symbol\optional{, grouping\optional{, international}}}}
+ Formats a number \var{val} according to the current \constant{LC_MONETARY}
+ settings.
+
+ The returned string includes the currency symbol if \var{symbol} is true,
+ which is the default.
+ If \var{grouping} is true (which is not the default), grouping is done with
+ the value.
+ If \var{international} is true (which is not the default), the international
+ currency symbol is used.
+
+ Note that this function will not work with the `C' locale, so you have to set
+ a locale via \function{setlocale()} first.
+
+ \versionadded{2.5}
+\end{funcdesc}
+
+\begin{funcdesc}{str}{float}
+ Formats a floating point number using the same format as the
+ built-in function \code{str(\var{float})}, but takes the decimal
+ point into account.
+\end{funcdesc}
+
+\begin{funcdesc}{atof}{string}
+ Converts a string to a floating point number, following the
+ \constant{LC_NUMERIC} settings.
+\end{funcdesc}
+
+\begin{funcdesc}{atoi}{string}
+ Converts a string to an integer, following the
+ \constant{LC_NUMERIC} conventions.
+\end{funcdesc}
+
+\begin{datadesc}{LC_CTYPE}
+\refstmodindex{string}
+ Locale category for the character type functions. Depending on the
+ settings of this category, the functions of module
+ \refmodule{string} dealing with case change their behaviour.
+\end{datadesc}
+
+\begin{datadesc}{LC_COLLATE}
+ Locale category for sorting strings. The functions
+ \function{strcoll()} and \function{strxfrm()} of the
+ \module{locale} module are affected.
+\end{datadesc}
+
+\begin{datadesc}{LC_TIME}
+ Locale category for the formatting of time. The function
+ \function{time.strftime()} follows these conventions.
+\end{datadesc}
+
+\begin{datadesc}{LC_MONETARY}
+ Locale category for formatting of monetary values. The available
+ options are available from the \function{localeconv()} function.
+\end{datadesc}
+
+\begin{datadesc}{LC_MESSAGES}
+ Locale category for message display. Python currently does not
+ support application specific locale-aware messages. Messages
+ displayed by the operating system, like those returned by
+ \function{os.strerror()} might be affected by this category.
+\end{datadesc}
+
+\begin{datadesc}{LC_NUMERIC}
+ Locale category for formatting numbers. The functions
+ \function{format()}, \function{atoi()}, \function{atof()} and
+ \function{str()} of the \module{locale} module are affected by that
+ category. All other numeric formatting operations are not
+ affected.
+\end{datadesc}
+
+\begin{datadesc}{LC_ALL}
+ Combination of all locale settings. If this flag is used when the
+ locale is changed, setting the locale for all categories is
+ attempted. If that fails for any category, no category is changed at
+ all. When the locale is retrieved using this flag, a string
+ indicating the setting for all categories is returned. This string
+ can be later used to restore the settings.
+\end{datadesc}
+
+\begin{datadesc}{CHAR_MAX}
+ This is a symbolic constant used for different values returned by
+ \function{localeconv()}.
+\end{datadesc}
+
+The \function{nl_langinfo} function accepts one of the following keys.
+Most descriptions are taken from the corresponding description in the
+GNU C library.
+
+\begin{datadesc}{CODESET}
+Return a string with the name of the character encoding used in the
+selected locale.
+\end{datadesc}
+
+\begin{datadesc}{D_T_FMT}
+Return a string that can be used as a format string for strftime(3) to
+represent time and date in a locale-specific way.
+\end{datadesc}
+
+\begin{datadesc}{D_FMT}
+Return a string that can be used as a format string for strftime(3) to
+represent a date in a locale-specific way.
+\end{datadesc}
+
+\begin{datadesc}{T_FMT}
+Return a string that can be used as a format string for strftime(3) to
+represent a time in a locale-specific way.
+\end{datadesc}
+
+\begin{datadesc}{T_FMT_AMPM}
+The return value can be used as a format string for `strftime' to
+represent time in the am/pm format.
+\end{datadesc}
+
+\begin{datadesc}{DAY_1 ... DAY_7}
+Return name of the n-th day of the week. \warning{This
+follows the US convention of \constant{DAY_1} being Sunday, not the
+international convention (ISO 8601) that Monday is the first day of
+the week.}
+\end{datadesc}
+
+\begin{datadesc}{ABDAY_1 ... ABDAY_7}
+Return abbreviated name of the n-th day of the week.
+\end{datadesc}
+
+\begin{datadesc}{MON_1 ... MON_12}
+Return name of the n-th month.
+\end{datadesc}
+
+\begin{datadesc}{ABMON_1 ... ABMON_12}
+Return abbreviated name of the n-th month.
+\end{datadesc}
+
+\begin{datadesc}{RADIXCHAR}
+Return radix character (decimal dot, decimal comma, etc.)
+\end{datadesc}
+
+\begin{datadesc}{THOUSEP}
+Return separator character for thousands (groups of three digits).
+\end{datadesc}
+
+\begin{datadesc}{YESEXPR}
+Return a regular expression that can be used with the regex
+function to recognize a positive response to a yes/no question.
+\warning{The expression is in the syntax suitable for the
+\cfunction{regex()} function from the C library, which might differ
+from the syntax used in \refmodule{re}.}
+\end{datadesc}
+
+\begin{datadesc}{NOEXPR}
+Return a regular expression that can be used with the regex(3)
+function to recognize a negative response to a yes/no question.
+\end{datadesc}
+
+\begin{datadesc}{CRNCYSTR}
+Return the currency symbol, preceded by "-" if the symbol should
+appear before the value, "+" if the symbol should appear after the
+value, or "." if the symbol should replace the radix character.
+\end{datadesc}
+
+\begin{datadesc}{ERA}
+The return value represents the era used in the current locale.
+
+Most locales do not define this value. An example of a locale which
+does define this value is the Japanese one. In Japan, the traditional
+representation of dates includes the name of the era corresponding to
+the then-emperor's reign.
+
+Normally it should not be necessary to use this value directly.
+Specifying the \code{E} modifier in their format strings causes the
+\function{strftime} function to use this information. The format of the
+returned string is not specified, and therefore you should not assume
+knowledge of it on different systems.
+\end{datadesc}
+
+\begin{datadesc}{ERA_YEAR}
+The return value gives the year in the relevant era of the locale.
+\end{datadesc}
+
+\begin{datadesc}{ERA_D_T_FMT}
+This return value can be used as a format string for
+\function{strftime} to represent dates and times in a locale-specific
+era-based way.
+\end{datadesc}
+
+\begin{datadesc}{ERA_D_FMT}
+This return value can be used as a format string for
+\function{strftime} to represent time in a locale-specific era-based
+way.
+\end{datadesc}
+
+\begin{datadesc}{ALT_DIGITS}
+The return value is a representation of up to 100 values used to
+represent the values 0 to 99.
+\end{datadesc}
+
+Example:
+
+\begin{verbatim}
+>>> import locale
+>>> loc = locale.getlocale(locale.LC_ALL) # get current locale
+>>> locale.setlocale(locale.LC_ALL, 'de_DE') # use German locale; name might vary with platform
+>>> locale.strcoll('f\xe4n', 'foo') # compare a string containing an umlaut
+>>> locale.setlocale(locale.LC_ALL, '') # use user's preferred locale
+>>> locale.setlocale(locale.LC_ALL, 'C') # use default (C) locale
+>>> locale.setlocale(locale.LC_ALL, loc) # restore saved locale
+\end{verbatim}
+
+
+\subsection{Background, details, hints, tips and caveats}
+
+The C standard defines the locale as a program-wide property that may
+be relatively expensive to change. On top of that, some
+implementation are broken in such a way that frequent locale changes
+may cause core dumps. This makes the locale somewhat painful to use
+correctly.
+
+Initially, when a program is started, the locale is the \samp{C} locale, no
+matter what the user's preferred locale is. The program must
+explicitly say that it wants the user's preferred locale settings by
+calling \code{setlocale(LC_ALL, '')}.
+
+It is generally a bad idea to call \function{setlocale()} in some library
+routine, since as a side effect it affects the entire program. Saving
+and restoring it is almost as bad: it is expensive and affects other
+threads that happen to run before the settings have been restored.
+
+If, when coding a module for general use, you need a locale
+independent version of an operation that is affected by the locale
+(such as \function{string.lower()}, or certain formats used with
+\function{time.strftime()}), you will have to find a way to do it
+without using the standard library routine. Even better is convincing
+yourself that using locale settings is okay. Only as a last resort
+should you document that your module is not compatible with
+non-\samp{C} locale settings.
+
+The case conversion functions in the
+\refmodule{string}\refstmodindex{string} module are affected by the
+locale settings. When a call to the \function{setlocale()} function
+changes the \constant{LC_CTYPE} settings, the variables
+\code{string.lowercase}, \code{string.uppercase} and
+\code{string.letters} are recalculated. Note that code that uses
+these variable through `\keyword{from} ... \keyword{import} ...',
+e.g.\ \code{from string import letters}, is not affected by subsequent
+\function{setlocale()} calls.
+
+The only way to perform numeric operations according to the locale
+is to use the special functions defined by this module:
+\function{atof()}, \function{atoi()}, \function{format()},
+\function{str()}.
+
+\subsection{For extension writers and programs that embed Python
+ \label{embedding-locale}}
+
+Extension modules should never call \function{setlocale()}, except to
+find out what the current locale is. But since the return value can
+only be used portably to restore it, that is not very useful (except
+perhaps to find out whether or not the locale is \samp{C}).
+
+When Python code uses the \module{locale} module to change the locale,
+this also affects the embedding application. If the embedding
+application doesn't want this to happen, it should remove the
+\module{_locale} extension module (which does all the work) from the
+table of built-in modules in the \file{config.c} file, and make sure
+that the \module{_locale} module is not accessible as a shared library.
+
+
+\subsection{Access to message catalogs \label{locale-gettext}}
+
+The locale module exposes the C library's gettext interface on systems
+that provide this interface. It consists of the functions
+\function{gettext()}, \function{dgettext()}, \function{dcgettext()},
+\function{textdomain()}, \function{bindtextdomain()}, and
+\function{bind_textdomain_codeset()}. These are similar to the same
+functions in the \refmodule{gettext} module, but use the C library's
+binary format for message catalogs, and the C library's search
+algorithms for locating message catalogs.
+
+Python applications should normally find no need to invoke these
+functions, and should use \refmodule{gettext} instead. A known
+exception to this rule are applications that link use additional C
+libraries which internally invoke \cfunction{gettext()} or
+\function{dcgettext()}. For these applications, it may be necessary to
+bind the text domain, so that the libraries can properly locate their
+message catalogs.