diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/lib/python/encodings/__init__.py | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/lib/python/encodings/__init__.py')
-rw-r--r-- | sys/lib/python/encodings/__init__.py | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/sys/lib/python/encodings/__init__.py b/sys/lib/python/encodings/__init__.py new file mode 100644 index 000000000..98ae2fae7 --- /dev/null +++ b/sys/lib/python/encodings/__init__.py @@ -0,0 +1,154 @@ +""" Standard "encodings" Package + + Standard Python encoding modules are stored in this package + directory. + + Codec modules must have names corresponding to normalized encoding + names as defined in the normalize_encoding() function below, e.g. + 'utf-8' must be implemented by the module 'utf_8.py'. + + Each codec module must export the following interface: + + * getregentry() -> codecs.CodecInfo object + The getregentry() API must a CodecInfo object with encoder, decoder, + incrementalencoder, incrementaldecoder, streamwriter and streamreader + atttributes which adhere to the Python Codec Interface Standard. + + In addition, a module may optionally also define the following + APIs which are then used by the package's codec search function: + + * getaliases() -> sequence of encoding name strings to use as aliases + + Alias names returned by getaliases() must be normalized encoding + names as defined by normalize_encoding(). + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +"""#" + +import codecs, types +from encodings import aliases + +_cache = {} +_unknown = '--unknown--' +_import_tail = ['*'] +_norm_encoding_map = (' . ' + '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ ' + ' abcdefghijklmnopqrstuvwxyz ' + ' ' + ' ' + ' ') +_aliases = aliases.aliases + +class CodecRegistryError(LookupError, SystemError): + pass + +def normalize_encoding(encoding): + + """ Normalize an encoding name. + + Normalization works as follows: all non-alphanumeric + characters except the dot used for Python package names are + collapsed and replaced with a single underscore, e.g. ' -;#' + becomes '_'. Leading and trailing underscores are removed. + + Note that encoding names should be ASCII only; if they do use + non-ASCII characters, these must be Latin-1 compatible. + + """ + # Make sure we have an 8-bit string, because .translate() works + # differently for Unicode strings. + if type(encoding) is types.UnicodeType: + # Note that .encode('latin-1') does *not* use the codec + # registry, so this call doesn't recurse. (See unicodeobject.c + # PyUnicode_AsEncodedString() for details) + encoding = encoding.encode('latin-1') + return '_'.join(encoding.translate(_norm_encoding_map).split()) + +def search_function(encoding): + + # Cache lookup + entry = _cache.get(encoding, _unknown) + if entry is not _unknown: + return entry + + # Import the module: + # + # First try to find an alias for the normalized encoding + # name and lookup the module using the aliased name, then try to + # lookup the module using the standard import scheme, i.e. first + # try in the encodings package, then at top-level. + # + norm_encoding = normalize_encoding(encoding) + aliased_encoding = _aliases.get(norm_encoding) or \ + _aliases.get(norm_encoding.replace('.', '_')) + if aliased_encoding is not None: + modnames = [aliased_encoding, + norm_encoding] + else: + modnames = [norm_encoding] + for modname in modnames: + if not modname or '.' in modname: + continue + try: + mod = __import__('encodings.' + modname, + globals(), locals(), _import_tail) + except ImportError: + pass + else: + break + else: + mod = None + + try: + getregentry = mod.getregentry + except AttributeError: + # Not a codec module + mod = None + + if mod is None: + # Cache misses + _cache[encoding] = None + return None + + # Now ask the module for the registry entry + entry = getregentry() + if not isinstance(entry, codecs.CodecInfo): + if not 4 <= len(entry) <= 7: + raise CodecRegistryError,\ + 'module "%s" (%s) failed to register' % \ + (mod.__name__, mod.__file__) + if not callable(entry[0]) or \ + not callable(entry[1]) or \ + (entry[2] is not None and not callable(entry[2])) or \ + (entry[3] is not None and not callable(entry[3])) or \ + (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \ + (len(entry) > 5 and entry[5] is not None and not callable(entry[5])): + raise CodecRegistryError,\ + 'incompatible codecs in module "%s" (%s)' % \ + (mod.__name__, mod.__file__) + if len(entry)<7 or entry[6] is None: + entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],) + entry = codecs.CodecInfo(*entry) + + # Cache the codec registry entry + _cache[encoding] = entry + + # Register its aliases (without overwriting previously registered + # aliases) + try: + codecaliases = mod.getaliases() + except AttributeError: + pass + else: + for alias in codecaliases: + if not _aliases.has_key(alias): + _aliases[alias] = modname + + # Return the registry entry + return entry + +# Register the search_function in the Python codec registry +codecs.register(search_function) |