python, hg: tow outside the environment.

They've served us well, and can ride off into the sunset.
author: Ori Bernstein <ori@eigenstate.org> 2021-06-14 00:00:37 +0000
committer: Ori Bernstein <ori@eigenstate.org> 2021-06-14 00:00:37 +0000
commit: a73a964e51247ed169d322c725a3a18859f109a3 (patch)
tree: 3f752d117274d444bda44e85609aeac1acf313f3 /sys/src/cmd/python/Tools/unicode/gencodec.py
parent: e64efe273fcb921a61bf27d33b230c4e64fcd425 (diff)
1 files changed, 0 insertions, 426 deletions
diff --git a/sys/src/cmd/python/Tools/unicode/gencodec.py b/sys/src/cmd/python/Tools/unicode/gencodec.py
deleted file mode 100644
index 8a2ca6447..000000000
--- a/sys/src/cmd/python/Tools/unicode/gencodec.py
+++ /dev/null
@@ -1,426 +0,0 @@
-""" Unicode Mapping Parser and Codec Generator.
-
-This script parses Unicode mapping files as available from the Unicode
-site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
-modules from them. The codecs use the standard character mapping codec
-to actually apply the mapping.
-
-Synopsis: gencodec.py dir codec_prefix
-
-All files in dir are scanned and those producing non-empty mappings
-will be written to <codec_prefix><mapname>.py with <mapname> being the
-first part of the map's filename ('a' in a.b.c.txt) converted to
-lowercase with hyphens replaced by underscores.
-
-The tool also writes marshalled versions of the mapping tables to the
-same location (with .mapping extension).
-
-Written by Marc-Andre Lemburg (mal@lemburg.com).
-
-(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
-(c) Copyright Guido van Rossum, 2000.
-
-Table generation:
-(c) Copyright Marc-Andre Lemburg, 2005.
-    Licensed to PSF under a Contributor Agreement.
-
-"""#"
-
-import re, os, time, marshal, codecs
-
-# Maximum allowed size of charmap tables
-MAX_TABLE_SIZE = 8192
-
-# Standard undefined Unicode code point
-UNI_UNDEFINED = unichr(0xFFFE)
-
-mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
-                   '\s+'
-                   '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
-                   '\s*'
-                   '(#.+)?')
-
-def parsecodes(codes,
-               len=len, filter=filter,range=range):
-
-    """ Converts code combinations to either a single code integer
-        or a tuple of integers.
-
-        meta-codes (in angular brackets, e.g. <LR> and <RL>) are
-        ignored.
-
-        Empty codes or illegal ones are returned as None.
-
-    """
-    if not codes:
-        return None
-    l = codes.split('+')
-    if len(l) == 1:
-        return int(l[0],16)
-    for i in range(len(l)):
-        try:
-            l[i] = int(l[i],16)
-        except ValueError:
-            l[i] = None
-    l = filter(lambda x: x is not None, l)
-    if len(l) == 1:
-        return l[0]
-    else:
-        return tuple(l)
-
-def readmap(filename):
-
-    f = open(filename,'r')
-    lines = f.readlines()
-    f.close()
-    enc2uni = {}
-    identity = []
-    unmapped = range(256)
-
-    # UTC mapping tables per convention don't include the identity
-    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
-    # explicitly mapped to different characters or undefined
-    for i in range(32) + [127]:
-        identity.append(i)
-        unmapped.remove(i)
-        enc2uni[i] = (i, 'CONTROL CHARACTER')
-
-    for line in lines:
-        line = line.strip()
-        if not line or line[0] == '#':
-            continue
-        m = mapRE.match(line)
-        if not m:
-            #print '* not matched: %s' % repr(line)
-            continue
-        enc,uni,comment = m.groups()
-        enc = parsecodes(enc)
-        uni = parsecodes(uni)
-        if comment is None:
-            comment = ''
-        else:
-            comment = comment[1:].strip()
-        if enc < 256:
-            if enc in unmapped:
-                unmapped.remove(enc)
-            if enc == uni:
-                identity.append(enc)
-            enc2uni[enc] = (uni,comment)
-        else:
-            enc2uni[enc] = (uni,comment)
-
-    # If there are more identity-mapped entries than unmapped entries,
-    # it pays to generate an identity dictionary first, and add explicit
-    # mappings to None for the rest
-    if len(identity) >= len(unmapped):
-        for enc in unmapped:
-            enc2uni[enc] = (None, "")
-        enc2uni['IDENTITY'] = 256
-
-    return enc2uni
-
-def hexrepr(t, precision=4):
-
-    if t is None:
-        return 'None'
-    try:
-        len(t)
-    except:
-        return '0x%0*X' % (precision, t)
-    try:
-        return '(' + ', '.join(['0x%0*X' % (precision, item)
-                                for item in t]) + ')'
-    except TypeError, why:
-        print '* failed to convert %r: %s' % (t, why)
-        raise
-
-def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
-
-    l = []
-    append = l.append
-    if map.has_key("IDENTITY"):
-        append("%s = codecs.make_identity_dict(range(%d))" %
-               (varname, map["IDENTITY"]))
-        append("%s.update({" % varname)
-        splits = 1
-        del map["IDENTITY"]
-        identity = 1
-    else:
-        append("%s = {" % varname)
-        splits = 0
-        identity = 0
-
-    mappings = map.items()
-    mappings.sort()
-    i = 0
-    key_precision, value_precision = precisions
-    for mapkey, mapvalue in mappings:
-        mapcomment = ''
-        if isinstance(mapkey, tuple):
-            (mapkey, mapcomment) = mapkey
-        if isinstance(mapvalue, tuple):
-            (mapvalue, mapcomment) = mapvalue
-        if mapkey is None:
-            continue
-        if (identity and
-            mapkey == mapvalue and
-            mapkey < 256):
-            # No need to include identity mappings, since these
-            # are already set for the first 256 code points.
-            continue
-        key = hexrepr(mapkey, key_precision)
-        value = hexrepr(mapvalue, value_precision)
-        if mapcomment and comments:
-            append('    %s: %s,\t#  %s' % (key, value, mapcomment))
-        else:
-            append('    %s: %s,' % (key, value))
-        i += 1
-        if i == 4096:
-            # Split the definition into parts to that the Python
-            # parser doesn't dump core
-            if splits == 0:
-                append('}')
-            else:
-                append('})')
-            append('%s.update({' % varname)
-            i = 0
-            splits = splits + 1
-    if splits == 0:
-        append('}')
-    else:
-        append('})')
-
-    return l
-
-def python_tabledef_code(varname, map, comments=1, key_precision=2):
-
-    l = []
-    append = l.append
-    append('%s = (' % varname)
-
-    # Analyze map and create table dict
-    mappings = map.items()
-    mappings.sort()
-    table = {}
-    maxkey = 0
-    if map.has_key('IDENTITY'):
-        for key in range(256):
-            table[key] = (key, '')
-        maxkey = 255
-        del map['IDENTITY']
-    for mapkey, mapvalue in mappings:
-        mapcomment = ''
-        if isinstance(mapkey, tuple):
-            (mapkey, mapcomment) = mapkey
-        if isinstance(mapvalue, tuple):
-            (mapvalue, mapcomment) = mapvalue
-        if mapkey is None:
-            continue
-        table[mapkey] = (mapvalue, mapcomment)
-        if mapkey > maxkey:
-            maxkey = mapkey
-    if maxkey > MAX_TABLE_SIZE:
-        # Table too large
-        return None
-
-    # Create table code
-    for key in range(maxkey + 1):
-        if key not in table:
-            mapvalue = None
-            mapcomment = 'UNDEFINED'
-        else:
-            mapvalue, mapcomment = table[key]
-        if mapvalue is None:
-            mapchar = UNI_UNDEFINED
-        else:
-            if isinstance(mapvalue, tuple):
-                # 1-n mappings not supported
-                return None
-            else:
-                mapchar = unichr(mapvalue)
-        if mapcomment and comments:
-            append('    %r\t#  %s -> %s' % (mapchar,
-                                            hexrepr(key, key_precision),
-                                            mapcomment))
-        else:
-            append('    %r' % mapchar)
-
-    append(')')
-    return l
-
-def codegen(name, map, encodingname, comments=1):
-
-    """ Returns Python source for the given map.
-
-        Comments are included in the source, if comments is true (default).
-
-    """
-    # Generate code
-    decoding_map_code = python_mapdef_code(
-        'decoding_map',
-        map,
-        comments=comments)
-    decoding_table_code = python_tabledef_code(
-        'decoding_table',
-        map,
-        comments=comments)
-    encoding_map_code = python_mapdef_code(
-        'encoding_map',
-        codecs.make_encoding_map(map),
-        comments=comments,
-        precisions=(4, 2))
-
-    if decoding_table_code:
-        suffix = 'table'
-    else:
-        suffix = 'map'
-
-    l = [
-        '''\
-""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.
-
-"""#"
-
-import codecs
-
-### Codec APIs
-
-class Codec(codecs.Codec):
-
-    def encode(self,input,errors='strict'):
-        return codecs.charmap_encode(input,errors,encoding_%s)
-
-    def decode(self,input,errors='strict'):
-        return codecs.charmap_decode(input,errors,decoding_%s)
-''' % (encodingname, name, suffix, suffix)]
-    l.append('''\
-class IncrementalEncoder(codecs.IncrementalEncoder):
-    def encode(self, input, final=False):
-        return codecs.charmap_encode(input,self.errors,encoding_%s)[0]
-
-class IncrementalDecoder(codecs.IncrementalDecoder):
-    def decode(self, input, final=False):
-        return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
-        (suffix, suffix))
-
-    l.append('''
-class StreamWriter(Codec,codecs.StreamWriter):
-    pass
-
-class StreamReader(Codec,codecs.StreamReader):
-    pass
-
-### encodings module API
-
-def getregentry():
-    return codecs.CodecInfo(
-        name=%r,
-        encode=Codec().encode,
-        decode=Codec().decode,
-        incrementalencoder=IncrementalEncoder,
-        incrementaldecoder=IncrementalDecoder,
-        streamreader=StreamReader,
-        streamwriter=StreamWriter,
-    )
-''' % encodingname.replace('_', '-'))
-
-    # Add decoding table or map (with preference to the table)
-    if not decoding_table_code:
-        l.append('''
-### Decoding Map
-''')
-        l.extend(decoding_map_code)
-    else:
-        l.append('''
-### Decoding Table
-''')
-        l.extend(decoding_table_code)
-
-    # Add encoding map
-    if decoding_table_code:
-        l.append('''
-### Encoding table
-encoding_table=codecs.charmap_build(decoding_table)
-''')
-    else:
-        l.append('''
-### Encoding Map
-''')
-        l.extend(encoding_map_code)
-
-    # Final new-line
-    l.append('')
-
-    return '\n'.join(l).expandtabs()
-
-def pymap(name,map,pyfile,encodingname,comments=1):
-
-    code = codegen(name,map,encodingname,comments)
-    f = open(pyfile,'w')
-    f.write(code)
-    f.close()
-
-def marshalmap(name,map,marshalfile):
-
-    d = {}
-    for e,(u,c) in map.items():
-        d[e] = (u,c)
-    f = open(marshalfile,'wb')
-    marshal.dump(d,f)
-    f.close()
-
-def convertdir(dir, dirprefix='', nameprefix='', comments=1):
-
-    mapnames = os.listdir(dir)
-    for mapname in mapnames:
-        mappathname = os.path.join(dir, mapname)
-        if not os.path.isfile(mappathname):
-            continue
-        name = os.path.split(mapname)[1]
-        name = name.replace('-','_')
-        name = name.split('.')[0]
-        name = name.lower()
-        name = nameprefix + name
-        codefile = name + '.py'
-        marshalfile = name + '.mapping'
-        print 'converting %s to %s and %s' % (mapname,
-                                              dirprefix + codefile,
-                                              dirprefix + marshalfile)
-        try:
-            map = readmap(os.path.join(dir,mapname))
-            if not map:
-                print '* map is empty; skipping'
-            else:
-                pymap(mappathname, map, dirprefix + codefile,name,comments)
-                marshalmap(mappathname, map, dirprefix + marshalfile)
-        except ValueError, why:
-            print '* conversion failed: %s' % why
-            raise
-
-def rewritepythondir(dir, dirprefix='', comments=1):
-
-    mapnames = os.listdir(dir)
-    for mapname in mapnames:
-        if not mapname.endswith('.mapping'):
-            continue
-        name = mapname[:-len('.mapping')]
-        codefile = name + '.py'
-        print 'converting %s to %s' % (mapname,
-                                       dirprefix + codefile)
-        try:
-            map = marshal.load(open(os.path.join(dir,mapname),
-                               'rb'))
-            if not map:
-                print '* map is empty; skipping'
-            else:
-                pymap(mapname, map, dirprefix + codefile,name,comments)
-        except ValueError, why:
-            print '* conversion failed: %s' % why
-
-if __name__ == '__main__':
-
-    import sys
-    if 1:
-        apply(convertdir,tuple(sys.argv[1:]))
-    else:
-        apply(rewritepythondir,tuple(sys.argv[1:]))
author	Ori Bernstein <ori@eigenstate.org>	2021-06-14 00:00:37 +0000
committer	Ori Bernstein <ori@eigenstate.org>	2021-06-14 00:00:37 +0000
commit	a73a964e51247ed169d322c725a3a18859f109a3 (patch)
tree	3f752d117274d444bda44e85609aeac1acf313f3 /sys/src/cmd/python/Tools/unicode/gencodec.py
parent	e64efe273fcb921a61bf27d33b230c4e64fcd425 (diff)