summaryrefslogtreecommitdiff
path: root/sys/src/cmd/hg/hgext/win32mbcs.py
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
committercinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
commit458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/hg/hgext/win32mbcs.py
parent3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)
add hg and python
Diffstat (limited to 'sys/src/cmd/hg/hgext/win32mbcs.py')
-rw-r--r--sys/src/cmd/hg/hgext/win32mbcs.py147
1 files changed, 147 insertions, 0 deletions
diff --git a/sys/src/cmd/hg/hgext/win32mbcs.py b/sys/src/cmd/hg/hgext/win32mbcs.py
new file mode 100644
index 000000000..a707f053e
--- /dev/null
+++ b/sys/src/cmd/hg/hgext/win32mbcs.py
@@ -0,0 +1,147 @@
+# win32mbcs.py -- MBCS filename support for Mercurial
+#
+# Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
+#
+# Version: 0.2
+# Author: Shun-ichi Goto <shunichi.goto@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+#
+
+'''allow the use of MBCS paths with problematic encodings
+
+Some MBCS encodings are not good for some path operations (e.g.
+splitting path, case conversion, etc.) on their encoded bytes. We call
+such an encoding (e.g. shift_jis and big5) a "problematic encoding".
+This extension can be used to fix the issue with those encodings by
+wrapping some functions to convert to Unicode string before path
+operation.
+
+This extension is useful for:
+
+- Japanese Windows users using shift_jis encoding.
+- Chinese Windows users using big5 encoding.
+- All users who use a repository with one of the problematic encodings
+ on a case-insensitive file system.
+
+This extension is not needed for:
+
+- Any user who uses only ASCII chars in paths.
+- Any user who does not use any of the problematic encodings.
+
+Note that there are some limitations on using this extension:
+
+- You should use a single encoding in one repository.
+- You should set the same encoding for the repository by locale or
+ HGENCODING.
+
+Path encoding conversions are done between Unicode and
+encoding.encoding, which is decided by Mercurial from the current locale
+setting or HGENCODING.
+'''
+
+import os, sys
+from mercurial.i18n import _
+from mercurial import util, encoding
+
+def decode(arg):
+    '''Recursively convert local-encoding byte strings in arg to unicode.
+
+    A str is decoded with encoding.encoding.  Tuples, lists and dicts are
+    rebuilt with their items decoded the same way (for dicts only the
+    values are converted, keys are left alone).  Any other object is
+    returned unchanged.  The input container is never modified.
+
+    Raises UnicodeError if a str cannot be decoded, or if it does not
+    survive a decode/encode round trip in encoding.encoding.
+    '''
+    if isinstance(arg, str):
+        uarg = arg.decode(encoding.encoding)
+        # round-trip check: refuse byte strings that do not re-encode to
+        # exactly the same bytes in the local encoding
+        if arg == uarg.encode(encoding.encoding):
+            return uarg
+        raise UnicodeError("Not local encoding")
+    elif isinstance(arg, tuple):
+        return tuple([decode(item) for item in arg])
+    elif isinstance(arg, list):
+        return [decode(item) for item in arg]
+    elif isinstance(arg, dict):
+        # build a new dict instead of rewriting the caller's dict in place,
+        # matching the tuple/list branches which also return new objects
+        return dict([(k, decode(v)) for k, v in arg.items()])
+    return arg
+
+def encode(arg):
+    '''Recursively convert unicode strings in arg to the local encoding.
+
+    A unicode object is encoded with encoding.encoding.  Tuples, lists and
+    dicts are rebuilt with their items encoded the same way (for dicts
+    only the values are converted, keys are left alone).  Any other
+    object is returned unchanged.  The input container is never modified.
+    '''
+    if isinstance(arg, unicode):
+        return arg.encode(encoding.encoding)
+    elif isinstance(arg, tuple):
+        return tuple([encode(item) for item in arg])
+    elif isinstance(arg, list):
+        return [encode(item) for item in arg]
+    elif isinstance(arg, dict):
+        # build a new dict instead of rewriting the caller's dict in place,
+        # matching the tuple/list branches which also return new objects
+        return dict([(k, encode(v)) for k, v in arg.items()])
+    return arg
+
+def appendsep(s):
+    '''Return s with os.sep appended unless it already ends a path part.
+
+    The last character is inspected on the decoded (unicode) form of s
+    when decoding succeeds, and on s itself otherwise.  An empty s, or
+    one whose last character is ':', '/' or a backslash, is returned
+    unchanged.
+    '''
+    try:
+        probe = decode(s)
+    except UnicodeError:
+        # not valid in the local encoding: inspect the raw value instead
+        probe = s
+    if not probe or probe[-1] in ':/\\':
+        return s
+    return s + os.sep
+
+def wrapper(func, args, kwds):
+    '''Call func with its string arguments converted to unicode.
+
+    If any positional argument is already unicode, func is called with
+    the arguments untouched and its result is returned as is.  Otherwise
+    args and kwds are passed through decode(), func is called, and the
+    result is passed through encode() before being returned.
+
+    Raises util.Abort when the conversion raises UnicodeError.
+    '''
+    # a unicode positional argument means nothing needs converting
+    for candidate in args:
+        if isinstance(candidate, unicode):
+            return func(*args, **kwds)
+
+    try:
+        uargs = decode(args)
+        ukwds = decode(kwds)
+        result = func(*uargs, **ukwds)
+        # hand the result back in the local encoding
+        return encode(result)
+    except UnicodeError:
+        raise util.Abort(_("[win32mbcs] filename conversion failed with"
+                           " %s encoding\n") % (encoding.encoding))
+
+def wrapperforlistdir(func, args, kwds):
+    '''Call func after making sure its path argument ends with a separator.
+
+    The path is taken from the first positional argument and/or the
+    'path' keyword argument; each one present is passed through
+    appendsep().  Returns whatever func returns.
+    '''
+    # Ensure the 'path' argument ends with os.sep to avoid
+    # misinterpreting a trailing 0x5c (the second byte of an MBCS
+    # character) as a path separator.
+    if args:
+        # args arrives as a tuple; copy to a list so item 0 can be replaced
+        args = list(args)
+        args[0] = appendsep(args[0])
+    # 'in' replaces the deprecated dict.has_key()
+    if 'path' in kwds:
+        kwds['path'] = appendsep(kwds['path'])
+    return func(*args, **kwds)
+
+def wrapname(name, wrapper):
+    '''Replace the function called name with one routed through wrapper.
+
+    name is a dotted path "module.attr".  The module is looked up in
+    sys.modules (so it must already be imported, otherwise KeyError is
+    raised), and its attribute is rebound to a function that calls
+    wrapper(original, args, kwds).
+    '''
+    modname, attr = name.rsplit('.', 1)
+    target = sys.modules[modname]
+    original = getattr(target, attr)
+
+    def f(*args, **kwds):
+        return wrapper(original, args, kwds)
+
+    # best effort: keep the original function name on the replacement
+    try:
+        f.__name__ = original.__name__ # fail with python23
+    except Exception:
+        pass
+    setattr(target, attr, f)
+
+# List of functions to be wrapped, given as whitespace-separated dotted
+# names; reposetup() splits this string and passes each name to
+# wrapname() together with the generic wrapper().
+# NOTE: os.path.dirname() and os.path.basename() are safe because
+# they use the result of os.path.split().
+funcs = '''os.path.join os.path.split os.path.splitext
+ os.path.splitunc os.path.normpath os.path.normcase os.makedirs
+ mercurial.util.endswithsep mercurial.util.splitpath mercurial.util.checkcase
+ mercurial.util.fspath mercurial.windows.pconvert'''
+
+# Codec and alias names of sjis and big5 to be faked, as a
+# whitespace-separated list.  reposetup() activates the wrappers only
+# when the lower-cased encoding.encoding is one of these names.
+problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
+ hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
+ sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
+ shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
+
+def reposetup(ui, repo):
+    '''Install the path wrappers when the local encoding is problematic.
+
+    Warns through ui and does nothing when os.path reports no unicode
+    filename support.  Otherwise, if encoding.encoding (lower-cased) is
+    listed in problematic_encodings, every function named in funcs is
+    wrapped with wrapper() and mercurial.osutil.listdir is wrapped with
+    wrapperforlistdir().  repo is not used.
+    '''
+    # TODO: decide use of config section for this extension
+    if not os.path.supports_unicode_filenames:
+        ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
+        return
+
+    # fake is only for relevant environment.
+    if encoding.encoding.lower() not in problematic_encodings.split():
+        return
+
+    for dotted in funcs.split():
+        wrapname(dotted, wrapper)
+    wrapname("mercurial.osutil.listdir", wrapperforlistdir)
+    ui.debug(_("[win32mbcs] activated with encoding: %s\n")
+             % encoding.encoding)
+