diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/lib/python/test/test_normalization.py | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/lib/python/test/test_normalization.py')
-rw-r--r-- | sys/lib/python/test/test_normalization.py | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/sys/lib/python/test/test_normalization.py b/sys/lib/python/test/test_normalization.py new file mode 100644 index 000000000..81bdfbd96 --- /dev/null +++ b/sys/lib/python/test/test_normalization.py @@ -0,0 +1,88 @@ +from test.test_support import (verbose, TestFailed, TestSkipped, verify, + open_urlresource) +import sys +import os +from unicodedata import normalize + +TESTDATAFILE = "NormalizationTest" + os.extsep + "txt" +TESTDATAURL = "http://www.unicode.org/Public/4.1.0/ucd/" + TESTDATAFILE + +class RangeError(Exception): + pass + +def NFC(str): + return normalize("NFC", str) + +def NFKC(str): + return normalize("NFKC", str) + +def NFD(str): + return normalize("NFD", str) + +def NFKD(str): + return normalize("NFKD", str) + +def unistr(data): + data = [int(x, 16) for x in data.split(" ")] + for x in data: + if x > sys.maxunicode: + raise RangeError + return u"".join([unichr(x) for x in data]) + +def test_main(): + part1_data = {} + for line in open_urlresource(TESTDATAURL): + if '#' in line: + line = line.split('#')[0] + line = line.strip() + if not line: + continue + if line.startswith("@Part"): + part = line.split()[0] + continue + if part == "@Part3": + # XXX we don't support PRI #29 yet, so skip these tests for now + continue + try: + c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]] + except RangeError: + # Skip unsupported characters; + # try atleast adding c1 if we are in part1 + if part == "@Part1": + try: + c1=unistr(line.split(';')[0]) + except RangeError: + pass + else: + part1_data[c1] = 1 + continue + + if verbose: + print line + + # Perform tests + verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line) + verify(c4 == NFC(c4) == NFC(c5), line) + verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line) + verify(c5 == NFD(c4) == NFD(c5), line) + verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), + line) + verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), + line) + + # Record part 1 data + if part == "@Part1": + part1_data[c1] = 1 + + # Perform tests for all other data + for c in range(sys.maxunicode+1): + X = unichr(c) + if X in part1_data: + continue + assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c + + # Check for bug 834676 + normalize('NFC',u'\ud55c\uae00') + +if __name__ == "__main__": + test_main() |