diff options
author | Jacob Moody <moody@posixcafe.org> | 2023-03-26 01:02:20 +0000 |
---|---|---|
committer | Jacob Moody <moody@posixcafe.org> | 2023-03-26 01:02:20 +0000 |
commit | 04759ec9af6dcc78ea5873ceaf6db2e3b3920b22 (patch) | |
tree | 0e20684eee0f36fff952c46bc14828f1005dfb31 /sys/src/libc/test | |
parent | 2163aebcb85e8214869a2c026b3fc5bd9ddac22c (diff) |
runecomp(2)
Diffstat (limited to 'sys/src/libc/test')
-rw-r--r-- | sys/src/libc/test/mkfile | 8 | ||||
-rw-r--r-- | sys/src/libc/test/runebreak.c | 112 | ||||
-rw-r--r-- | sys/src/libc/test/runenorm.c | 92 |
3 files changed, 212 insertions, 0 deletions
diff --git a/sys/src/libc/test/mkfile b/sys/src/libc/test/mkfile
index b6b5cce33..2bbc1fa13 100644
--- a/sys/src/libc/test/mkfile
+++ b/sys/src/libc/test/mkfile
@@ -3,6 +3,14 @@
 TEST=\
 	date\
 	pow\
+	runebreak\
+	runenorm\
 	strchr\
 
 </sys/src/cmd/mktest
+
+/lib/ucd/%:
+	cd /lib/ucd && mk $stem
+
+runebreak.test: /lib/ucd/GraphemeBreakTest.txt /lib/ucd/WordBreakTest.txt
+runenorm.test: /lib/ucd/NormalizationTest.txt
diff --git a/sys/src/libc/test/runebreak.c b/sys/src/libc/test/runebreak.c
new file mode 100644
index 000000000..c9cca51f4
--- /dev/null
+++ b/sys/src/libc/test/runebreak.c
@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+/* Parse s as a hexadecimal code point; sysfatal if no hex digits were consumed. */
+static int
+estrtoul(char *s)
+{
+	char *epr;
+	Rune code;
+
+	code = strtoul(s, &epr, 16);
+	if(s == epr)
+		sysfatal("bad code point hex string");
+	return code;
+}
+/* Run fn on r and fn2 on r's UTF-8 form; print a complaint if their first segments differ. Returns fn(r). */
+static Rune*
+check(Rune *r, Rune* (*fn)(Rune*), char* (*fn2)(char*))
+{
+	Rune *r2, *tmp;
+	char *p, *p2;
+
+	p = smprint("%S", r);
+	r2 = fn(r);
+	p2 = fn2(p);
+	/* tmp is the UTF-8 segment decoded back to runes; it must equal r[0..r2-r) in length and content */
+	tmp = runesmprint("%.*s", (int)(p2-p), p);
+	if(runestrlen(tmp) != r2-r || memcmp(r, tmp, (r2-r)*sizeof(Rune)) != 0)
+		print("utf mismatch\n");
+
+	free(p);
+	free(tmp);
+	return r2;
+}
+/* Replay every test line of file through fn/fn2: each ÷ is an expected break, each × an expected non-break. */
+static void
+run(char *file, Rune* (*fn)(Rune*), char* (*fn2)(char*))
+{
+	Biobuf *b;
+	char *p, *dot;
+	char *pieces[16];
+	int i, j, n;
+	Rune stack[16], ops[16];
+	int nstack, nops;
+	Rune r, *rp, *rp2;
+	char *line;
+
+	b = Bopen(file, OREAD);
+	if(b == nil)
+		sysfatal("could not open %s: %r", file);
+
+	for(;(p = Brdline(b, '\n')) != nil; free(line)){
+		p[Blinelen(b)-1] = 0;
+		line = strdup(p);	/* untouched copy for diagnostics; freed by the for increment */
+		if(p[0] == 0 || p[0] == '#')
+			continue;
+		if((dot = strstr(p, "#")) != nil)
+			*dot = 0;	/* drop the trailing comment */
+		n = getfields(p, pieces, nelem(pieces), 0, " ");
+		nstack = nops = 0;
+		for(i = 0; i < n; i++){
+			chartorune(&r, pieces[i]);
+			if(r != L'÷' && r != L'×'){
+				r = estrtoul(pieces[i]);
+				stack[nstack++] = r;
+				stack[nstack] = 0;
+			} else {
+				ops[nops++] = r;
+				ops[nops] = 0;
+			}
+		}
+
+		rp = stack;
+		for(i = 1; i < nops-1;){	/* ops[0] and ops[nops-1] (line start and end) are not checked */
+			rp2 = check(rp, fn, fn2);
+			switch(ops[i]){
+			case L'÷':
+				if(rp2 != rp+1){
+					print("break fail %X %X || %s\n", rp[0], rp[1], line);
+					goto Break;
+				}
+				rp++;
+				i++;
+				break;
+			case L'×':
+				if(rp2 - rp == 0){
+					for(j = i; j < nops - 1; j++)
+						if(ops[j] != L'×')
+							print("skipped %d %d %s\n", i, nops, line);
+					goto Break;
+				}
+				for(; rp < (rp2-1); rp++, i++){
+					if(ops[i] != L'×')
+						print("skipped %d %d %s\n", i, nops, line);
+				}
+				rp = rp2;
+				i++;
+				break;
+			}
+		}
+Break:
+		;
+	}
+}
+/* Run the grapheme and word break routines against their UCD test files. */
+void
+main(int, char**)
+{
+	run("/lib/ucd/GraphemeBreakTest.txt", runegbreak, utfgbreak);
+	run("/lib/ucd/WordBreakTest.txt", runewbreak, utfwbreak);
+	exits(nil);
+}
diff --git a/sys/src/libc/test/runenorm.c b/sys/src/libc/test/runenorm.c
new file mode 100644
index 000000000..fb4168e28
--- /dev/null
+++ b/sys/src/libc/test/runenorm.c
@@ -0,0 +1,92 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+/* Parse s as a hexadecimal code point; sysfatal if no hex digits were consumed. */
+static int
+estrtoul(char *s)
+{
+	char *epr;
+	Rune code;
+
+	code = strtoul(s, &epr, 16);
+	if(s == epr)
+		sysfatal("bad code point hex string");
+	return code;
+}
+/* Check runecomp/runedecomp and utfcomp/utfdecomp against fields 0-2 of each NormalizationTest.txt line. */
+void
+main(int, char**)
+{
+	Rune buffer1[64];
+	Rune buffer2[64];
+	char utfbuff1[128];
+	char utfbuff2[128];
+	char srctmp[128], tmp1[128], tmp2[128];
+	char *fields[10];
+	char *runes[32];
+	char *p;
+	int n, n2;
+	int i;
+	uint fail;
+	Biobuf *b;
+
+	b = Bopen("/lib/ucd/NormalizationTest.txt", OREAD);
+	if(b == nil)
+		sysfatal("could not open /lib/ucd/NormalizationTest.txt: %r");
+
+	struct {
+		Rune src[32+1];	/* up to nelem(runes) code points plus the terminating 0 */
+		Rune nfc[32+1];
+		Rune nfd[32+1];
+	} test;
+	while((p = Brdline(b, '\n')) != nil){
+		p[Blinelen(b)-1] = 0;
+		if(p[0] == 0 || p[0] == '#' || p[0] == '@')
+			continue;
+		getfields(p, fields, 6 + 1, 0, ";");
+		n = getfields(fields[0], runes, nelem(runes), 0, " ");
+		for(i = 0; i < n; i++)
+			test.src[i] = estrtoul(runes[i]);
+		test.src[i] = 0;
+
+		n = getfields(fields[1], runes, nelem(runes), 0, " ");
+		for(i = 0; i < n; i++)
+			test.nfc[i] = estrtoul(runes[i]);
+		test.nfc[i] = 0;
+
+		n = getfields(fields[2], runes, nelem(runes), 0, " ");
+		for(i = 0; i < n; i++)
+			test.nfd[i] = estrtoul(runes[i]);
+		test.nfd[i] = 0;
+
+		n = runecomp(buffer1, test.src, nelem(buffer1));
+		n2 = runedecomp(buffer2, test.src, nelem(buffer2));
+		fail = 0;	/* bit 0 runecomp, 1 runedecomp, 2 utfcomp, 3 utfdecomp */
+
+		if(runestrcmp(buffer1, test.nfc) != 0)
+			fail |= 1<<0;
+		if(runestrcmp(buffer2, test.nfd) != 0)
+			fail |= 1<<1;
+		if(fail)
+			print("%d %d %S %S %S %S %S\n", fail, i, test.src, test.nfd, test.nfc, buffer2, buffer1);
+		assert(n == runestrlen(test.nfc));
+		assert(n2 == runestrlen(test.nfd));
+
+		snprint(srctmp, sizeof srctmp, "%S", test.src);
+		snprint(tmp1, sizeof tmp1, "%S", test.nfc);
+		snprint(tmp2, sizeof tmp2, "%S", test.nfd);
+
+		n = utfcomp(utfbuff1, srctmp, nelem(utfbuff1));
+		n2 = utfdecomp(utfbuff2, srctmp, nelem(utfbuff2));
+
+		if(strcmp(utfbuff1, tmp1) != 0)
+			fail |= 1<<2;
+		if(strcmp(utfbuff2, tmp2) != 0)
+			fail |= 1<<3;
+		if(fail)
+			print("%d %d %s %s %s %s %s\n", fail, i, srctmp, tmp2, tmp1, utfbuff2, utfbuff1);
+		assert(n == strlen(tmp1));
+		assert(n2 == strlen(tmp2));
+	}
+	exits(nil);
+}