summaryrefslogtreecommitdiff
path: root/sys/src/libc/test
diff options
context:
space:
mode:
authorJacob Moody <moody@posixcafe.org>2023-03-26 01:02:20 +0000
committerJacob Moody <moody@posixcafe.org>2023-03-26 01:02:20 +0000
commit04759ec9af6dcc78ea5873ceaf6db2e3b3920b22 (patch)
tree0e20684eee0f36fff952c46bc14828f1005dfb31 /sys/src/libc/test
parent2163aebcb85e8214869a2c026b3fc5bd9ddac22c (diff)
runecomp(2)
Diffstat (limited to 'sys/src/libc/test')
-rw-r--r--sys/src/libc/test/mkfile8
-rw-r--r--sys/src/libc/test/runebreak.c112
-rw-r--r--sys/src/libc/test/runenorm.c92
3 files changed, 212 insertions, 0 deletions
diff --git a/sys/src/libc/test/mkfile b/sys/src/libc/test/mkfile
index b6b5cce33..2bbc1fa13 100644
--- a/sys/src/libc/test/mkfile
+++ b/sys/src/libc/test/mkfile
@@ -3,6 +3,14 @@
TEST=\
date\
pow\
+ runebreak\
+ runenorm\
strchr\
</sys/src/cmd/mktest
+
+# Files under /lib/ucd are made by running mk for that target in /lib/ucd.
+/lib/ucd/%:
+ cd /lib/ucd && mk $stem
+
+# The test programs open these UCD data files, so they are listed as prerequisites.
+runebreak.test: /lib/ucd/GraphemeBreakTest.txt /lib/ucd/WordBreakTest.txt
+runenorm.test: /lib/ucd/NormalizationTest.txt
diff --git a/sys/src/libc/test/runebreak.c b/sys/src/libc/test/runebreak.c
new file mode 100644
index 000000000..c9cca51f4
--- /dev/null
+++ b/sys/src/libc/test/runebreak.c
@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/*
+ * Convert the hexadecimal string s to a code point.
+ * Calls sysfatal when strtoul consumes no characters of s;
+ * anything following the number is ignored.
+ */
+static int
+estrtoul(char *s)
+{
+	Rune r;
+	char *end;
+
+	r = strtoul(s, &end, 16);
+	if(end != s)
+		return r;
+	sysfatal("bad code point hex string");
+	return r;
+}
+
+/*
+ * Run the rune break finder fn on r and the UTF-8 break finder fn2 on
+ * the UTF-8 encoding of r, and print a diagnostic when the segments
+ * they skip over differ.  Returns the position fn reports within r.
+ */
+static Rune*
+check(Rune *r, Rune* (*fn)(Rune*), char* (*fn2)(char*))
+{
+	Rune *r2, *tmp;
+	char *p, *p2;
+	long n;
+
+	p = smprint("%S", r);
+	if(p == nil)
+		sysfatal("smprint: %r");
+	r2 = fn(r);
+	p2 = fn2(p);
+
+	/* decode the bytes fn2 skipped over back into runes */
+	tmp = runesmprint("%.*s", (int)(p2-p), p);
+	if(tmp == nil)
+		sysfatal("runesmprint: %r");
+
+	/*
+	 * memcmp counts bytes, so the rune count has to be scaled by
+	 * sizeof(Rune).  Comparing the lengths first keeps memcmp
+	 * inside tmp and catches segments of different length.
+	 */
+	n = r2 - r;
+	if(runestrlen(tmp) != n || memcmp(r, tmp, n*sizeof(Rune)) != 0)
+		print("utf mismstach\n");
+
+	free(p);
+	free(tmp);
+	return r2;
+}
+
+/*
+ * Run one break test file.
+ *
+ * Each non-comment line is split on spaces into code points (hex) and
+ * the operators '÷' and '×'.  The code points are collected in stack,
+ * the operators in ops, and the break finders fn (runes) and fn2
+ * (UTF-8) are then stepped over the code points and their results
+ * compared with the operators.  Disagreements are printed; the
+ * function itself never fails on a mismatch.
+ *
+ * Calls sysfatal if file cannot be opened.
+ */
+static void
+run(char *file, Rune* (*fn)(Rune*), char* (*fn2)(char*))
+{
+	Biobuf *b;
+	char *p, *dot;
+	char *pieces[16];
+	int i, j, n;
+	/* one slot more than pieces[] so the 0 terminator always fits */
+	Rune stack[16+1], ops[16+1];
+	int nstack, nops;
+	Rune r, *rp, *rp2;
+	char *line;
+
+	b = Bopen(file, OREAD);
+	if(b == nil)
+		sysfatal("could not open %s: %r", file);
+
+	for(;(p = Brdline(b, '\n')) != nil; free(line)){
+		p[Blinelen(b)-1] = 0;
+		/* keep an unmodified copy for diagnostics; getfields cuts up p */
+		line = strdup(p);
+		if(p[0] == 0 || p[0] == '#')
+			continue;
+		/* drop the trailing comment */
+		if((dot = strstr(p, "#")) != nil)
+			*dot = 0;
+		n = getfields(p, pieces, nelem(pieces), 0, " ");
+		nstack = nops = 0;
+		/* keep both arrays terminated even when nothing is pushed */
+		stack[0] = ops[0] = 0;
+		for(i = 0; i < n; i++){
+			chartorune(&r, pieces[i]);
+			if(r != L'÷' && r != L'×'){
+				r = estrtoul(pieces[i]);
+				stack[nstack++] = r;
+				stack[nstack] = 0;
+			} else {
+				ops[nops++] = r;
+				ops[nops] = 0;
+			}
+		}
+
+		/* the first and last operators are not examined */
+		rp = stack;
+		for(i = 1; i < nops-1;){
+			rp2 = check(rp, fn, fn2);
+			switch(ops[i]){
+			case L'÷':
+				/* a break is expected right after rp[0] */
+				if(rp2 != rp+1){
+					print("break fail %X %X || %s\n", rp[0], rp[1], line);
+					goto Break;
+				}
+				rp++;
+				i++;
+				break;
+			case L'×':
+				/* fn made no progress: the rest must all be '×' */
+				if(rp2 - rp == 0){
+					for(j = i; j < nops - 1; j++)
+						if(ops[j] != L'×')
+							print("skipped %d %d %s\n", i, nops, line);
+					goto Break;
+				}
+				/* every operator inside the skipped segment must be '×' */
+				for(; rp < (rp2-1); rp++, i++){
+					if(ops[i] != L'×')
+						print("skipped %d %d %s\n", i, nops, line);
+				}
+				rp = rp2;
+				i++;
+				break;
+			}
+		}
+Break:
+		;
+	}
+	/* release the buffer and file opened by Bopen */
+	Bterm(b);
+}
+
+/*
+ * Run the grapheme and word break finders against their UCD test
+ * files.  The arguments are unused.
+ */
+void
+main(int, char**)
+{
+	run("/lib/ucd/GraphemeBreakTest.txt", runegbreak, utfgbreak);
+	run("/lib/ucd/WordBreakTest.txt", runewbreak, utfwbreak);
+	exits(nil);
+}
diff --git a/sys/src/libc/test/runenorm.c b/sys/src/libc/test/runenorm.c
new file mode 100644
index 000000000..fb4168e28
--- /dev/null
+++ b/sys/src/libc/test/runenorm.c
@@ -0,0 +1,92 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/*
+ * Parse s as a base 16 number and return it as a code point.
+ * If strtoul does not advance past the start of s the program
+ * is terminated with sysfatal.
+ */
+static int
+estrtoul(char *s)
+{
+	char *rest;
+	Rune value;
+
+	value = strtoul(s, &rest, 16);
+	if(rest == s)
+		sysfatal("bad code point hex string");
+
+	return value;
+}
+
+/*
+ * Check runecomp/runedecomp and utfcomp/utfdecomp against
+ * /lib/ucd/NormalizationTest.txt.
+ *
+ * For every test line the first three ';'-separated fields are read as
+ * the source, the expected composed form and the expected decomposed
+ * form.  Mismatches are printed with a bit mask in fail:
+ *	1<<0 runecomp, 1<<1 runedecomp, 1<<2 utfcomp, 1<<3 utfdecomp.
+ * The returned lengths are checked with assert.  The arguments are unused.
+ */
+void
+main(int, char**)
+{
+	Rune buffer1[64];
+	Rune buffer2[64];
+	char utfbuff1[128];
+	char utfbuff2[128];
+	char srctmp[128], tmp1[128], tmp2[128];
+	char *fields[10];
+	char *runes[32];
+	char *p;
+	int n, n2;
+	int i;
+	uint fail;
+	Biobuf *b;
+	/* one slot more than runes[] so the 0 terminator always fits */
+	struct {
+		Rune src[32+1];
+		Rune nfc[32+1];
+		Rune nfd[32+1];
+	} test;
+
+	b = Bopen("/lib/ucd/NormalizationTest.txt", OREAD);
+	if(b == nil)
+		sysfatal("could not open /lib/ucd/NormalizationTest.txt: %r");
+
+	while((p = Brdline(b, '\n')) != nil){
+		p[Blinelen(b)-1] = 0;
+		/* skip blank lines, comments and '@' lines */
+		if(p[0] == 0 || p[0] == '#' || p[0] == '@')
+			continue;
+		/* fields[0..2] are used below; refuse lines that lack them */
+		if(getfields(p, fields, 6 + 1, 0, ";") < 3)
+			sysfatal("malformed test line: %s", p);
+
+		n = getfields(fields[0], runes, nelem(runes), 0, " ");
+		for(i = 0; i < n; i++)
+			test.src[i] = estrtoul(runes[i]);
+		test.src[i] = 0;
+
+		n = getfields(fields[1], runes, nelem(runes), 0, " ");
+		for(i = 0; i < n; i++)
+			test.nfc[i] = estrtoul(runes[i]);
+		test.nfc[i] = 0;
+
+		n = getfields(fields[2], runes, nelem(runes), 0, " ");
+		for(i = 0; i < n; i++)
+			test.nfd[i] = estrtoul(runes[i]);
+		test.nfd[i] = 0;
+
+		/* rune string interface */
+		n = runecomp(buffer1, test.src, nelem(buffer1));
+		n2 = runedecomp(buffer2, test.src, nelem(buffer2));
+		fail = 0;
+
+		if(runestrcmp(buffer1, test.nfc) != 0)
+			fail |= 1<<0;
+		if(runestrcmp(buffer2, test.nfd) != 0)
+			fail |= 1<<1;
+		if(fail)
+			print("%d %d %S %S %S %S %S\n", fail, i, test.src, test.nfd, test.nfc, buffer2, buffer1);
+		assert(n == runestrlen(test.nfc));
+		assert(n2 == runestrlen(test.nfd));
+
+		/* UTF-8 interface, fed the same data converted with %S */
+		snprint(srctmp, sizeof srctmp, "%S", test.src);
+		snprint(tmp1, sizeof tmp1, "%S", test.nfc);
+		snprint(tmp2, sizeof tmp2, "%S", test.nfd);
+
+		n = utfcomp(utfbuff1, srctmp, nelem(utfbuff1));
+		n2 = utfdecomp(utfbuff2, srctmp, nelem(utfbuff2));
+
+		if(strcmp(utfbuff1, tmp1) != 0)
+			fail |= 1<<2;
+		if(strcmp(utfbuff2, tmp2) != 0)
+			fail |= 1<<3;
+		if(fail)
+			print("%d %d %s %s %s %s %s\n", fail, i, srctmp, tmp2, tmp1, utfbuff2, utfbuff1);
+		assert(n == strlen(tmp1));
+		assert(n2 == strlen(tmp2));
+	}
+	/* release the buffer and file opened by Bopen */
+	Bterm(b);
+	exits(nil);
+}