1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
#include <u.h>
#include <libc.h>
#include <bio.h>
/*
 * Convert the hexadecimal text at s to a code point.
 *
 * The value produced by strtoul (base 16) is stored in a Rune and
 * returned as an int.  When strtoul consumes no characters at all,
 * sysfatal is called with "bad code point hex string".
 */
static int
estrtoul(char *s)
{
	Rune cp;
	char *end;

	cp = strtoul(s, &end, 16);
	if(end != s)
		return cp;

	sysfatal("bad code point hex string");
	/* only reached if sysfatal were to return */
	return cp;
}
/*
 * Cross-check a Rune break finder against its UTF-8 counterpart.
 *
 *	r	NUL-terminated Rune string to segment.
 *	fn	break finder over Runes; called with r, returns a pointer into r.
 *	fn2	break finder over UTF-8; called with the UTF-8 form of r,
 *		returns a pointer into that string.
 *
 * The UTF-8 prefix selected by fn2 is converted back to Runes and
 * compared with the Rune prefix selected by fn.  When the two prefixes
 * differ in length or content, "utf mismstach" is printed (the message
 * text is kept exactly as the program has always emitted it).
 *
 * Returns the pointer produced by fn so the caller can continue from it.
 */
static Rune*
check(Rune *r, Rune* (*fn)(Rune*), char* (*fn2)(char*))
{
	Rune *r2, *tmp;
	char *p, *p2;
	long n;

	p = smprint("%S", r);
	if(p == nil)
		sysfatal("smprint: %r");
	r2 = fn(r);
	p2 = fn2(p);

	/* the UTF-8 segment [p, p2) converted back to Runes */
	tmp = runesmprint("%.*s", (int)(p2-p), p);
	if(tmp == nil)
		sysfatal("runesmprint: %r");

	/*
	 * n is the length of the Rune segment in Runes.  The lengths are
	 * compared first so that memcmp never reads past the end of tmp,
	 * and memcmp is given a byte count, not a Rune count.
	 */
	n = r2 - r;
	if(runestrlen(tmp) != n || memcmp(r, tmp, n*sizeof(Rune)) != 0)
		print("utf mismstach\n");

	free(p);
	free(tmp);
	return r2;
}
/*
 * Run every test line of a break test file through a pair of break
 * finders and print a diagnostic for each disagreement.
 *
 *	file	path of the test file to read.
 *	fn	break finder over Runes, handed to check().
 *	fn2	break finder over UTF-8, handed to check().
 *
 * Empty lines and lines starting with '#' are skipped; anything from
 * the first '#' on a line is discarded.  The remaining text is split on
 * single spaces.  A field whose first rune is '÷' or '×' is recorded as
 * an operator in ops[]; every other field is parsed as a hexadecimal
 * code point and appended to stack[].
 *
 * The operators are then walked from the second one up to, but not
 * including, the last one:
 *	'÷'	the segment found at the current position must be exactly
 *		one Rune long; otherwise "break fail" is printed and the
 *		rest of the line is abandoned.
 *	'×'	the segment is allowed to span several Runes; any operator
 *		passed over inside it that is not '×' is reported with
 *		"skipped".
 *
 * Exits through sysfatal when the file cannot be opened.
 */
static void
run(char *file, Rune* (*fn)(Rune*), char* (*fn2)(char*))
{
	Biobuf *b;
	char *p, *dot;
	char *pieces[16];
	int i, j, n;
	/*
	 * One slot more than the number of fields: a terminating 0 is
	 * stored after the last element of each array, and all fields of
	 * a malformed line may land in the same array.
	 */
	Rune stack[nelem(pieces)+1], ops[nelem(pieces)+1];
	int nstack, nops;
	Rune r, *rp, *rp2;
	char *line;

	b = Bopen(file, OREAD);
	if(b == nil)
		sysfatal("could not open %s: %r", file);

	/* line is a private copy for diagnostics; freed after every iteration */
	for(;(p = Brdline(b, '\n')) != nil; free(line)){
		p[Blinelen(b)-1] = 0;	/* replace the trailing newline */
		line = strdup(p);
		if(line == nil)
			sysfatal("strdup: %r");
		if(p[0] == 0 || p[0] == '#')
			continue;
		if((dot = strstr(p, "#")) != nil)
			*dot = 0;

		n = getfields(p, pieces, nelem(pieces), 0, " ");
		nstack = nops = 0;
		for(i = 0; i < n; i++){
			chartorune(&r, pieces[i]);
			if(r != L'÷' && r != L'×'){
				r = estrtoul(pieces[i]);
				stack[nstack++] = r;
				stack[nstack] = 0;
			} else {
				ops[nops++] = r;
				ops[nops] = 0;
			}
		}

		rp = stack;
		/* ops[0] and the final operator are not examined */
		for(i = 1; i < nops-1;){
			rp2 = check(rp, fn, fn2);
			switch(ops[i]){
			case L'÷':
				if(rp2 != rp+1){
					print("break fail %X %X || %s\n", rp[0], rp[1], line);
					goto Break;
				}
				rp++;
				i++;
				break;
			case L'×':
				if(rp2 - rp == 0){
					/* the finder made no progress: report and give up on this line */
					for(j = i; j < nops - 1; j++)
						if(ops[j] != L'×')
							print("skipped %d %d %s\n", i, nops, line);
					goto Break;
				}
				/* every operator inside the segment must be '×' */
				for(; rp < (rp2-1); rp++, i++){
					if(ops[i] != L'×')
						print("skipped %d %d %s\n", i, nops, line);
				}
				rp = rp2;
				i++;
				break;
			}
		}
Break:
		;
	}
	/* release the buffer and the underlying file descriptor */
	Bterm(b);
}
/*
 * Run the grapheme break and word break test files through the
 * matching Rune and UTF-8 break finders, then exit with a nil status.
 * The command-line arguments are not used.
 */
void
main(int, char**)
{
	run("/lib/ucd/GraphemeBreakTest.txt", runegbreak, utfgbreak);
	run("/lib/ucd/WordBreakTest.txt", runewbreak, utfwbreak);
	exits(nil);
}
|