summary refs log tree commit diff
path: root/sys/src/libc/test/runebreak.c
blob: c9cca51f47d8dc99fd8b34def298160b64bf7ff8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include <u.h>
#include <libc.h>
#include <bio.h>

/*
 * Convert the leading hexadecimal digits of s to a code point.
 * Anything following the digits is ignored.  If no digits could
 * be converted at all the program exits through sysfatal.
 */
static int
estrtoul(char *s)
{
	char *end;
	Rune r;

	r = strtoul(s, &end, 16);
	if(end != s)
		return r;

	/* strtoul consumed nothing: s does not start with a hex number */
	sysfatal("bad code point hex string");
	return r;
}

/*
 * Apply the rune based breaker fn to r and the UTF-8 based breaker fn2
 * to the UTF-8 rendering of r, and print a diagnostic when the two
 * prefixes they select are not the same text.
 *
 * r is a 0 terminated rune string.  The return value is whatever fn
 * returned for r, so the caller can continue from there.
 */
static Rune*
check(Rune *r, Rune* (*fn)(Rune*), char* (*fn2)(char*))
{
	Rune *r2, *tmp;
	char *p, *p2;
	long n;

	p = smprint("%S", r);
	if(p == nil)
		sysfatal("smprint: %r");
	r2 = fn(r);
	p2 = fn2(p);

	/* convert the UTF-8 prefix chosen by fn2 back to runes */
	tmp = runesmprint("%.*s", (int)(p2-p), p);
	if(tmp == nil)
		sysfatal("runesmprint: %r");

	/*
	 * Both prefixes must have the same number of runes and the same
	 * contents.  memcmp counts bytes, so the rune count is scaled by
	 * sizeof(Rune).  The length test comes first so memcmp is never
	 * asked to read past the end of tmp (or given a negative count).
	 */
	n = r2 - r;
	if(runestrlen(tmp) != n || memcmp(r, tmp, n*sizeof(Rune)) != 0)
		print("utf mismatch\n");

	free(p);
	free(tmp);
	return r2;
}

/*
 * Read the break test file and, for every test line, compare the
 * breaks found by fn (rune based) and fn2 (UTF-8 based) against the
 * operators written on that line.  Disagreements are reported with
 * print; failure to open the file is fatal.
 *
 * Empty lines and lines starting with '#' are skipped and everything
 * from the first '#' on is dropped.  The remaining space separated
 * fields are either one of the operators ÷ and × or a hexadecimal
 * code point.  Operators are collected in ops, code points in stack;
 * both are kept 0 terminated.
 */
static void
run(char *file, Rune* (*fn)(Rune*), char* (*fn2)(char*))
{
	Biobuf *b;
	char *p, *dot;
	char *pieces[16];
	int i, j, n;
	/* one slot more than pieces so the 0 terminator always fits */
	Rune stack[16+1], ops[16+1];
	int nstack, nops;
	Rune r, *rp, *rp2;
	char *line;

	b = Bopen(file, OREAD);
	if(b == nil)
		sysfatal("could not open %s: %r", file);

	/* line is freed by the loop step, including after continue and goto Break */
	for(;(p = Brdline(b, '\n')) != nil; free(line)){
		p[Blinelen(b)-1] = 0;
		/* keep an unmodified copy for diagnostics; getfields cuts p up */
		line = strdup(p);
		if(p[0] == 0 || p[0] == '#')
			continue;
		if((dot = strstr(p, "#")) != nil)
			*dot = 0;
		n = getfields(p, pieces, nelem(pieces), 0, " ");
		nstack = nops = 0;
		for(i = 0; i < n; i++){
			chartorune(&r, pieces[i]);
			if(r != L'÷' && r != L'×'){
				r = estrtoul(pieces[i]);
				stack[nstack++] = r;
				stack[nstack] = 0;
			} else {
				ops[nops++] = r;
				ops[nops] = 0;
			}
		}

		/*
		 * The first and the last operator of a line are not examined:
		 * i starts at 1 and stops before nops-1.
		 */
		rp = stack;
		for(i = 1; i < nops-1;){
			rp2 = check(rp, fn, fn2);
			switch(ops[i]){
			case L'÷':
				/* a break is expected right after the current rune */
				if(rp2 != rp+1){
					print("break fail %X %X || %s\n", rp[0], rp[1], line);
					goto Break;
				}
				rp++;
				i++;
				break;
			case L'×':
				/* fn did not advance: every remaining operator should be × */
				if(rp2 - rp == 0){
					for(j = i; j < nops - 1; j++)
						if(ops[j] !=  L'×')
							print("skipped %d %d %s\n", i, nops, line);
					goto Break;
				}
				/* the operators passed over on the way to rp2 should all be × */
				for(; rp < (rp2-1); rp++, i++){
					if(ops[i] != L'×')
						print("skipped %d %d %s\n", i, nops, line);
				}
				rp = rp2;
				i++;
				break;
			}
		}
Break:
		;
	}
	/* release the buffer and the file opened by Bopen */
	Bterm(b);
}

/*
 * Run the grapheme break and word break routines against the
 * test files under /lib/ucd.  Command line arguments are ignored.
 */
void
main(int, char**)
{
	run("/lib/ucd/GraphemeBreakTest.txt", runegbreak, utfgbreak);
	run("/lib/ucd/WordBreakTest.txt", runewbreak, utfwbreak);
	exits(nil);
}