summaryrefslogtreecommitdiff
path: root/sys/src/libc/port/runenorm.c
diff options
context:
space:
mode:
author Jacob Moody <moody@posixcafe.org> 2023-03-26 01:02:20 +0000
committer Jacob Moody <moody@posixcafe.org> 2023-03-26 01:02:20 +0000
commit 04759ec9af6dcc78ea5873ceaf6db2e3b3920b22 (patch)
tree 0e20684eee0f36fff952c46bc14828f1005dfb31 /sys/src/libc/port/runenorm.c
parent 2163aebcb85e8214869a2c026b3fc5bd9ddac22c (diff)
runecomp(2)
Diffstat (limited to 'sys/src/libc/port/runenorm.c')
-rw-r--r--sys/src/libc/port/runenorm.c334
1 files changed, 334 insertions, 0 deletions
diff --git a/sys/src/libc/port/runenorm.c b/sys/src/libc/port/runenorm.c
new file mode 100644
index 000000000..942ea4d49
--- /dev/null
+++ b/sys/src/libc/port/runenorm.c
@@ -0,0 +1,334 @@
+#include <u.h>
+#include <libc.h>
+
+#include "runenormdata"
+
+// Constants for Hangul syllable arithmetic.
+// See the Unicode Standard, Section 3.12 "Conjoining Jamo Behavior".
+enum {
+	// base code points used by the L, V, T and syllable calculations;
+	// the range tests in this file treat TBase itself as excluded
+	// (they compare with > TBase)
+	LBase = 0x1100,
+	VBase = 0x1161,
+	TBase = 0x11A7,
+	SBase = 0xAC00,
+
+	// sizes of the individual ranges
+	LCount = 19,
+	VCount = 21,
+	TCount = 28,
+	// NCount: syllables sharing one leading jamo; SCount: all syllables
+	NCount = VCount * TCount,
+	SCount = LCount * NCount,
+
+	// inclusive upper bounds derived from the bases and counts
+	LLast = LBase + LCount - 1,
+	VLast = VBase + VCount - 1,
+	TLast = TBase + TCount - 1,
+	SLast = SBase + SCount - 1,
+};
+
+// _runedecomp performs one decomposition step on c and stores the
+// resulting pair in dst.
+//
+// Hangul syllables (SBase..SLast) are split arithmetically: a syllable
+// with a trailing consonant yields {syllable without it, T jamo}, one
+// without yields {L jamo, V jamo}.
+// Every other rune goes through decomplkup(), whose result is read as
+// two 16-bit halves.  When the low half is zero and the high half lies
+// in [0xEEEE, 0xF8FF) the high half is used as an index (minus 0xEEEE)
+// into _decompexceptions.  Otherwise dst is {high half, 0}; callers in
+// this file treat dst[0] == 0 as "nothing further to decompose".
+static void
+_runedecomp(Rune dst[2], Rune c)
+{
+	uint packed, hi, lo, sidx, tidx;
+
+	if(c < SBase || c > SLast){
+		packed = decomplkup(c);
+		hi = packed >> 16;
+		lo = packed & 0xFFFF;
+		if(lo != 0){
+			dst[0] = hi;
+			dst[1] = lo;
+		} else if(hi >= 0xEEEE && hi < 0xF8FF){
+			memmove(dst, _decompexceptions[hi - 0xEEEE], sizeof(Rune)*2);
+		} else {
+			dst[0] = hi;
+			dst[1] = 0;
+		}
+		return;
+	}
+
+	// Hangul syllable: work with the offset from SBase
+	sidx = c - SBase;
+	tidx = sidx % TCount;
+	if(tidx != 0){
+		// strip the trailing consonant, leaving the LV syllable
+		dst[0] = SBase + (sidx - tidx);
+		dst[1] = TBase + tidx;
+	} else {
+		dst[0] = LBase + sidx / NCount;
+		dst[1] = VBase + (sidx % NCount) / TCount;
+	}
+}
+
+// _runerecomp tries to combine the pair r[0], r[1] into a single rune.
+// Returns the composed rune, or 0 when the pair does not compose.
+//
+// L jamo + V jamo and LV syllable + T jamo are computed arithmetically.
+// Pairs with either rune above 0xFFFF are searched linearly in
+// _recompexceptions (entries are {result, first, second}).
+// Everything else is packed into a 32-bit key, hashed, and looked up
+// in _recompdata (512 two-word slots); collisions are chained through
+// _recompcoll using the 1-based index stored in the high 16 bits of a
+// slot's second word, with 0 ending the chain.
+static Rune
+_runerecomp(Rune r[2])
+{
+	uint key, h, i, link, *ent;
+
+	if(r[0] >= LBase && r[0] <= LLast){
+		if(r[1] >= VBase && r[1] <= VLast)
+			return SBase + ((r[0] - LBase) * NCount + (r[1] - VBase) * TCount);
+		return 0;
+	}
+	if(r[0] >= SBase && r[0] <= SLast && (r[0] - SBase) % TCount == 0){
+		if(r[1] <= TBase || r[1] > TLast)
+			return 0;
+		return r[0] + (r[1] - TBase);
+	}
+	if(r[0] > 0xFFFF || r[1] > 0xFFFF){
+		i = 0;
+		while(i < nelem(_recompexceptions)){
+			if(_recompexceptions[i][1] == r[0] && _recompexceptions[i][2] == r[1])
+				return _recompexceptions[i][0];
+			i++;
+		}
+		return 0;
+	}
+
+	key = r[0]<<16 | r[1];
+	h = key;
+	h ^= h >> 16;
+	h *= 0x21f0aaad;
+	h ^= h >> 15;
+	h *= 0xd35a2d97;
+	h ^= h >> 15;
+
+	for(ent = _recompdata + (h%512)*2; ent[0] != key; ent = _recompcoll + (link-1)*2){
+		link = ent[1]>>16;
+		if(link == 0)
+			return 0;
+	}
+	return ent[1] & 0xFFFF;
+}
+
+// runecccsort reorders the len runes at a in place by canonical
+// combining class, as returned by ccclkup().
+//
+// An adjacent pair (A, B) is exchanged only when ccc(A) > ccc(B) and
+// ccc(B) > 0, so a rune with class 0 is never moved ahead of the rune
+// before it.  Equal classes are never exchanged, which keeps the sort
+// stable.  The passes repeat until one completes without an exchange.
+static void
+runecccsort(Rune *a, int len)
+{
+	Rune tmp;
+	int i, swapped;
+
+	do {
+		swapped = 0;
+		for(i = 0; i < len - 1; i++){
+			// "x > y > 0" would parse as "(x > y) > 0" and drop the
+			// y > 0 test, so both conditions are spelled out
+			if(ccclkup(a[i]) > ccclkup(a[i+1]) && ccclkup(a[i+1]) > 0){
+				tmp = a[i];
+				a[i] = a[i+1];
+				a[i+1] = tmp;
+				swapped = 1;
+			}
+		}
+	} while(swapped);
+}
+
+// fullutfnorm examines at most n bytes of the UTF-8 text at s and
+// reports whether the scan that begins at the first rune ran to
+// completion inside those bytes.
+//
+// Returns s when the input ends (or holds only a partial rune) before
+// the scan finishes; otherwise returns a pointer into s just past the
+// last rune examined.
+//
+// If the first rune is an L jamo or a Hangul syllable, following runes
+// are consumed while they are V or T jamo.  For any other first rune,
+// runes are consumed until one with combining class 0 is read.
+//
+// NOTE(review): on success the returned pointer is past the rune that
+// ended the scan, whereas fullrunenorm returns a pointer to that rune.
+// Confirm which of the two is intended.
+char*
+fullutfnorm(char *s, int n)
+{
+	Rune r, peek;
+	char *p, *p2;
+
+	p = s;
+	if(fullrune(p, n) == 0)
+		return s;
+
+	p += chartorune(&r, p);
+	n -= (p - s);
+
+	if((r >= LBase && r <= LLast) || (r >= SBase && r <= SLast)){
+		do {
+			if(fullrune(p, n) == 0)
+				return s;
+			p2 = p + chartorune(&peek, p);
+			n -= (p2 - p);
+			p = p2;
+		// && binds tighter than ||, so the jamo test needs its own
+		// parentheses for n > 0 to guard both alternatives
+		} while(n > 0 && ((peek >= VBase && peek <= VLast) || (peek > TBase && peek <= TLast)));
+		if(n <= 0)
+			return s;
+		return p;
+	}
+
+	do {
+		if(fullrune(p, n) == 0)
+			return s;
+		p2 = p + chartorune(&peek, p);
+		n -= (p2 - p);
+		p = p2;
+		if(ccclkup(peek) == 0)
+			return p;
+	} while(n > 0);
+
+	return s;
+}
+
+// fullrunenorm examines the n runes at r and reports whether the scan
+// that begins at r[0] ends inside the buffer.
+//
+// Returns r when n <= 0 or when the buffer ends before the scan does;
+// otherwise returns a pointer to the rune that ended the scan.
+//
+// If r[0] is an L jamo or a Hangul syllable, the scan skips following
+// V and T jamo.  For any other first rune it skips forward to the next
+// rune whose combining class is 0.
+Rune*
+fullrunenorm(Rune *r, int n)
+{
+	Rune *e, *p;
+
+	// nothing to look at; do not read r[0]
+	if(n <= 0)
+		return r;
+
+	p = r;
+	e = p + n;
+
+	if((*p >= LBase && *p <= LLast) || (*p >= SBase && *p <= SLast)){
+		p++;
+		// the jamo test is parenthesized as a whole so that p < e
+		// guards every read of *p; without the parentheses the T test
+		// ran (and the loop advanced) at and beyond e
+		while(p < e && ((*p >= VBase && *p <= VLast) || (*p > TBase && *p <= TLast)))
+			p++;
+
+		if(p >= e)
+			return r;
+		return p;
+	}
+
+	for(; p + 1 < e; p++)
+		if(ccclkup(p[1]) == 0)
+			return p + 1;
+
+	return r;
+}
+
+// runenorm is the shared worker behind runecomp, runedecomp, utfcomp
+// and utfdecomp.
+//
+// Input and output are either rune strings (src non-nil: reads src,
+// writes dst) or UTF-8 strings (src nil: reads ssrc, writes sdst).
+// Both kinds of input are read up to their terminating zero.
+// max is the capacity of the output buffer, terminator included.
+// compose selects whether decomposed sequences are recombined with
+// _runerecomp after reordering.
+//
+// Returns the length of the complete result, in runes for rune mode
+// and in bytes for UTF-8 mode, not counting the terminator.  The count
+// keeps growing even when the output no longer fits, so a value that
+// does not fit in max indicates truncated output.
+//
+// The input is handled one segment at a time in a 32-rune scratch
+// array: the leading rune is fully decomposed downward from the middle
+// of the array, the jamo / combining marks that follow it are appended
+// upward, the segment is sorted by combining class, optionally
+// composed, and then emitted.
+static int
+runenorm(Rune *dst, Rune *src, char *sdst, char *ssrc, int max, int compose)
+{
+	Rune c, r[2], _stack[32];
+	Rune *p, *stack, *sp, *tp;
+	char *strp, *strstop;
+	Rune *rp, *rrp;
+	Rune *stop;
+	Rune peek;
+	int w, w2, size;
+	int mode;
+
+	// the cursor of the unused mode points at an empty string so the
+	// main loop condition can test both
+	if(src){
+		mode = 1;
+		p = src;
+		stop = dst + (max - 1);
+		strp = "";
+		strstop = nil;
+	} else {
+		mode = 0;
+		p = L"";
+		stop = nil;
+		strp = ssrc;
+		strstop = sdst + (max - 1);
+	}
+
+	stack = _stack + nelem(_stack)/2;
+	size = 0;
+	w = w2 = 0;
+	while(*strp || *p){
+		if(mode)
+			c = *p;
+		else
+			w = chartorune(&c, strp);
+
+		// decompose the leading rune: second halves are pushed
+		// downward from the middle, the first half is decomposed again
+		sp = stack - 1;
+		tp = stack;
+		_runedecomp(r, c);
+		while(r[0] != 0){
+			c = r[0];
+			if(r[1] != 0){
+				*sp-- = r[1];
+				if(sp == _stack)
+					break;
+			}
+			_runedecomp(r, c);
+		}
+
+		*sp = c;
+		if(mode)
+			peek = p[1];
+		else
+			w2 = chartorune(&peek, strp+w);
+
+		// Hangul: gather the V and T jamo that follow an L jamo or syllable
+		if((*sp >= LBase && *sp <= LLast) || (*sp >= SBase && *sp <= SLast)){
+			while(peek != 0 && ((peek >= VBase && peek <= VLast) || (peek > TBase && peek <= TLast))){
+				*tp++ = peek;
+				if(mode){
+					p++;
+					peek = p[1];
+				} else {
+					strp += w;
+					w = w2;
+					w2 = chartorune(&peek, strp+w);
+				}
+				if(tp == _stack + nelem(_stack))
+					break;
+			}
+		}
+		// gather following runes with a non-zero combining class.
+		// tp is checked on entry as well: the jamo loop above can leave
+		// the scratch array full, and a single-rune store here would
+		// then write past its end and never meet the == test below.
+		while(tp < _stack + nelem(_stack) && peek != 0 && ccclkup(peek) != 0){
+			_runedecomp(r, peek);
+			if(r[1] != 0){
+				if(tp+1 >= _stack + nelem(_stack))
+					break;
+				*tp++ = r[0];
+				*tp++ = r[1];
+			} else if(r[0] != 0)
+				*tp++ = r[0];
+			else
+				*tp++ = peek;
+
+			if(mode){
+				p++;
+				peek = p[1];
+			} else {
+				strp += w;
+				w = w2;
+				w2 = chartorune(&peek, strp+w);
+			}
+			if(tp == _stack + nelem(_stack))
+				break;
+		}
+		runecccsort(sp, tp - sp);
+
+		// try to fold each following rune into the one at sp; on
+		// success the runes between are shifted up one slot and sp
+		// advances to the composed rune's new position
+		if(compose && ccclkup(*sp) == 0){
+			for(rp = sp + 1; rp < tp; rp++){
+				r[0] = *sp;
+				r[1] = *rp;
+				c = _runerecomp(r);
+				if(c != 0){
+					*sp = c;
+					for(rrp = rp; rrp > sp; rrp--)
+						*rrp = rrp[-1];
+					sp++;
+				} else while(rp + 1 < tp && ccclkup(*rp) == ccclkup(*(rp+1)))
+					rp++;
+			}
+		}
+
+		// emit the segment; size counts runes that did not fit as well
+		for(; sp < tp; sp++){
+			if(mode){
+				if(dst < stop)
+					*dst++ = *sp;
+				size++;
+			} else {
+				w2 = runelen(*sp);
+				if(sdst+w2 < strstop)
+					sdst += runetochar(sdst, sp);
+				size += w2;
+			}
+		}
+		if(mode)
+			p++;
+		else
+			strp += w;
+	}
+	if(mode)
+		*dst = 0;
+	else
+		*sdst = 0;
+	return size;
+}
+
+// runecomp runs runenorm on the zero-terminated rune string src with
+// composition enabled, writing at most max runes (terminator
+// included) to dst.  Returns runenorm's result: the rune count of the
+// complete output, excluding the terminator.
+int
+runecomp(Rune *dst, Rune *src, int max)
+{
+	int n;
+
+	n = runenorm(dst, src, nil, nil, max, 1);
+	return n;
+}
+
+// runedecomp runs runenorm on the zero-terminated rune string src with
+// composition disabled, writing at most max runes (terminator
+// included) to dst.  Returns runenorm's result: the rune count of the
+// complete output, excluding the terminator.
+int
+runedecomp(Rune *dst, Rune *src, int max)
+{
+	int n;
+
+	n = runenorm(dst, src, nil, nil, max, 0);
+	return n;
+}
+
+// utfcomp runs runenorm on the zero-terminated UTF-8 string src with
+// composition enabled, writing at most max bytes (terminator
+// included) to dst.  Returns runenorm's result: the byte count of the
+// complete output, excluding the terminator.
+int
+utfcomp(char *dst, char *src, int max)
+{
+	int n;
+
+	n = runenorm(nil, nil, dst, src, max, 1);
+	return n;
+}
+
+// utfdecomp runs runenorm on the zero-terminated UTF-8 string src with
+// composition disabled, writing at most max bytes (terminator
+// included) to dst.  Returns runenorm's result: the byte count of the
+// complete output, excluding the terminator.
+int
+utfdecomp(char *dst, char *src, int max)
+{
+	int n;
+
+	n = runenorm(nil, nil, dst, src, max, 0);
+	return n;
+}