diff options
author | Jacob Moody <moody@posixcafe.org> | 2023-03-26 01:02:20 +0000 |
---|---|---|
committer | Jacob Moody <moody@posixcafe.org> | 2023-03-26 01:02:20 +0000 |
commit | 04759ec9af6dcc78ea5873ceaf6db2e3b3920b22 (patch) | |
tree | 0e20684eee0f36fff952c46bc14828f1005dfb31 /sys/src/libc/port/runebreak.c | |
parent | 2163aebcb85e8214869a2c026b3fc5bd9ddac22c (diff) |
runecomp(2)
Diffstat (limited to 'sys/src/libc/port/runebreak.c')
-rw-r--r-- | sys/src/libc/port/runebreak.c | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/sys/src/libc/port/runebreak.c b/sys/src/libc/port/runebreak.c new file mode 100644 index 000000000..1d113b38f --- /dev/null +++ b/sys/src/libc/port/runebreak.c @@ -0,0 +1,293 @@ +#include <u.h> +#include <libc.h> + +#include "runebreakdata" + +enum { + OTHER, + Hebrew_Letter, Newline, Extend, Format, + Katakana, ALetter, MidLetter, MidNum, + MidNumLet, Numeric, ExtendNumLet, WSegSpace, + PREPEND = 0x10, CONTROL = 0x20, EXTEND = 0x30, REGION = 0x40, + L = 0x50, V = 0x60, T = 0x70, LV = 0x80, LVT = 0x90, SPACEMK = 0xA0, + EMOJIEX = 0xB0, + + ZWJ = 0x200DU, + LINETAB = 0xB, +}; + +#define IS(x, y) ((x&0xf) == y) +#define ISG(x, y) ((x&0xf0) == y) + +Rune* +runegbreak(Rune *s) +{ + Rune l, r; + uchar lt, rt; + Rune *p; + + p = s; + if((l = *p++) == 0) + return s; + if((r = *p) == 0) + return s; + lt = breaklkup(l); + rt = breaklkup(r); + if(l == '\r' && r == '\n') + goto Done; + if(ISG(lt, CONTROL) || l == '\r' || l == '\n') + return p; + if(ISG(rt, CONTROL) || r == '\r' || r == '\n') + return p; + if(ISG(lt, L) && (ISG(rt, L) || ISG(rt, V) || ISG(rt, LV) || ISG(rt, LVT))) + goto Done; + if((ISG(lt, LV) || ISG(lt, V)) && (ISG(rt, V) || ISG(rt, T))) + goto Done; + if((ISG(lt, LVT) || ISG(lt, T)) && (ISG(rt, T) || ISG(rt, T))) + goto Done; + if(ISG(rt, SPACEMK) || ISG(lt, PREPEND)) + goto Done; + if(ISG(lt, EMOJIEX) && (ISG(rt, EXTEND) || r == ZWJ)){ + while(ISG(rt, EXTEND)){ + p++; + if((r = *p) == 0) + return s; + rt = breaklkup(r); + } + if(r != ZWJ) + return p; + p++; + if((r = *p) == 0) + return s; + rt = breaklkup(r); + if(ISG(rt, EMOJIEX)) + goto Done; + return p; + } + if(ISG(rt, EXTEND) || r == ZWJ) + goto Done; + if(ISG(lt, REGION) && ISG(rt, REGION)) + goto Done; + + return p; + +Done: + if(p[1] == 0) + return s; + return p + 1; +} + +char* +utfgbreak(char *s) +{ + Rune l, r; + uchar lt, rt; + char *p; + + p = s; + p += chartorune(&l, p); + if(l == 0) + return s; + chartorune(&r, p); + if(r == 0) + return s; + lt = breaklkup(l); + rt = breaklkup(r); + if(l == '\r' && r == '\n') + goto Done; + if(ISG(lt, CONTROL) || l == '\r' || l == '\n') + return p; + if(ISG(rt, CONTROL) || r == '\r' || r == '\n') + return p; + if(ISG(lt, L) && (ISG(rt, L) || ISG(rt, V) || ISG(rt, LV) || ISG(rt, LVT))) + goto Done; + if((ISG(lt, LV) || ISG(lt, V)) && (ISG(rt, V) || ISG(rt, T))) + goto Done; + if((ISG(lt, LVT) || ISG(lt, T)) && (ISG(rt, T) || ISG(rt, T))) + goto Done; + if(ISG(rt, SPACEMK) || ISG(lt, PREPEND)) + goto Done; + if(ISG(lt, EMOJIEX) && (ISG(rt, EXTEND) || r == ZWJ)){ + while(ISG(rt, EXTEND)){ + p += chartorune(&r, p); + chartorune(&r, p); + if(r == 0) + return s; + rt = breaklkup(r); + } + if(r != ZWJ) + return p; + + p += chartorune(&r, p); + chartorune(&r, p); + if(r == 0) + return s; + rt = breaklkup(r); + if(ISG(rt, EMOJIEX)) + goto Done; + return p; + } + if(ISG(rt, EXTEND) || r == ZWJ) + goto Done; + if(ISG(lt, REGION) && ISG(rt, REGION)) + goto Done; + + return p; + +Done: + p += chartorune(&r, p); + chartorune(&r, p); + if(r == 0) + return s; + return p; +} + +#define AH(x) (IS(x, ALetter) || IS(x, Hebrew_Letter)) +#define MNLQ(x) (IS(x, MidNumLet) || x == '\'') + +Rune* +runewbreak(Rune *s) +{ + Rune l, r; + uchar lt, rt; + Rune *p; + + p = s; + if((l = *p++) == 0) + return s; + if((r = *p) == 0) + return s; + lt = breaklkup(l); + rt = breaklkup(r); + if(l == '\r' && r == '\n') + goto Done; + if(l == '\r' || l == '\n' || l == LINETAB) + return p; + if(r == '\r' || r == '\n' || l == LINETAB) + return p; + if(IS(lt, WSegSpace) && IS(rt, WSegSpace)) + goto Done; + if(IS(rt, Format) || IS(rt, Extend)) + goto Done; + if(AH(lt)){ + if(AH(rt)) + goto Done; + if((IS(rt, MidLetter) || MNLQ(rt)) && p[1] != 0 && AH(breaklkup(p[1]))) + goto Done; + if(IS(lt, Hebrew_Letter) && r == '\'') + goto Done; + if(IS(lt, Hebrew_Letter) && r == '"' && p[1] != 0 && IS(breaklkup(p[1]), Hebrew_Letter)) + goto Done; + if(IS(rt, Numeric)) + goto Done; + } + if(IS(lt, Numeric) && (AH(rt) || IS(rt, Numeric))) + goto Done; + if(IS(lt, Numeric) && (IS(rt, MidNum) || MNLQ(rt)) && p[1] != 0 && IS(breaklkup(p[1]), Numeric)) + goto Done; + if(IS(lt, Katakana) && IS(rt, Katakana)) + goto Done; + if(AH(lt) || IS(lt, Numeric) || IS(lt, Katakana) || IS(lt, ExtendNumLet)) + if(IS(rt, ExtendNumLet)) + goto Done; + if(IS(lt, ExtendNumLet) && (AH(rt) || IS(rt, Numeric) || IS(rt, Katakana))) + goto Done; + if(ISG(lt, REGION)){ + if(ISG(rt, REGION)) + goto Done; + if(r != ZWJ) + return p; + p++; + if((r = *p) == 0) + return s; + rt = breaklkup(r); + if(ISG(rt, REGION)) + goto Done; + } + + return p; + +Done: + if(p[1] == 0) + return s; + return p + 1; +} + +char* +utfwbreak(char *s) +{ + Rune l, r; + Rune peek; + uchar lt, rt; + char *p; + + p = s; + p += chartorune(&l, p); + if(l == 0) + return s; + chartorune(&peek, p+chartorune(&r, p)); + if(r == 0) + return s; + lt = breaklkup(l); + rt = breaklkup(r); + if(l == '\r' && r == '\n') + goto Done; + if(l == '\r' || l == '\n' || l == LINETAB) + return p; + if(r == '\r' || r == '\n' || l == LINETAB) + return p; + if(IS(lt, WSegSpace) && IS(rt, WSegSpace)) + goto Done; + if(IS(rt, Format) || IS(rt, Extend)) + goto Done; + if(AH(lt)){ + if(AH(rt)) + goto Done; + if(IS(rt, MidLetter) || MNLQ(rt)) + if(peek != 0 && AH(breaklkup(peek))) + goto Done; + + if(IS(lt, Hebrew_Letter) && r == '\'') + goto Done; + + if(IS(lt, Hebrew_Letter) && r == '"') + if(peek != 0 && IS(breaklkup(peek), Hebrew_Letter)) + goto Done; + + if(IS(rt, Numeric)) + goto Done; + } + if(IS(lt, Numeric) && (AH(rt) || IS(rt, Numeric))) + goto Done; + if(IS(lt, Numeric) && (IS(rt, MidNum) || MNLQ(rt)) && peek != 0 && IS(breaklkup(peek), Numeric)) + goto Done; + if(IS(lt, Katakana) && IS(rt, Katakana)) + goto Done; + if(AH(lt) || IS(lt, Numeric) || IS(lt, Katakana) || IS(lt, ExtendNumLet)) + if(IS(rt, ExtendNumLet)) + goto Done; + if(IS(lt, ExtendNumLet) && (AH(rt) || IS(rt, Numeric) || IS(rt, Katakana))) + goto Done; + if(ISG(lt, REGION)){ + if(ISG(rt, REGION)) + goto Done; + if(r != ZWJ) + return p; + p += chartorune(&r, p); + chartorune(&r, p); + if(r == 0) + return s; + rt = breaklkup(r); + if(ISG(rt, REGION)) + goto Done; + } + + return p; + +Done: + p += chartorune(&r, p); + chartorune(&r, p); + if(r == 0) + return s; + return p; +} |