diff options
author | Jacob Moody <moody@posixcafe.org> | 2023-03-27 03:45:32 +0000 |
---|---|---|
committer | Jacob Moody <moody@posixcafe.org> | 2023-03-27 03:45:32 +0000 |
commit | d2753b4d5f877b14426b55554945863364b0fbbf (patch) | |
tree | e9e4adb115c54c6ee14062d7a109fd9e1e49f72a /sys | |
parent | e0d114547c7f54ebd32b69f922e7d6538a63636e (diff) |
tcs: add nfc and nfd output formats
Diffstat (limited to 'sys')
-rw-r--r-- | sys/man/1/tcs | 6 | ||||
-rw-r--r-- | sys/src/cmd/tcs/hdr.h | 2 | ||||
-rw-r--r-- | sys/src/cmd/tcs/tcs.c | 4 | ||||
-rw-r--r-- | sys/src/cmd/tcs/utf.c | 40 |
4 files changed, 52 insertions, 0 deletions
diff --git a/sys/man/1/tcs b/sys/man/1/tcs
index 664073d36..e13b21c0e 100644
--- a/sys/man/1/tcs
+++ b/sys/man/1/tcs
@@ -144,6 +144,12 @@ IBM PC: CP 437
 .TP
 .B atari
 Atari-ST character set
+.TP
+.B nfd
+Unicode Normalization Form D
+.TP
+.B nfc
+Unicode Normalization Form C
 .SH EXAMPLES
 .TP
 .B tcs -f 8859-1
diff --git a/sys/src/cmd/tcs/hdr.h b/sys/src/cmd/tcs/hdr.h
index 35ec19993..62d70b790 100644
--- a/sys/src/cmd/tcs/hdr.h
+++ b/sys/src/cmd/tcs/hdr.h
@@ -23,6 +23,8 @@ int fixsurrogate(Rune *rp, Rune r2);
 
 void utf_in(int, long *, struct convert *);
 void utf_out(Rune *, int, long *);
+void utfnfc_out(Rune *, int, long *);
+void utfnfd_out(Rune *, int, long *);
 void isoutf_in(int, long *, struct convert *);
 void isoutf_out(Rune *, int, long *);
 
diff --git a/sys/src/cmd/tcs/tcs.c b/sys/src/cmd/tcs/tcs.c
index e3ed46705..229059574 100644
--- a/sys/src/cmd/tcs/tcs.c
+++ b/sys/src/cmd/tcs/tcs.c
@@ -613,6 +613,10 @@ struct convert convert[] =
 	{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
 	{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
 	{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
+	{ "nfc", "Unicode Normalization Form C", From|Func, 0, (Fnptr)utf_in },
+	{ "nfc", "Unicode Normalization Form C", Func, 0, (Fnptr)utfnfc_out },
+	{ "nfd", "Unicode Normalization Form D", From|Func, 0, (Fnptr)utf_in },
+	{ "nfd", "Unicode Normalization Form D", Func, 0, (Fnptr)utfnfd_out },
 	{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
 	{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
 	{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
diff --git a/sys/src/cmd/tcs/utf.c b/sys/src/cmd/tcs/utf.c
index 16438daf5..40ef65b2d 100644
--- a/sys/src/cmd/tcs/utf.c
+++ b/sys/src/cmd/tcs/utf.c
@@ -69,6 +69,46 @@ utf_out(Rune *base, int n, long *)
 }
 
 void
+utfnorm_out(Rune *base, int n, int (*fn)(Rune*,Rune*,int))
+{
+	static Rune rbuf[32];
+	static int nremain = 0;
+	Rune src[N + 1 + nelem(rbuf)];
+	Rune dst[N + 1 + nelem(rbuf)];
+	Rune *p, *p2, *e;
+	int i;
+
+	e = base+n;
+	for(i = 0; i < nremain; i++,n++)
+		src[i] = rbuf[i];
+	nremain = 0;
+	for(p2 = p = base; n > 0;){
+		p2 = fullrunenorm(p, n);
+		if(p == p2)
+			break;
+		n -= p2-p;
+		for(;p < p2; p++)
+			src[i++] = *p;
+	}
+	src[i] = 0;
+	utf_out(dst, fn(dst, src, sizeof dst), nil);
+	for(; p2 < e; p2++)
+		rbuf[nremain++] = *p2;
+}
+
+void
+utfnfc_out(Rune *base, int n, long *)
+{
+	utfnorm_out(base, n, runecomp);
+}
+
+void
+utfnfd_out(Rune *base, int n, long *)
+{
+	utfnorm_out(base, n, runedecomp);
+}
+
+void
 isoutf_in(int fd, long *, struct convert *out)
 {
 	char buf[N];