summaryrefslogtreecommitdiff
path: root/sys/src/cmd/tcs/tcs.c
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@felloff.net>2014-05-11 00:54:59 +0200
committercinap_lenrek <cinap_lenrek@felloff.net>2014-05-11 00:54:59 +0200
commitedca217bb99f7c32413c117239d12acdc223e811 (patch)
treec77aa8a8494ffe8784bf3b4e264579a50c2c4233 /sys/src/cmd/tcs/tcs.c
parent7388792a124756a528666cb5c375ee919db9ca11 (diff)
tcs: handle surrogate pairs
Diffstat (limited to 'sys/src/cmd/tcs/tcs.c')
-rw-r--r--sys/src/cmd/tcs/tcs.c142
1 files changed, 97 insertions, 45 deletions
diff --git a/sys/src/cmd/tcs/tcs.c b/sys/src/cmd/tcs/tcs.c
index 2a8b5b353..7732a03b5 100644
--- a/sys/src/cmd/tcs/tcs.c
+++ b/sys/src/cmd/tcs/tcs.c
@@ -73,7 +73,6 @@ main(int argc, char **argv)
break;
} ARGEND
- USED(argc);
if(verbose)
squawk = 1;
if(listem){
@@ -214,49 +213,63 @@ Again:
}
void
-unicode_in_be(int fd, long *notused, struct convert *out)
+unicode_in_be(int fd, long *, struct convert *out)
{
- int i, n;
- Rune buf[N], r;
- uchar *p;
+ uchar buf[2*N], *p, *e;
+ Rune *r, r2;
+ int n;
- USED(notused);
- while((n = cread(fd, (char *)buf, 2*N, 2)) > 0){
- /* go backwards as sizeof(Rune) >= 2 */
- p = (uchar*)buf + n;
+ r2 = 0;
+ while((n = cread(fd, (char*)buf, 2*N, 2)) > 0){
ninput += n;
- n /= 2;
- for(i=n-1; i>=0; i--){
- r = *(--p);
- r |= *(--p) << 8;
- buf[i] = r;
+ p = buf;
+ e = buf + n;
+ r = runes;
+ while(p < e){
+ *r = *p++ << 8;
+ *r |= *p++;
+ if(fixsurrogate(r, r2)){
+ r2 = *r;
+ continue;
+ }
+ r2 = 0;
+ r++;
+ }
+ if(r > runes){
+ OUT(out, runes, r-runes);
}
- OUT(out, buf, n);
}
- OUT(out, buf, 0);
+ OUT(out, runes, 0);
}
void
-unicode_in_le(int fd, long *notused, struct convert *out)
+unicode_in_le(int fd, long *, struct convert *out)
{
- int i, n;
- Rune buf[N], r;
- uchar *p;
+ uchar buf[2*N], *p, *e;
+ Rune *r, r2;
+ int n;
- USED(notused);
- while((n = cread(fd, (char *)buf, 2*N, 2)) > 0){
- /* go backwards as sizeof(Rune) >= 2 */
- p = (uchar*)buf + n;
+ r2 = 0;
+ while((n = cread(fd, (char*)buf, 2*N, 2)) > 0){
ninput += n;
- n /= 2;
- for(i=n-1; i>=0; i--){
- r = *(--p) << 8;
- r |= *(--p);
- buf[i] = r;
+ p = buf;
+ e = buf + n;
+ r = runes;
+ while(p < e){
+ *r = *p++;
+ *r |= *p++ << 8;
+ if(fixsurrogate(r, r2)){
+ r2 = *r;
+ continue;
+ }
+ r2 = 0;
+ r++;
+ }
+ if(r > runes){
+ OUT(out, runes, r-runes);
}
- OUT(out, buf, n);
}
- OUT(out, buf, 0);
+ OUT(out, runes, 0);
}
void
@@ -284,41 +297,57 @@ unicode_in(int fd, long *notused, struct convert *out)
}
void
-unicode_out_be(Rune *base, int n, long *notused)
+unicode_out_be(Rune *base, int n, long *)
{
int i;
uchar *p;
- Rune r;
+ unsigned long r;
- USED(notused);
p = (uchar*)base;
for(i=0; i<n; i++){
r = base[i];
- *p++ = r>>8;
- *p++ = r;
+ if(r > 0xFFFF){
+ r -= 0x10000;
+ *p++ = ((r>>18)&3) + 0xD8;
+ *p++ = r>>10;
+ *p++ = ((r>>8)&3) + 0xDC;
+ *p++ = r;
+ } else {
+ *p++ = r>>8;
+ *p++ = r;
+ }
}
nrunes += n;
- noutput += 2*n;
- write(1, (char *)base, 2*n);
+ n = p - (uchar*)base;
+ noutput += n;
+ write(1, (char *)base, n);
}
void
-unicode_out_le(Rune *base, int n, long *notused)
+unicode_out_le(Rune *base, int n, long *)
{
int i;
uchar *p;
- Rune r;
+ unsigned long r;
- USED(notused);
p = (uchar*)base;
for(i=0; i<n; i++){
r = base[i];
- *p++ = r;
- *p++ = r>>8;
+ if(r > 0xFFFF){
+ r -= 0x10000;
+ *p++ = r>>10;
+ *p++ = ((r>>18)&3) + 0xD8;
+ *p++ = r;
+ *p++ = ((r>>8)&3) + 0xDC;
+ } else {
+ *p++ = r;
+ *p++ = r>>8;
+ }
}
nrunes += n;
- noutput += 2*n;
- write(1, (char *)base, 2*n);
+ n = p - (uchar*)base;
+ noutput += n;
+ write(1, (char *)base, n);
}
void
@@ -403,6 +432,29 @@ outtable(Rune *base, int n, long *map)
write(1, obuf, p-obuf);
}
+int
+fixsurrogate(Rune *rp, Rune r2)
+{
+ Rune r1;
+
+ r1 = *rp;
+ if(r1 >= 0xD800 && r1 <= 0xDBFF){
+ if(r2 >= 0xDC00 && r2 <= 0xDFFF){
+ *rp = 0x10000 + (((r1 - 0xD800)<<10) | (r2 - 0xDC00));
+ return 0;
+ }
+ return 1;
+ } else
+ if(r1 >= 0xDC00 && r1 <= 0xDFFF){
+ if(r2 >= 0xD800 && r2 <= 0xDBFF){
+ *rp = 0x10000 + (((r2 - 0xD800)<<10) | (r1 - 0xDC00));
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
long tabascii[256] =
{
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,