From 744475a503e3af4597a4a038c3686c25abb48ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigrid=20Solveig=20Hafl=C3=ADnud=C3=B3ttir?= Date: Sat, 5 Nov 2022 23:41:43 +0000 Subject: read: add -r to read runes instead of bytes (thanks umbraticus) --- sys/man/1/cat | 23 ++++++++++------------ sys/src/cmd/read.c | 57 +++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 58 insertions(+), 22 deletions(-) (limited to 'sys') diff --git a/sys/man/1/cat b/sys/man/1/cat index 778cb5b42..3c14ee67d 100644 --- a/sys/man/1/cat +++ b/sys/man/1/cat @@ -10,12 +10,15 @@ cat, read \- catenate files .B read [ .B -m -] [ +| .B -n .I nlines -] [ +| .B -c .I nbytes +| +.B -r +.I nrunes ] [ .I file ... ] @@ -59,20 +62,14 @@ flag causes it to continue reading and writing multiple lines until end of file; causes it to read no more than .I nlines lines. -.PP -With the +The .B -c -flag, -.I read -copies exactly -.I nbytes -of characters instead of lines. It is mutually exclusive with -.B -n and -.B -m -flag. +.B -r +flags specify a number of bytes or runes to read instead of lines. .PP -.I Read +When reading lines, +.I read always executes a single .B write for each line of input, which can be helpful when diff --git a/sys/src/cmd/read.c b/sys/src/cmd/read.c index 6377fdc1d..e71a36d4f 100644 --- a/sys/src/cmd/read.c +++ b/sys/src/cmd/read.c @@ -2,8 +2,7 @@ #include int multi; -int nlines; -vlong nchars; +vlong count; char *status = nil; int @@ -50,7 +49,7 @@ lines(int fd, char *file) do{ if(line(fd, file) == 0) break; - }while(multi || --nlines>0); + }while(multi || --count > 0); } void @@ -60,10 +59,10 @@ chars(int fd, char *file) vlong m; int n; - for(m = 0; m < nchars; m += n){ + for(m = 0; m < count; m += n){ n = sizeof(buf); - if(n > (nchars - m)) - n = nchars - m; + if(n > (count - m)) + n = count - m; if((n = read(fd, buf, n)) < 0){ fprint(2, "read: error reading %s: %r\n", file); exits("read error"); @@ -77,10 +76,46 @@ chars(int fd, char *file) } } +void +runes(int fd, char *file) +{ + char buf[8*1024], *s, *e; + Rune r; + + while(count > 0){ + e = buf + read(fd, buf, count + UTFmax < sizeof buf ? count : sizeof buf - UTFmax); + if(e < buf){ + fprint(2, "read: error reading %s: %r\n", file); + exits("read error"); + } + if(e == buf) + break; + for(s = buf; s < e && fullrune(s, e - s); s += chartorune(&r, s)) + count--; + if(s < e){ + while(!fullrune(s, e - s)) + switch(read(fd, e, 1)){ + case -1: + fprint(2, "read: error reading %s: %r\n", file); + exits("read error"); + case 0: + fprint(2, "warning: partial rune at end of %s: %r\n", file); + write(1, buf, e - buf); + return; + case 1: + e++; + break; + } + count--; + } + write(1, buf, e - buf); + } +} + void usage(void) { - fprint(2, "usage: read [-m] [-n nlines] [-c nbytes] [files...]\n"); + fprint(2, "usage: read [ -m | -n nlines | -c nbytes | -r nrunes ] [ file ... ]\n"); exits("usage"); } @@ -93,11 +128,15 @@ main(int argc, char *argv[]) proc = lines; ARGBEGIN{ case 'c': - nchars = atoll(EARGF(usage())); + count = atoll(EARGF(usage())); proc = chars; break; + case 'r': + count = atoll(EARGF(usage())); + proc = runes; + break; case 'n': - nlines = atoi(EARGF(usage())); + count = atoi(EARGF(usage())); break; case 'm': multi = 1; -- cgit v1.2.3