diff options
author | cinap_lenrek <cinap_lenrek@gmx.de> | 2013-04-24 20:13:18 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@gmx.de> | 2013-04-24 20:13:18 +0200 |
commit | 667010554b30c46e35b9cad62edcfa01e37e1576 (patch) | |
tree | 418f828288c6c5c5ba0e6a18775af855966579f0 /sys/src/cmd/rc | |
parent | 78c7ba36a1a732c08fbb7e4f8b19d1bc825c5b7e (diff) |
make all the commands agnostic about Rune width. (from sources)
Diffstat (limited to 'sys/src/cmd/rc')
-rw-r--r-- | sys/src/cmd/rc/glob.c | 35 | ||||
-rw-r--r-- | sys/src/cmd/rc/lex.c | 20 | ||||
-rw-r--r-- | sys/src/cmd/rc/rc.h | 6 |
3 files changed, 33 insertions, 28 deletions
diff --git a/sys/src/cmd/rc/glob.c b/sys/src/cmd/rc/glob.c
index 1c4983e40..295d7b6a6 100644
--- a/sys/src/cmd/rc/glob.c
+++ b/sys/src/cmd/rc/glob.c
@@ -118,18 +118,16 @@ glob(void *ap)
 int
 equtf(uchar *p, uchar *q)
 {
+	Rune pr, qr;
+
 	if(*p!=*q)
-		return 0;
-	if(twobyte(*p)) return p[1]==q[1];
-	if(threebyte(*p)){
-		if(p[1]!=q[1])
-			return 0;
-		if(p[1]=='\0')
-			return 1;	/* broken code at end of string! */
-		return p[2]==q[2];
-	}
-	return 1;
+		return 0;
+
+	chartorune(&pr, (char*)p);
+	chartorune(&qr, (char*)q);
+	return pr == qr;
 }
+
 /*
  * Return a pointer to the next utf code in the string,
  * not jumping past nuls in broken utf codes!
@@ -138,10 +136,11 @@ equtf(uchar *p, uchar *q)
 uchar*
 nextutf(uchar *p)
 {
-	if(twobyte(*p)) return p[1]=='\0'?p+1:p+2;
-	if(threebyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p+3;
-	return p+1;
+	Rune dummy;
+
+	return p + chartorune(&dummy, (char*)p);
 }
+
 /*
  * Convert the utf code at *p to a unicode value
  */
@@ -149,14 +148,12 @@ nextutf(uchar *p)
 int
 unicode(uchar *p)
 {
-	int u = *p;
+	Rune r;
 
-	if(twobyte(u))
-		return ((u&0x1f)<<6)|(p[1]&0x3f);
-	if(threebyte(u))
-		return (u<<12)|((p[1]&0x3f)<<6)|(p[2]&0x3f);
-	return u;
+	chartorune(&r, (char*)p);
+	return r;
 }
+
 /*
  * Does the string s match the pattern p
  * . and .. are only matched by patterns starting with .
diff --git a/sys/src/cmd/rc/lex.c b/sys/src/cmd/rc/lex.c
index 369348328..fecd0ec64 100644
--- a/sys/src/cmd/rc/lex.c
+++ b/sys/src/cmd/rc/lex.c
@@ -166,15 +166,25 @@ addtok(char *p, int val)
 char*
 addutf(char *p, int c)
 {
-	p = addtok(p, c);
-	if(twobyte(c))	/* 2-byte escape */
-		return addtok(p, advance());
-	if(threebyte(c)){	/* 3-byte escape */
+	uchar b, m;
+	int i;
+
+	p = addtok(p, c);	/* 1-byte UTF runes are special */
+	if(onebyte(c))
+		return p;
+
+	m = 0xc0;
+	b = 0x80;
+	for(i=1; i < UTFmax; i++){
+		if((c&m) == b)
+			break;
 		p = addtok(p, advance());
-		return addtok(p, advance());
+		b = m;
+		m = (m >> 1)|0x80;
 	}
 	return p;
 }
+
 int lastdol;	/* was the last token read '$' or '$#' or '"'? */
 int lastword;	/* was the last token read a word or compound word terminator? */
 
diff --git a/sys/src/cmd/rc/rc.h b/sys/src/cmd/rc/rc.h
index 242a9b5ea..2e1d9ae59 100644
--- a/sys/src/cmd/rc/rc.h
+++ b/sys/src/cmd/rc/rc.h
@@ -123,12 +123,10 @@ int mypid;
  */
 #define	GLOB	((char)0x01)
 /*
- * onebyte(c), twobyte(c), threebyte(c)
- * Is c the first character of a one- two- or three-byte utf sequence?
+ * onebyte(c)
+ * Is c the first character of a one-byte utf sequence?
  */
 #define	onebyte(c)	((c&0x80)==0x00)
-#define	twobyte(c)	((c&0xe0)==0xc0)
-#define	threebyte(c)	((c&0xf0)==0xe0)
 
 char **argp;
 char **args;