summary | refs | log | tree | commit | diff
path: root/sys/src/cmd/rc
diff options
context:
space:
mode:
author: cinap_lenrek <cinap_lenrek@gmx.de> 2013-04-24 20:13:18 +0200
committer: cinap_lenrek <cinap_lenrek@gmx.de> 2013-04-24 20:13:18 +0200
commit: 667010554b30c46e35b9cad62edcfa01e37e1576 (patch)
tree: 418f828288c6c5c5ba0e6a18775af855966579f0 /sys/src/cmd/rc
parent: 78c7ba36a1a732c08fbb7e4f8b19d1bc825c5b7e (diff)
make all the commands agnostic about Rune width. (from sources)
Diffstat (limited to 'sys/src/cmd/rc')
-rw-r--r--  sys/src/cmd/rc/glob.c  35
-rw-r--r--  sys/src/cmd/rc/lex.c   20
-rw-r--r--  sys/src/cmd/rc/rc.h     6
3 files changed, 33 insertions, 28 deletions
diff --git a/sys/src/cmd/rc/glob.c b/sys/src/cmd/rc/glob.c
index 1c4983e40..295d7b6a6 100644
--- a/sys/src/cmd/rc/glob.c
+++ b/sys/src/cmd/rc/glob.c
@@ -118,18 +118,16 @@ glob(void *ap)
int
equtf(uchar *p, uchar *q)
{
+ Rune pr, qr;
+
if(*p!=*q)
- return 0;
- if(twobyte(*p)) return p[1]==q[1];
- if(threebyte(*p)){
- if(p[1]!=q[1])
- return 0;
- if(p[1]=='\0')
- return 1; /* broken code at end of string! */
- return p[2]==q[2];
- }
- return 1;
+ return 0;
+
+ chartorune(&pr, (char*)p);
+ chartorune(&qr, (char*)q);
+ return pr == qr;
}
+
/*
* Return a pointer to the next utf code in the string,
* not jumping past nuls in broken utf codes!
@@ -138,10 +136,11 @@ equtf(uchar *p, uchar *q)
uchar*
nextutf(uchar *p)
{
- if(twobyte(*p)) return p[1]=='\0'?p+1:p+2;
- if(threebyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p+3;
- return p+1;
+ Rune dummy;
+
+ return p + chartorune(&dummy, (char*)p);
}
+
/*
* Convert the utf code at *p to a unicode value
*/
@@ -149,14 +148,12 @@ nextutf(uchar *p)
int
unicode(uchar *p)
{
- int u = *p;
+ Rune r;
- if(twobyte(u))
- return ((u&0x1f)<<6)|(p[1]&0x3f);
- if(threebyte(u))
- return (u<<12)|((p[1]&0x3f)<<6)|(p[2]&0x3f);
- return u;
+ chartorune(&r, (char*)p);
+ return r;
}
+
/*
* Does the string s match the pattern p
* . and .. are only matched by patterns starting with .
diff --git a/sys/src/cmd/rc/lex.c b/sys/src/cmd/rc/lex.c
index 369348328..fecd0ec64 100644
--- a/sys/src/cmd/rc/lex.c
+++ b/sys/src/cmd/rc/lex.c
@@ -166,15 +166,25 @@ addtok(char *p, int val)
char*
addutf(char *p, int c)
{
- p = addtok(p, c);
- if(twobyte(c)) /* 2-byte escape */
- return addtok(p, advance());
- if(threebyte(c)){ /* 3-byte escape */
+ uchar b, m;
+ int i;
+
+ p = addtok(p, c); /* 1-byte UTF runes are special */
+ if(onebyte(c))
+ return p;
+
+ m = 0xc0;
+ b = 0x80;
+ for(i=1; i < UTFmax; i++){
+ if((c&m) == b)
+ break;
p = addtok(p, advance());
- return addtok(p, advance());
+ b = m;
+ m = (m >> 1)|0x80;
}
return p;
}
+
int lastdol; /* was the last token read '$' or '$#' or '"'? */
int lastword; /* was the last token read a word or compound word terminator? */
diff --git a/sys/src/cmd/rc/rc.h b/sys/src/cmd/rc/rc.h
index 242a9b5ea..2e1d9ae59 100644
--- a/sys/src/cmd/rc/rc.h
+++ b/sys/src/cmd/rc/rc.h
@@ -123,12 +123,10 @@ int mypid;
*/
#define GLOB ((char)0x01)
/*
- * onebyte(c), twobyte(c), threebyte(c)
- * Is c the first character of a one- two- or three-byte utf sequence?
+ * onebyte(c)
+ * Is c the first character of a one-byte utf sequence?
*/
#define onebyte(c) ((c&0x80)==0x00)
-#define twobyte(c) ((c&0xe0)==0xc0)
-#define threebyte(c) ((c&0xf0)==0xe0)
char **argp;
char **args;