summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@gmx.de>2012-10-05 23:14:23 +0200
committercinap_lenrek <cinap_lenrek@gmx.de>2012-10-05 23:14:23 +0200
commit4af54089530023c4f8e704fbbb0921c13c7bd4fd (patch)
tree59ba7a55d4b865342cd65e5a783fe7a235fccfd7
parent0c93da13aeda2337f1e7a81f8793bca52887c741 (diff)
replace urlencode with C version that isn't broken for UTF-8
-rwxr-xr-xrc/bin/urlencode36
-rw-r--r--sys/src/cmd/urlencode.c98
2 files changed, 98 insertions, 36 deletions
diff --git a/rc/bin/urlencode b/rc/bin/urlencode
deleted file mode 100755
index a23359328..000000000
--- a/rc/bin/urlencode
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/awk -f
-BEGIN {
-# We assume an awk implementation that is just plain dumb.
-# We will convert an character to its ASCII value with the
-# table ord[], and produce two-digit hexadecimal output
-# without the printf("%02X") feature.
-
-EOL = "%0A" # "end of line" string (encoded)
-split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
-hextab [0] = 0
-for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
-}
-{
-encoded = ""
-for ( i=1; i<=length ($0); ++i ) {
- c = substr ($0, i, 1)
- if ( c ~ /[a-zA-Z0-9.-]/ ) {
- encoded = encoded c # safe character
- } else if ( c == " " ) {
- encoded = encoded "+" # special handling
- } else {
- # unsafe character, encode it as a two-digit hex-number
- lo = ord [c] % 16
- hi = int (ord [c] / 16);
- encoded = encoded "%" hextab [hi] hextab [lo]
- }
-}
-if ( EncodeEOL ) {
- printf ("%s", encoded EOL)
-} else {
- print encoded
-}
-}
-END {
- #if ( EncodeEOL ) print ""
-}
diff --git a/sys/src/cmd/urlencode.c b/sys/src/cmd/urlencode.c
new file mode 100644
index 000000000..561ca9157
--- /dev/null
+++ b/sys/src/cmd/urlencode.c
@@ -0,0 +1,98 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/* Buffered input stream; main binds it to file descriptor 0. */
+Biobuf bin;
+/* Buffered output stream; main binds it to file descriptor 1. */
+Biobuf bout;
+/* Set by the -d option: decode instead of encode. */
+int dflag;
+
+/*
+ * Hexadecimal digit tables, indexed by digit value.
+ * hexdigit() searches both, so either case is accepted when decoding;
+ * the encoder in main emits digits from the uppercase table.
+ */
+char hex[] = "0123456789abcdef";
+char Hex[] = "0123456789ABCDEF";
+
+/*
+ * Return the value (0-15) of the hexadecimal digit character c,
+ * accepting both upper and lower case, or -1 if c is not a hex digit.
+ * Negative c (e.g. an end-of-file indication) also yields -1.
+ */
+int
+hexdigit(int c)
+{
+	char *p;
+
+	/*
+	 * c must be strictly positive: strchr(s, 0) matches the string's
+	 * terminating NUL, which would make a NUL byte look like a digit
+	 * with the out-of-range value 16.
+	 */
+	if(c > 0){
+		if((p = strchr(Hex, c)) != 0)
+			return p - Hex;
+		if((p = strchr(hex, c)) != 0)
+			return p - hex;
+	}
+	return -1;
+}
+
+/*
+ * Print the command synopsis on file descriptor 2 and
+ * terminate the process with exit status "usage".
+ */
+void
+usage(void)
+{
+	enum { Stderr = 2 };
+
+	fprint(Stderr, "Usage: %s [ -d ] [ file ]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * urlencode [ -d ] [ file ]
+ *
+ * Copy the named file (standard input if none is given) to standard
+ * output one byte at a time.  Without -d each byte is URL-encoded:
+ * letters, digits and the characters /$-_@.!*'(), pass through, a
+ * space becomes '+', and every other byte becomes %XX with uppercase
+ * hex digits.  With -d the transformation is reversed.
+ */
+void
+main(int argc, char *argv[])
+{
+	int c;
+
+	ARGBEGIN {
+	case 'd':
+		dflag = 1;
+		break;
+	default:
+		usage();
+	} ARGEND;
+	if(argc == 1){
+		/*
+		 * Replace standard input with the file; the code below
+		 * reads fd 0, so open is relied upon to reuse the
+		 * descriptor that was just closed.
+		 */
+		close(0);
+		if(open(*argv, OREAD) < 0)
+			sysfatal("%r");
+	} else if(argc > 1)
+		usage();
+
+	Binit(&bin, 0, OREAD);
+	Binit(&bout, 1, OWRITE);
+
+	if(dflag){
+		while((c = Bgetc(&bin)) >= 0){
+			if(c == '%'){
+				int c1, c2, x1, x2;
+
+				/* an escape cut short by end of input is dropped */
+				if((c1 = Bgetc(&bin)) < 0)
+					break;
+				if((x1 = hexdigit(c1)) < 0){
+					/* not an escape: emit '%' as is and rescan c1 */
+					Bungetc(&bin);
+					Bputc(&bout, c);
+					continue;
+				}
+				if((c2 = Bgetc(&bin)) < 0)
+					break;
+				if((x2 = hexdigit(c2)) < 0){
+					/* "%X" without a second digit: emit both, rescan c2 */
+					Bungetc(&bin);
+					Bputc(&bout, c);
+					Bputc(&bout, c1);
+					continue;
+				}
+				c = x1<<4 | x2;
+			} else if(c == '+'){
+				/*
+				 * Inverse of the encoder's ' ' -> '+'.  Only a
+				 * literal '+' is mapped; one produced by a %2B
+				 * escape above is left alone.
+				 */
+				c = ' ';
+			}
+			Bputc(&bout, c);
+		}
+	} else {
+		while((c = Bgetc(&bin)) >= 0){
+			/*
+			 * c != 0 guards the strchr: searching for 0 finds the
+			 * string terminator, which would let NUL bytes through
+			 * unencoded instead of as %00.
+			 */
+			if((c != 0 && strchr("/$-_@.!*'(),", c))
+			|| ('a'<=c && c<='z')
+			|| ('A'<=c && c<='Z')
+			|| ('0'<=c && c<='9'))
+				Bputc(&bout, c);
+			else if(c == ' ')
+				Bputc(&bout, '+');
+			else {
+				Bputc(&bout, '%');
+				Bputc(&bout, Hex[c>>4]);
+				Bputc(&bout, Hex[c&15]);
+			}
+		}
+	}
+
+	/* report a failed write instead of exiting with success */
+	if(Bflush(&bout) < 0)
+		sysfatal("write: %r");
+	exits(0);
+}