diff options
author | cinap_lenrek <cinap_lenrek@centraldogma> | 2011-09-20 00:37:06 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@centraldogma> | 2011-09-20 00:37:06 +0200 |
commit | e7df0daa66531eccb2d37f7b66e27d16c9ae4391 (patch) | |
tree | ebafd5367cbfb29da96570de0d2fca9de22a3110 /sys/src/cmd/tcs/charsets.awk | |
parent | 19070c5ce5da71967eb3938b26991b1fda19e3fe (diff) |
tcs: use character set alias names from abaco, more tolerant html entity support
Diffstat (limited to 'sys/src/cmd/tcs/charsets.awk')
-rw-r--r-- | sys/src/cmd/tcs/charsets.awk | 36 |
1 files changed, 36 insertions, 0 deletions
#!/bin/awk -f
# charsets.awk (sys/src/cmd/tcs/charsets.awk)
# makes a table of character sets from http://www.iana.org/assignments/character-sets
# and tcs.txt
#
# usage: charsets.awk charsets.txt tcs.txt
#
# charsets.txt is scanned in BEGIN for "Name:" and "Alias:" lines.
# Every name and alias is lower-cased and recorded in three tables:
#	names[x]	registry name that the name or alias x belongs to
#	alias[n, i]	i-th spelling (0 is the name itself) of registry name n
#	nalias[n]	number of spellings stored for registry name n
# Each line of tcs.txt is then read as "$1 $2"; when $1 is a known name
# or alias, one line of the form
#	"spelling", "$2",
# is printed for every spelling of the matching registry name.

BEGIN{
	if(ARGC != 3){
		print "Usage: " ARGV[0] " charsets.txt tcs.txt"
		exit 1
	}

	# getline returns -1 when the file cannot be read; testing the bare
	# value would treat that as true and loop forever, so compare with 0.
	name = ""
	while((st = (getline < ARGV[1])) > 0){
		if(/^Name:/){
			# A new registry entry starts: its own name is spelling 0.
			# An empty name resets the current entry so that following
			# aliases are not attached to the previous one.
			name = tolower($2)
			if(name != ""){
				names[name] = name
				nalias[name] = 0
				alias[name, nalias[name]++] = name
			}
		}
		if(/^Alias:/){
			a = tolower($2)
			# "None" marks an entry without aliases; blank aliases and
			# aliases seen before any name are skipped as well.
			if(a != "none" && a != "" && name != ""){
				names[a] = name
				# The comma subscript joins with SUBSEP, so the pairs
				# ("x1", 0) and ("x", 10) cannot share a key the way the
				# plain concatenation name i could.
				alias[name, nalias[name]++] = a
			}
		}
	}
	if(st < 0){
		print "cannot read " ARGV[1]
		exit 1
	}
	close(ARGV[1])

	# The registry has been consumed here; blank its argument so that the
	# main rule below only ever sees the lines of tcs.txt.
	ARGV[1] = ""
}
{
	tcs = $1
	if(tcs in names){
		name = names[tcs]
		for(i = 0; i < nalias[name]; i++)
			print "\"" alias[name, i] "\", \"" $2 "\","
	}
}