summaryrefslogtreecommitdiff
path: root/sys/src/cmd/tcs/charsets.awk
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@centraldogma>2011-09-20 00:37:06 +0200
committercinap_lenrek <cinap_lenrek@centraldogma>2011-09-20 00:37:06 +0200
commite7df0daa66531eccb2d37f7b66e27d16c9ae4391 (patch)
treeebafd5367cbfb29da96570de0d2fca9de22a3110 /sys/src/cmd/tcs/charsets.awk
parent19070c5ce5da71967eb3938b26991b1fda19e3fe (diff)
tcs: use character set alias names from abaco, more tolerant html entity support
Diffstat (limited to 'sys/src/cmd/tcs/charsets.awk')
-rw-r--r--sys/src/cmd/tcs/charsets.awk36
1 files changed, 36 insertions, 0 deletions
diff --git a/sys/src/cmd/tcs/charsets.awk b/sys/src/cmd/tcs/charsets.awk
new file mode 100644
index 000000000..e822f80e8
--- /dev/null
+++ b/sys/src/cmd/tcs/charsets.awk
@@ -0,0 +1,36 @@
+#!/bin/awk -f
+# makes a table of character sets from http://www.iana.org/assignments/character-sets
+# and tcs.txt
+
+# Pass 1 (BEGIN rule): read the registry file named by ARGV[1] and build
+#	names[n]	lower-cased Name: value that the name or alias n belongs to
+#	alias[name, k]	k-th recorded spelling of a charset; k == 0 is the name itself
+#	nalias[name]	number of spellings recorded for that charset
+# The (name, k) subscript uses SUBSEP so that a name ending in a digit
+# cannot collide with another name's index (plain concatenation maps both
+# "x1" index 0 and "x" index 10 to the key "x10").
+BEGIN{
+	if(ARGC != 3){
+		print "Usage: " ARGV[0] " charsets.txt tcs.txt"
+		exit 1
+	}
+	# Compare against 0: getline returns -1 on a read error, which a bare
+	# while(getline<file) treats as true and spins on forever.
+	while((r = (getline < ARGV[1])) > 0){
+		if(/^Name:/){
+			i = 0
+			name = tolower($2)
+			names[name] = name
+			alias[name, i] = name
+			nalias[name] = ++i
+		}
+		if(/^Alias:/){
+			a = tolower($2)
+			# "Alias: None" lines carry no alias
+			if(a != "none"){
+				names[a] = name
+				alias[name, i] = a
+				nalias[name] = ++i
+			}
+		}
+	}
+	if(r < 0){
+		print "cannot read " ARGV[1]
+		exit 1
+	}
+	close(ARGV[1])
+	# The registry has been consumed above; blank its operand so the main
+	# rule only sees the second file (awk skips empty ARGV elements).
+	ARGV[1] = ""
+}
+# Pass 2 (main rule): for every input line whose first field is a known
+# name or alias, print one `"spelling", "<field 2>",` row per recorded
+# spelling of that charset.
+{
+	tcs = $1
+	if(tcs in names){
+		name = names[tcs]
+		for(i = 0; i < nalias[name]; i++)
+			print "\"" alias[name, i] "\", \"" $2 "\","
+	}
+}