summaryrefslogtreecommitdiff
path: root/sys/src/cmd/aux/msexceltables.c
diff options
context:
space:
mode:
authorTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
committerTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
commite5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
treed8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/aux/msexceltables.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/aux/msexceltables.c')
-rwxr-xr-xsys/src/cmd/aux/msexceltables.c841
1 files changed, 841 insertions, 0 deletions
diff --git a/sys/src/cmd/aux/msexceltables.c b/sys/src/cmd/aux/msexceltables.c
new file mode 100755
index 000000000..16d850b40
--- /dev/null
+++ b/sys/src/cmd/aux/msexceltables.c
@@ -0,0 +1,841 @@
+/* msexceltables.c Steve Simon 5-Jan-2005 */
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+
+enum {
+ Tillegal = 0, // not a valid cell type
+ Tnumber, // cell types: floating point value
+ Tlabel, // string held by the cell itself
+ Tindex, // index into the string table (Strtab)
+ Tbool, // boolean value
+ Terror, // error code, see Errmsgs
+
+ Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
+
+ Nwidths = 4096, // number of entries in the Width table
+};
+
+
+typedef struct Biff Biff;
+typedef struct Col Col;
+typedef struct Row Row;
+
+struct Row { // one populated row of the sheet being read
+ Row *next; // next row
+ int r; // row number
+ Col *col; // list of cols in row
+};
+
+struct Col { // one cell
+ Col *next; // next col in row
+ int c; // col number
+ int f; // index into formatting table (Xf)
+ int type; // type of value for union below (Tnumber, Tlabel, ...)
+ union { // value
+ int index; // index into string table (Strtab)
+ int error; // Terror: looked up in Errmsgs
+ int bool; // Tbool: non-zero prints as True
+ char *label; // Tlabel: string, freed by release()
+ double number; // Tnumber
+ };
+};
+
+struct Biff { // reader state for one input file
+ Biobuf *bp; // input file
+ int op; // current record type
+ int len; // bytes of the current record not yet consumed
+};
+
+// options (set from the command line in main)
+static int Nopad = 0; // disable padding cells to column width
+static int Trunc = 0; // truncate cells to column width
+static int All = 0; // dump all sheet types, Worksheets only by default
+static char *Delim = " "; // field delimiter
+static char *Sheetrange = nil; // range of sheets wanted
+static char *Columnrange = nil; // range of columns wanted
+static int Debug = 0; // print every record header and a hex dump of its body
+
+// file scope
+static int Defwidth = 10; // default column width if none given
+static int Biffver; // file version
+static int Datemode; // date ref: zero is 1899-Dec-31, non-zero is 1904-Jan-1
+static char **Strtab = nil; // label contents heap
+static int Nstrtab = 0; // # of above
+static int *Xf; // array of extended format indices
+static int Nxf = 0; // # of above
+static Biobuf *bo; // stdout (sic)
+static int Doquote = 1; // quote text fields if they are rc(1) unfriendly
+
+// table scope (reset by release() after each sheet)
+static int Width[Nwidths]; // array of column widths
+static int Ncols = -1; // max columns in table used
+static int Content = 0; // type code for contents of sheet
+static Row *Root = nil; // one worksheet's worth of cells
+
+static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", // indexed by Tm.mon
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+static char *Errmsgs[] = { // indexed by cell error code; dump() checks for entries left nil
+ [0x0] "#NULL!", // intersection of two cell ranges is empty
+ [0x7] "#DIV/0!", // division by zero
+ [0xf] "#VALUE!", // wrong type of operand
+ [0x17] "#REF!", // illegal or deleted cell reference
+ [0x1d] "#NAME?", // wrong function or range name
+ [0x24] "#NUM!", // value range overflow
+ [0x2a] "#N/A!", // argument of function not available
+};
+
+/*
+ * Report whether item number `here' is selected by `range', a comma
+ * separated list of numbers and lo-hi pairs such as "1,3-5".
+ * A nil range selects everything.  Returns 1 if selected, 0 if not.
+ * Any character other than ',', '-' or the terminating NUL following
+ * a number is fatal.
+ */
+int
+wanted(char *range, int here)
+{
+	int num, lo;
+	char *cur, sep;
+
+	if(range == nil)
+		return 1;
+
+	lo = -1;		// start of a pending lo-hi pair, -1 if none
+	for(cur = range; ; cur++){
+		num = strtol(cur, &cur, 10);
+		sep = *cur;
+		if(sep != 0 && sep != ',' && sep != '-')
+			sysfatal("%s malformed range spec", range);
+
+		// every number named in the spec matches itself
+		if(num == here)
+			return 1;
+		if(sep == '-'){
+			lo = num;
+			continue;
+		}
+
+		// end of an element: test the interior of a pending pair
+		if(lo != -1 && here > lo && here < num)
+			return 1;
+		if(sep == 0)
+			return 0;
+		lo = -1;
+	}
+}
+
+
+/*
+ * Record one cell in the sparse table hanging off Root.
+ * Rows are kept in ascending row order and the cells of a row in
+ * ascending column order; a cell whose column number is already
+ * present is linked in after the existing ones.
+ * `val' points at a double for Tnumber and at an int for Tindex,
+ * Tbool and Terror; for Tlabel it is the string pointer itself, which
+ * is stored as is (release() frees it).  Any other type is fatal.
+ */
+void
+cell(int r, int c, int f, int type, void *val)
+{
+	Row *rp, *fresh;
+	Col *cp, *item;
+
+	if(c > Ncols)
+		Ncols = c;
+
+	item = malloc(sizeof(Col));
+	if(item == nil)
+		sysfatal("no memory");
+	item->next = nil;
+	item->c = c;
+	item->f = f;
+	item->type = type;
+
+	if(type == Tnumber)
+		item->number = *(double *)val;
+	else if(type == Tlabel)
+		item->label = (char *)val;
+	else if(type == Tindex)
+		item->index = *(int *)val;
+	else if(type == Tbool)
+		item->bool = *(int *)val;
+	else if(type == Terror)
+		item->error = *(int *)val;
+	else
+		sysfatal("can't happen error");
+
+	// table empty, or the new row sorts before every existing one
+	if(Root == nil || Root->r > r){
+		fresh = malloc(sizeof(Row));
+		if(fresh == nil)
+			sysfatal("no memory");
+		fresh->r = r;
+		fresh->col = item;
+		fresh->next = Root;
+		Root = fresh;
+		return;
+	}
+
+	rp = Root;
+	while(rp != nil){
+		if(rp->r == r){
+			// existing row: link the cell in column order
+			if(rp->col->c > c){
+				item->next = rp->col;
+				rp->col = item;
+				return;
+			}
+			cp = rp->col;
+			while(cp->next != nil && cp->next->c <= c)
+				cp = cp->next;
+			item->next = cp->next;
+			cp->next = item;
+			return;
+		}
+
+		if(rp->next == nil || rp->next->r > r){
+			// no such row yet: it belongs directly after rp
+			fresh = malloc(sizeof(Row));
+			if(fresh == nil)
+				sysfatal("no memory");
+			fresh->r = r;
+			fresh->col = item;
+			fresh->next = rp->next;
+			rp->next = fresh;
+			return;
+		}
+		rp = rp->next;
+	}
+	sysfatal("cannot happen error");
+}
+
+/*
+ * Convert an Excel serial date to broken-down time.
+ * The integer part of `num' counts days from the workbook epoch
+ * (selected by Datemode) and the fraction is the time of day, so the
+ * fraction must survive the conversion to seconds or every time
+ * format would print midnight.
+ * A serial date carries no time zone; it is converted with gmtime so
+ * that the wall-clock values stored in the sheet are printed
+ * unchanged rather than shifted by the local zone offset.
+ * Returns the static Tm owned by libc.
+ */
+struct Tm *
+bifftime(double num)
+{
+	double days;
+
+	/* Beware - These epochs are wrong, this
+	 * is due to Excel still remaining compatible
+	 * with Lotus-123, which incorrectly believed 1900
+	 * was a leap year
+	 */
+	if(Datemode)
+		days = num - 24107;	// epoch = 1/1/1904
+	else
+		days = num - 25569;	// epoch = 31/12/1899
+
+	// round to the nearest second: day fractions are rarely exact in binary
+	return gmtime((long)floor(days*60*60*24 + 0.5));
+}
+
+/*
+ * Print `num' on bo according to the built-in number format `fmt',
+ * padded to `min' and truncated to `max' characters.
+ *	9, 10		percentages; the cell holds a fraction, so it
+ *			is scaled by 100 before printing
+ *	11, 48		scientific notation
+ *	14-17		dates
+ *	18-21, 45-47	times
+ *	22		time and date
+ * Anything else is printed with %g.
+ * The text is quoted only when Doquote is set, as for every other
+ * field written by dump().
+ */
+void
+numfmt(int fmt, int min, int max, double num)
+{
+	char buf[1024];
+	struct Tm *tm;
+
+	switch(fmt){
+	case 9:		// 0%
+		snprint(buf, sizeof(buf), "%.0f%%", num*100.0);
+		break;
+	case 10:	// 0.00%
+		snprint(buf, sizeof(buf), "%.2f%%", num*100.0);
+		break;
+	case 11:
+	case 48:
+		snprint(buf, sizeof(buf), "%e", num);
+		break;
+	case 14:
+	case 15:
+	case 16:
+	case 17:
+		tm = bifftime(num);
+		snprint(buf, sizeof(buf), "%d-%s-%d",
+			tm->mday, Months[tm->mon], tm->year+1900);
+		break;
+	case 18:
+	case 19:
+	case 20:
+	case 21:
+	case 45:
+	case 46:
+	case 47:
+		tm = bifftime(num);
+		snprint(buf, sizeof(buf), "%02d:%02d:%02d",
+			tm->hour, tm->min, tm->sec);
+		break;
+	case 22:
+		tm = bifftime(num);
+		snprint(buf, sizeof(buf), "%02d:%02d:%02d %d-%s-%d",
+			tm->hour, tm->min, tm->sec,
+			tm->mday, Months[tm->mon], tm->year+1900);
+		break;
+	default:
+		snprint(buf, sizeof(buf), "%g", num);
+		break;
+	}
+
+	Bprint(bo, Doquote? "%-*.*q": "%-*.*s", min, max, buf);
+}
+
+/*
+ * Write the table hanging off Root to bo, one line per row.
+ * Cells are separated by Delim; gaps between non-adjacent columns are
+ * filled with empty fields and gaps between rows with empty lines.
+ * Each cell is padded to its column width (Width[], else Defwidth)
+ * unless Nopad is set, and truncated when Trunc is set.
+ * Two consecutive cells with the same column number are joined by '='.
+ * A string table index outside Strtab is fatal.
+ */
+void
+dump(void)
+{
+	Row *r;
+	Col *c, *c1;
+	char *strfmt;
+	int i, n, last, min, max, fmt;
+
+	strfmt = Doquote? "%-*.*q": "%-*.*s";
+
+	for(r = Root; r; r = r->next){
+		/*
+		 * n is the position of the cell in the row's list, not its
+		 * column number, and is incremented before the test so the
+		 * first cell is checked as 2.
+		 * NOTE(review): confirm this numbering is what -c intends.
+		 */
+		n = 1;
+		for(c = r->col; c; c = c->next){
+			n++;
+			if(! wanted(Columnrange, n))
+				continue;
+
+			if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
+				min = Defwidth;
+			if((c->next && c->c == c->next->c) || Nopad)
+				min = 0;
+			max = -1;
+			if(Trunc && min > 2)
+				max = min -2;	// FIXME: -2 because of bug %q format ?
+
+			switch(c->type){
+			case Tnumber:
+				// c->f comes straight from the file: only index Xf when it is in range
+				fmt = 0;
+				if(Xf != nil && c->f >= 0 && c->f < Nxf)
+					fmt = Xf[c->f];
+				if(fmt == 0)
+					Bprint(bo, "%-*.*g", min, max, c->number);
+				else
+					numfmt(fmt, min, max, c->number);
+				break;
+			case Tlabel:
+				Bprint(bo, strfmt, min, max, c->label);
+				break;
+			case Tbool:
+				Bprint(bo, strfmt, min, max, (c->bool)? "True": "False");
+				break;
+			case Tindex:
+				if(c->index < 0 || c->index >= Nstrtab)
+					sysfatal("SST string out of range - corrupt file?");
+				Bprint(bo, strfmt, min, max, Strtab[c->index]);
+				break;
+			case Terror:
+				if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
+					Bprint(bo, "#ERR=%d", c->error);
+				else
+					Bprint(bo, strfmt, min, max, Errmsgs[c->error]);
+				break;
+			default:
+				sysfatal("cannot happen error");
+				break;
+			}
+
+			// is any later cell of this row going to be printed?
+			last = 1;
+			for(i = n+1, c1 = c->next; c1; c1 = c1->next, i++)
+				if(wanted(Columnrange, i)){
+					last = 0;
+					break;
+				}
+
+			// last == 0 implies c->next is not nil
+			if(! last){
+				if(c->next->c == c->c)	// bar charts
+					Bprint(bo, "=");
+				else{
+					Bprint(bo, "%s", Delim);
+					for(i = c->c; c->next && i < c->next->c -1; i++)
+						Bprint(bo, "%-*.*s%s", min, max, "", Delim);
+				}
+			}
+		}
+		if(r->next)
+			for(i = r->r; i < r->next->r; i++)
+				Bprint(bo, "\n");
+
+	}
+	Bprint(bo, "\n");
+}
+
+/*
+ * Free every row and cell of the current table, including the strings
+ * owned by Tlabel cells, and reset the per-table state (Root, Width,
+ * Ncols) ready for the next sheet.
+ */
+void
+release(void)
+{
+	Row *row, *nextrow;
+	Col *col, *nextcol;
+
+	for(row = Root; row != nil; row = nextrow){
+		for(col = row->col; col != nil; col = nextcol){
+			nextcol = col->next;
+			if(col->type == Tlabel)
+				free(col->label);
+			free(col);
+		}
+		nextrow = row->next;
+		free(row);
+	}
+
+	Root = nil;
+	Ncols = -1;
+	memset(Width, 0, sizeof(Width));
+}
+
+/*
+ * Discard the next `len' bytes of the current record by seeking the
+ * input forward.  Asking for more than the record has left trips the
+ * assertion; a failed seek is fatal.
+ */
+void
+skip(Biff *b, int len)
+{
+	Biobuf *in;
+
+	assert(len <= b->len);
+	in = b->bp;
+	if(Bseek(in, len, 1) == -1)
+		sysfatal("seek failed - %r");
+	b->len = b->len - len;
+}
+
+/*
+ * Copy the next `n' bytes of the current record into `p'.
+ * Fatal if the record has fewer than `n' bytes left or the input ends
+ * early.
+ */
+void
+gmem(Biff *b, void *p, int n)
+{
+	int avail;
+
+	avail = b->len;
+	if(avail < n)
+		sysfatal("short record %d < %d", avail, n);
+	if(Bread(b->bp, p, n) != n)
+		sysfatal("unexpected EOF - %r");
+	b->len = avail - n;
+}
+
+/*
+ * Debug aid: hex and ASCII dump of the unread part of the current
+ * record, 16 bytes to a line, on bo.  The input offset is restored
+ * afterwards and b->len is not touched, so the record can still be
+ * decoded normally.
+ */
+void
+xd(Biff *b)
+{
+	uvlong off;
+	uchar buf[16];
+	int addr, got, n, i;
+
+	off = Boffset(b->bp);
+	for(addr = 0; addr < b->len; addr += n){
+		// never read beyond the end of the record on the last line
+		n = b->len - addr;
+		if(n > (int)sizeof(buf))
+			n = sizeof(buf);
+		got = Bread(b->bp, buf, n);
+
+		Bprint(bo, " %6d ", addr);
+
+		for(i = 0; i < got; i++)
+			Bprint(bo, "%02x ", buf[i]);
+		if(i < 0)
+			i = 0;
+		for(; i < nelem(buf); i++)	// pad a short line so the text column lines up
+			Bprint(bo, " ");
+		Bprint(bo, " ");
+		for(i = 0; i < got; i++)
+			Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
+		Bprint(bo, "\n");
+	}
+	Bseek(b->bp, off, 0);
+}
+
+/*
+ * Read the 4 byte header of the next record: a 16 bit record type
+ * followed by a 16 bit body length, both least significant byte first.
+ * Returns 0 with b->op and b->len set, or -1 at end of input or on an
+ * all-zero header.  Running out of input inside the header is fatal.
+ * With Debug set the header and a dump of the body are printed.
+ */
+static int
+getrec(Biff *b)
+{
+	int i, c, hdr[4];
+
+	for(i = 0; i < nelem(hdr); i++){
+		c = Bgetc(b->bp);
+		if(c == -1){
+			if(i == 0)
+				return -1;	// real EOF
+			sysfatal("unexpected EOF - %r");
+		}
+		hdr[i] = c;
+	}
+	b->op = hdr[0] | hdr[1] << 8;
+	b->len = hdr[2] | hdr[3] << 8;
+
+	if(b->op == 0 && b->len == 0)
+		return -1;
+	if(Debug){
+		Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
+		xd(b);
+	}
+	return 0;
+}
+
+/*
+ * Read an `n' byte unsigned integer, least significant byte first,
+ * from the current record.
+ * Returns -1 (as a uvlong) without consuming anything when the record
+ * has fewer than `n' bytes left; end of input is fatal.
+ */
+static uvlong
+gint(Biff *b, int n)
+{
+	int c, shift;
+	uvlong val;
+
+	if(n > b->len)
+		return -1;
+
+	val = 0;
+	for(shift = 0; shift < 8*n; shift += 8){
+		c = Bgetc(b->bp);
+		if(c == -1)
+			sysfatal("unexpected EOF - %r");
+		b->len--;
+		val |= (uvlong)c << shift;
+	}
+	return val;
+}
+
+/*
+ * Read a 4 byte RK value and return it as a double.
+ * The two low bits are flags, the remaining 30 bits the payload:
+ *	bit 1 set	payload is a signed 30 bit integer
+ *	bit 1 clear	payload is the top 30 bits of an IEEE double
+ *	bit 0 set	the decoded value is to be divided by 100
+ */
+double
+grk(Biff *b)
+{
+	int flags;
+	uvlong n;
+	vlong iv;
+	double d;
+
+	n = gint(b, 4);
+	flags = n & 3;
+	n &= ~3LL;
+	if(flags & 2){
+		/*
+		 * Sign extend from 32 bits before scaling, otherwise
+		 * negative integers decode as large positive numbers.
+		 * The flag bits are already zero so /4 is exact.
+		 */
+		iv = n & 0xFFFFFFFFLL;
+		if(iv & 0x80000000LL)
+			iv -= 0x100000000LL;
+		d = iv / 4.0;
+	}
+	else{
+		// the payload is the high word of the double, the low word is zero
+		n <<= 32;
+		memcpy(&d, &n, sizeof(d));
+	}
+
+	if(flags & 1)
+		d /= 100.0;
+	return d;
+}
+
+/*
+ * Read 8 bytes from the current record and return them reinterpreted
+ * as a double.
+ */
+double
+gdoub(Biff *b)
+{
+	uvlong bits;
+	double val;
+
+	bits = gint(b, 8);
+	memcpy(&val, &bits, sizeof(bits));
+	return val;
+}
+
+/*
+ * Read a string whose character count is held in the next `len_width'
+ * bytes and return it in a freshly allocated, NUL terminated buffer
+ * that the caller must free.
+ * For files older than BIFF8 the bytes are returned as stored.
+ * For BIFF8 the count is followed by an option byte and, depending on
+ * its flags, a rich text run count and an asian phonetic block size;
+ * the characters are converted to UTF-8 and the rich text and
+ * phonetic data following them are skipped.  A BIFF8 string may run
+ * on into CONTINUE records (op 0x3c), each of which starts with a new
+ * option byte.  Anything other than CONTINUE in that position is
+ * fatal.
+ */
+char *
+gstr(Biff *b, int len_width)
+{
+	Rune r;
+	char *buf, *p;
+	int nch, w, ap, ln, rt, opt;
+	enum {
+		Unicode = 1,
+		Asian_phonetic = 4,
+		Rich_text = 8,
+
+		/*
+		 * Bytes per character in the file.  This is a property
+		 * of the file format (UTF-16 code units), not of the
+		 * host, so it must not follow sizeof(Rune).
+		 */
+		Wwide = 2,
+		Wnarrow = 1,
+	};
+
+	if(b->len < len_width){
+		if(getrec(b) == -1)
+			sysfatal("starting STRING expected CONTINUE, got EOF");
+		if(b->op != 0x03c)
+			sysfatal("starting STRING expected CONTINUE, got op=0x%x", b->op);
+	}
+
+	ln = gint(b, len_width);
+	if(Biffver != Ver8){
+		if((buf = calloc(ln+1, sizeof(char))) == nil)
+			sysfatal("no memory");
+		gmem(b, buf, ln);
+		return buf;
+	}
+
+	// worst case every character expands to UTFmax bytes
+	if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
+		sysfatal("no memory");
+	p = buf;
+
+	if(ln == 0)
+		return buf;
+	nch = 0;
+	*buf = 0;
+	opt = gint(b, 1);
+	if(opt & Rich_text)
+		rt = gint(b, 2);
+	else
+		rt = 0;
+	if(opt & Asian_phonetic)
+		ap = gint(b, 4);
+	else
+		ap = 0;
+	for(;;){
+		w = (opt & Unicode)? Wwide: Wnarrow;
+
+		while(b->len > 0){
+			r = gint(b, w);
+			p += runetochar(p, &r);
+			if(++nch >= ln){
+				if(rt)
+					skip(b, rt*4);
+				if(ap)
+					skip(b, ap);
+				return buf;
+			}
+		}
+		if(getrec(b) == -1)
+			sysfatal("in STRING expected CONTINUE, got EOF");
+		if(b->op != 0x03c)
+			sysfatal("in STRING expected CONTINUE, got op=0x%x", b->op);
+		// the character width may change at a record boundary
+		opt = gint(b, 1);
+	}
+}
+
+/*
+ * SST record: load the string table.  Sets Nstrtab from the record
+ * and fills Strtab with that many strings read by gstr().
+ */
+void
+sst(Biff *b)
+{
+	int i;
+
+	skip(b, 4);			// total # strings
+	Nstrtab = gint(b, 4);		// # unique strings
+	Strtab = calloc(Nstrtab, sizeof(char *));
+	if(Strtab == nil)
+		sysfatal("no memory");
+
+	i = 0;
+	while(i < Nstrtab){
+		Strtab[i] = gstr(b, 2);
+		i++;
+	}
+}
+
+/*
+ * BOOLERR record: a cell holding either a boolean or an error code.
+ * The final byte says which: non-zero means error.
+ */
+void
+boolerr(Biff *b)
+{
+	int row, col, fmt, val, iserr;
+
+	row = gint(b, 2);
+	col = gint(b, 2);
+	fmt = gint(b, 2);	// formatting ref
+	val = gint(b, 1);	// bool value / err code
+	iserr = gint(b, 1);	// type
+
+	if(iserr)
+		cell(row, col, fmt, Terror, &val);
+	else
+		cell(row, col, fmt, Tbool, &val);
+}
+
+/* RK record: a single numeric cell stored as an RK value. */
+void
+rk(Biff *b)
+{
+	int row, col, fmt;
+	double val;
+
+	row = gint(b, 2);
+	col = gint(b, 2);
+	fmt = gint(b, 2);	// formatting ref
+	val = grk(b);
+	cell(row, col, fmt, Tnumber, &val);
+}
+
+/*
+ * MULRK record: a run of numeric cells in consecutive columns of one
+ * row, each a format reference followed by an RK value.  Reading
+ * stops when fewer than 6 bytes remain; whatever is left over is
+ * skipped by the caller.
+ */
+void
+mulrk(Biff *b)
+{
+	int row, col, fmt;
+	double val;
+
+	row = gint(b, 2);
+	col = gint(b, 2);	// first col
+	for(; b->len >= 6; col++){
+		fmt = gint(b, 2);	// formatting ref
+		val = grk(b);
+		cell(row, col, fmt, Tnumber, &val);
+	}
+}
+
+/* NUMBER record: a single numeric cell stored as an 8 byte double. */
+void
+number(Biff *b)
+{
+	int row, col, fmt;
+	double val;
+
+	row = gint(b, 2);
+	col = gint(b, 2);
+	fmt = gint(b, 2);	// formatting ref
+	val = gdoub(b);
+	cell(row, col, fmt, Tnumber, &val);
+}
+
+/*
+ * LABEL record: a cell carrying its own string.  The string allocated
+ * by gstr() is handed to the table, which frees it in release().
+ */
+void
+label(Biff *b)
+{
+	int row, col, fmt;
+	char *text;
+
+	row = gint(b, 2);
+	col = gint(b, 2);
+	fmt = gint(b, 2);	// formatting ref
+	text = gstr(b, 2);	// byte string
+	cell(row, col, fmt, Tlabel, text);
+}
+
+
+/*
+ * LABELSST record: a cell whose text lives in the string table.
+ * The table index is a 4 byte field; reading only 2 bytes would
+ * silently pick the wrong string in workbooks with more than 65535
+ * strings.  The index is range checked when the cell is printed.
+ */
+void
+labelsst(Biff *b)
+{
+	int r = gint(b, 2);	// row
+	int c = gint(b, 2);	// col
+	int f = gint(b, 2);	// formatting ref
+	int i = gint(b, 4);	// sst string ref
+	cell(r, c, f, Tindex, &i);
+}
+
+/*
+ * BOF record: remember the file version and the type code of the
+ * stream that is starting (compared against the table in eof()).
+ */
+void
+bof(Biff *b)
+{
+	int ver, kind;
+
+	ver = gint(b, 2);
+	kind = gint(b, 2);
+	Biffver = ver;
+	Content = kind;
+}
+
+/* DEFCOLWIDTH record: width used for columns that have none of their own. */
+void
+defcolwidth(Biff *b)
+{
+	int w;
+
+	w = gint(b, 2);
+	Defwidth = w;
+}
+
+/* DATEMODE record: selects the date epoch used by bifftime(). */
+void
+datemode(Biff *b)
+{
+	int mode;
+
+	mode = gint(b, 2);
+	Datemode = mode;
+}
+
+/*
+ * EOF record: the current stream is complete.
+ * Streams are counted from 1; one that is selected by Sheetrange and
+ * has at least one cell is printed - only worksheets by default, any
+ * known content type (preceded by a "# contents" line) when All is
+ * set.  The table is released in every case.
+ */
+void
+eof(Biff *b)
+{
+	int i;
+	struct {
+		int n;
+		char *s;
+	} names[] = {
+		0x005, "Workbook globals",
+		0x006, "Visual Basic module",
+		0x010, "Worksheet",
+		0x020, "Chart",
+		0x040, "Macro sheet",
+		0x100, "Workspace file",
+	};
+	static int sheet = 0;
+
+	USED(b);
+
+	sheet++;
+	if(wanted(Sheetrange, sheet) && Ncols != -1){
+		if(!All){
+			if(Content == 0x10)	// Worksheet
+				dump();
+		}else{
+			for(i = 0; i < nelem(names); i++){
+				if(names[i].n != Content)
+					continue;
+				Bprint(bo, "\n# contents %s\n", names[i].s);
+				dump();
+			}
+		}
+	}
+	release();
+}
+
+/*
+ * COLINFO record: set the width of a range of columns.
+ * The stored width is divided by 256 and clamped to 0..100 before it
+ * is entered in Width[].  A last column at or beyond Nwidths is fatal.
+ */
+void
+colinfo(Biff *b)
+{
+	int col, first, last, width;
+
+	first = gint(b, 2);
+	last = gint(b, 2);
+	width = gint(b, 2);
+
+	if(first < 0)
+		sysfatal("negative column number (%d)", first);
+	if(last >= Nwidths)
+		sysfatal("too many columns (%d > %d)", last, Nwidths);
+
+	width /= 256;
+	if(width > 100)
+		width = 100;
+	else if(width < 0)
+		width = 0;
+
+	col = first;
+	while(col <= last){
+		Width[col] = width;
+		col++;
+	}
+}
+
+/*
+ * XF record: append the number format index of this extended format
+ * to Xf, so that Xf[n] is the format of the n'th XF record seen.
+ * The table grows 20 entries at a time, and only when it is full.
+ */
+void
+xf(Biff *b)
+{
+	int fmt;
+	static int nalloc = 0;
+
+	skip(b, 2);
+	fmt = gint(b, 2);
+	if(Nxf >= nalloc){
+		nalloc += 20;
+		if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
+			sysfatal("no memory");
+	}
+	Xf[Nxf++] = fmt;
+}
+
+/*
+ * WRITEACCESS record: print the name it carries as a "# author"
+ * comment line.  gstr() allocates the string, so it is freed here.
+ */
+void
+writeaccess(Biff *b)
+{
+	char *who;
+
+	who = gstr(b, 2);
+	Bprint(bo, "# author %s\n", who);
+	free(who);
+}
+
+/*
+ * CODEPAGE record: report the code page as a comment line, unless it
+ * is 1200 (UTF-16).
+ */
+void
+codepage(Biff *b)
+{
+	int cp;
+
+	cp = gint(b, 2);
+	if(cp == 1200)	// 1200 == UTF-16
+		return;
+	Bprint(bo, "# codepage %d\n", cp);
+}
+
+/*
+ * Read the record stream on `bp' until getrec() reports the end,
+ * handing each record whose type appears in the table to its handler.
+ * Whatever part of a record the handler did not consume, and every
+ * record with no handler, is skipped.
+ */
+void
+xls2csv(Biobuf *bp)
+{
+	Biff rec;
+	struct {
+		int op;
+		void (*func)(Biff *);
+	} dispatch[] = {
+		0x000a, eof,
+		0x0022, datemode,
+		0x0042, codepage,
+		0x0055, defcolwidth,
+		0x005c, writeaccess,
+		0x007d, colinfo,
+		0x00bd, mulrk,
+		0x00fc, sst,
+		0x00fd, labelsst,
+		0x0203, number,
+		0x0204, label,
+		0x0205, boolerr,
+		0x027e, rk,
+		0x0809, bof,
+		0x00e0, xf,
+	}, *d;
+
+	rec.bp = bp;
+	while(getrec(&rec) != -1){
+		for(d = dispatch; d < dispatch + nelem(dispatch); d++){
+			if(d->op == rec.op)
+				(*d->func)(&rec);
+		}
+		skip(&rec, rec.len);
+	}
+}
+
+/* Print the synopsis on file descriptor 2 and exit with status "usage". */
+void
+usage(void)
+{
+	char *synopsis;
+
+	synopsis = "usage: %s [-Danqt] [-w worksheets] [-c columns] [-d delim] /mnt/doc/Workbook\n";
+	fprint(2, synopsis, argv0);
+	exits("usage");
+}
+
+/*
+ * Parse the options, then convert the single workbook stream named on
+ * the command line, writing the result to file descriptor 1.
+ *	-D	debug: dump every record
+ *	-a	print all sheet types, not just worksheets
+ *	-q	do not quote text fields
+ *	-d	field delimiter
+ *	-n	do not pad cells to the column width
+ *	-t	truncate cells to the column width
+ *	-c	columns wanted
+ *	-w	worksheets wanted
+ * Exactly one file argument is required.  The record reader seeks on
+ * its input (see skip() and xd()), and no standard input mode exists:
+ * the branch that used to open it could never be reached.
+ */
+void
+main(int argc, char *argv[])
+{
+	Biobuf bout, *bp;
+
+	ARGBEGIN{
+	case 'D':
+		Debug = 1;
+		break;
+	case 'a':
+		All = 1;
+		break;
+	case 'q':
+		Doquote = 0;
+		break;
+	case 'd':
+		Delim = EARGF(usage());
+		break;
+	case 'n':
+		Nopad = 1;
+		break;
+	case 't':
+		Trunc = 1;
+		break;
+	case 'c':
+		Columnrange = EARGF(usage());
+		break;
+	case 'w':
+		Sheetrange = EARGF(usage());
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND;
+
+	if(argc != 1)
+		usage();
+
+	bo = &bout;
+	quotefmtinstall();
+	Binit(bo, OWRITE, 1);
+
+	if((bp = Bopen(argv[0], OREAD)) == nil)
+		sysfatal("%s cannot open - %r", argv[0]);
+	xls2csv(bp);
+	Bterm(bp);
+
+	exits(0);
+}
+