summaryrefslogtreecommitdiff
path: root/sys/src/cmd/aux/bflz.c
diff options
context:
space:
mode:
authoriru <devnull@localhost>2011-04-14 00:35:48 -0300
committeriru <devnull@localhost>2011-04-14 00:35:48 -0300
commitfd8d404d520a3f10c143f5cbe7c170606fffc75c (patch)
treefbc8bf7218eca7d6290ffb1109fcc73b5b5cda33 /sys/src/cmd/aux/bflz.c
parentdd468419f2fbaa2c3fec570a88d13e8eae8f3faf (diff)
Promote the old installer/livecd specific tools to normal tools under /sys/src/cmd. Where similar common tools already existed, I kept them.
Diffstat (limited to 'sys/src/cmd/aux/bflz.c')
-rw-r--r--sys/src/cmd/aux/bflz.c374
1 files changed, 374 insertions, 0 deletions
diff --git a/sys/src/cmd/aux/bflz.c b/sys/src/cmd/aux/bflz.c
new file mode 100644
index 000000000..89cb361c2
--- /dev/null
+++ b/sys/src/cmd/aux/bflz.c
@@ -0,0 +1,374 @@
+/*
+ * Extraordinarily brute force Lempel & Ziv-like
+ * compressor. The input file must fit in memory
+ * during compression, and the output file will
+ * be reconstructed in memory during decompression.
+ * We search for large common sequences and use a
+ * greedy algorithm to choose which sequence gets
+ * compressed first.
+ *
+ * Files begin with "BLZ\n" and a 32-bit uncompressed file length.
+ *
+ * Output format is a series of blocks followed by
+ * a raw data section. Each block begins with a 32-bit big-endian
+ * number. The top bit is type and the next 31 bits
+ * are uncompressed size. Type is one of
+ * 0 - use raw data for this length
+ * 1 - a 32-bit offset follows
+ * After the blocks come the raw data. (The end of the blocks can be
+ * noted by summing block lengths until you reach the file length.)
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+#define malloc sbrk
+
+int minrun = 16;
+int win = 16;
+ulong outn;
+int verbose;
+int mindist;
+
+enum { Prime = 16777213 }; /* smallest prime < 2^24 (so p*256+256 < 2^32) */
+enum { NOFF = 3 };
+
+Biobuf bout;
+ulong length;
+uchar *data;
+ulong sum32(ulong, void*, long);
+uchar *odat;
+int nodat;
+int nraw;
+int rawstart;
+int acct;
+int zlength;
+int maxchain;
+int maxrle[256];
+int nnew;
+
+typedef struct Node Node;
+struct Node {
+ Node *link;
+ ulong key;
+ ulong offset[NOFF];
+};
+
+Node *nodepool;
+int nnodepool;
+
+Node **hash;
+uint nhash;
+
+uint maxlen;
+uint maxsame;
+uint replacesame = 8*1024*1024;
+
+Node *freelist, **freeend;
+uint nalloc;
+
+Node*
+allocnode(void)
+{
+ int i;
+ Node *n;
+
+ if(nnodepool == 0){
+ nnodepool = 256*1024;
+ nodepool = malloc(sizeof(Node)*nnodepool);
+ }
+ if(freelist){
+ n = freelist;
+ freelist = n->link;
+ return n;
+ }
+ assert(nnodepool > 0);
+ nalloc++;
+ n = &nodepool[--nnodepool];
+ for(i=0; i<NOFF; i++)
+ n->offset[i] = -1;
+
+ return n;
+}
+
+void
+freenode(Node *n)
+{
+ if(freelist == nil)
+ freelist = n;
+ else
+ *freeend = n;
+ freeend = &n->link;
+ n->link = nil;
+}
+
+Node**
+llookup(ulong key)
+{
+ uint c;
+ Node **l, **top, *n;
+
+ if(nhash == 0){
+ uint x;
+
+ x = length/8;
+ for(nhash=1; nhash<x; nhash<<=1)
+ ;
+ hash = sbrk(sizeof(Node*)*nhash);
+ }
+
+ top = &hash[key&(nhash-1)];
+ c = 0;
+ for(l=top; *l; l=&(*l)->link){
+ c++;
+ if((*l)->key == key){
+ /* move to front */
+ n = *l;
+ *l = n->link;
+ n->link = *top;
+ *top = n;
+ return top;
+ }
+ }
+ if(c > maxlen)
+ maxlen = c;
+ return l;
+}
+
+Node*
+lookup(ulong key)
+{
+ return *llookup(key);
+}
+
+void
+insertnode(ulong key, ulong offset)
+{
+ int i;
+ Node *n, **l;
+
+ l = llookup(key);
+ if(*l == nil){
+ if(l==&hash[key&(nhash-1)])
+ nnew++;
+ *l = allocnode();
+ (*l)->key = key;
+ }
+ n = *l;
+
+ /* add or replace last */
+ for(i=0; i<NOFF-1 && n->offset[i]!=-1; i++)
+ ;
+ n->offset[i] = offset;
+}
+
+void
+Bputint(Biobufhdr *b, int n)
+{
+ uchar p[4];
+
+ p[0] = n>>24;
+ p[1] = n>>16;
+ p[2] = n>>8;
+ p[3] = n;
+ Bwrite(b, p, 4);
+}
+
+void
+flushraw(void)
+{
+ if(nraw){
+ if(verbose)
+ fprint(2, "Raw %d+%d\n", rawstart, nraw);
+ zlength += 4+nraw;
+ Bputint(&bout, (1<<31)|nraw);
+ memmove(odat+nodat, data+rawstart, nraw);
+ nodat += nraw;
+ nraw = 0;
+ }
+}
+
+int
+rawbyte(int i)
+{
+ assert(acct == i);
+ if(nraw == 0)
+ rawstart = i;
+ acct++;
+ nraw++;
+ return 1;
+}
+
+int
+refblock(int i, int len, int off)
+{
+ assert(acct == i);
+ acct += len;
+ if(nraw)
+ flushraw();
+ if(verbose)
+ fprint(2, "Copy %d+%d from %d\n", i, len, off);
+ Bputint(&bout, len);
+ Bputint(&bout, off);
+ zlength += 4+4;
+ return len;
+}
+
+int
+cmprun(uchar *a, uchar *b, int len)
+{
+ int i;
+
+ if(a==b)
+ return 0;
+ for(i=0; i<len && a[i]==b[i]; i++)
+ ;
+ return i;
+}
+
+int
+countrle(uchar *a)
+{
+ int i;
+
+ for(i=0; a[i]==a[0]; i++)
+ ;
+ return i;
+}
+
+void
+compress(void)
+{
+ int best, i, j, o, rle, run, maxrun, maxoff;
+ ulong sum;
+ Node *n;
+
+ sum = 0;
+ for(i=0; i<win && i<length; i++)
+ sum = (sum*256+data[i])%Prime;
+ for(i=0; i<length-win; ){
+ maxrun = 0;
+ maxoff = 0;
+ if(verbose)
+ fprint(2, "look %.6lux\n", sum);
+ n = lookup(sum);
+ if(n){
+ best = -1;
+ for(o=0; o<NOFF; o++){
+ if(n->offset[o] == -1)
+ break;
+ run = cmprun(data+i, data+n->offset[o], length-i);
+ if(run > maxrun && n->offset[o]+mindist < i){
+ maxrun = run;
+ maxoff = n->offset[o];
+ best = o;
+ }
+ }
+ if(best > 0){
+ o = n->offset[best];
+ for(j=best; j>0; j--)
+ n->offset[j] = n->offset[j-1];
+ n->offset[0] = o;
+ }
+ }
+
+ if(maxrun >= minrun)
+ j = i+refblock(i, maxrun, maxoff);
+ else
+ j = i+rawbyte(i);
+ for(; i<j; i++){
+ /* avoid huge chains from large runs of same byte */
+ rle = countrle(data+i);
+ if(rle<4)
+ insertnode(sum, i);
+ else if(rle>maxrle[data[i]]){
+ maxrle[data[i]] = rle;
+ insertnode(sum, i);
+ }
+ sum = (sum*256+data[i+win]) % Prime;
+ sum = (sum + data[i]*outn) % Prime;
+ }
+ }
+ /* could do better here */
+ for(; i<length; i++)
+ rawbyte(i);
+ flushraw();
+}
+
+void
+usage(void)
+{
+ fprint(2, "usage: bflz [-n winsize] [file]\n");
+ exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+ int fd, i, n;
+ char buf[10485760];
+
+ ARGBEGIN{
+ case 'd':
+ verbose = 1;
+ break;
+ case 's':
+ replacesame = atoi(EARGF(usage()));
+ break;
+ case 'm':
+ mindist = atoi(EARGF(usage()));
+ break;
+ case 'n':
+ win = atoi(EARGF(usage()));
+ minrun = win;
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ switch(argc){
+ default:
+ usage();
+ case 0:
+ fd = 0;
+ break;
+ case 1:
+ if((fd = open(argv[0], OREAD)) < 0)
+ sysfatal("open %s: %r", argv[0]);
+ break;
+ }
+
+ while((n = readn(fd, buf, sizeof buf)) > 0){
+ data = realloc(data, length+n);
+ if(data == nil)
+ sysfatal("realloc: %r");
+ memmove(data+length, buf, n);
+ length += n;
+ if(n < sizeof buf)
+ break;
+ }
+ odat = malloc(length);
+ if(odat == nil)
+ sysfatal("malloc: %r");
+
+ Binit(&bout, 1, OWRITE);
+ Bprint(&bout, "BLZ\n");
+ Bputint(&bout, length);
+ outn = 1;
+ for(i=0; i<win; i++)
+ outn = (outn * 256) % Prime;
+
+ if(verbose)
+ fprint(2, "256^%d = %.6lux\n", win, outn);
+ outn = Prime - outn;
+ if(verbose)
+ fprint(2, "outn = %.6lux\n", outn);
+
+ compress();
+ Bwrite(&bout, odat, nodat);
+ Bterm(&bout);
+ fprint(2, "brk %p\n", sbrk(1));
+ fprint(2, "%d nodes used; %d of %d hash slots used\n", nalloc, nnew, nhash);
+ exits(nil);
+}