diff options
author | iru <devnull@localhost> | 2011-04-14 00:35:48 -0300 |
---|---|---|
committer | iru <devnull@localhost> | 2011-04-14 00:35:48 -0300 |
commit | fd8d404d520a3f10c143f5cbe7c170606fffc75c (patch) | |
tree | fbc8bf7218eca7d6290ffb1109fcc73b5b5cda33 /sys/src/cmd/aux/bflz.c | |
parent | dd468419f2fbaa2c3fec570a88d13e8eae8f3faf (diff) |
Promote the old installer/livecd specific tools to normal tools under /sys/src/cmd. Where similar common tools already existed, I kept them.
Diffstat (limited to 'sys/src/cmd/aux/bflz.c')
-rw-r--r-- | sys/src/cmd/aux/bflz.c | 374 |
1 files changed, 374 insertions, 0 deletions
diff --git a/sys/src/cmd/aux/bflz.c b/sys/src/cmd/aux/bflz.c new file mode 100644 index 000000000..89cb361c2 --- /dev/null +++ b/sys/src/cmd/aux/bflz.c @@ -0,0 +1,374 @@ +/* + * Extraordinarily brute force Lempel & Ziv-like + * compressor. The input file must fit in memory + * during compression, and the output file will + * be reconstructed in memory during decompression. + * We search for large common sequences and use a + * greedy algorithm to choose which sequence gets + * compressed first. + * + * Files begin with "BLZ\n" and a 32-bit uncompressed file length. + * + * Output format is a series of blocks followed by + * a raw data section. Each block begins with a 32-bit big-endian + * number. The top bit is type and the next 31 bits + * are uncompressed size. Type is one of + * 0 - use raw data for this length + * 1 - a 32-bit offset follows + * After the blocks come the raw data. (The end of the blocks can be + * noted by summing block lengths until you reach the file length.) + */ + +#include <u.h> +#include <libc.h> +#include <bio.h> + +#define malloc sbrk + +int minrun = 16; +int win = 16; +ulong outn; +int verbose; +int mindist; + +enum { Prime = 16777213 }; /* smallest prime < 2^24 (so p*256+256 < 2^32) */ +enum { NOFF = 3 }; + +Biobuf bout; +ulong length; +uchar *data; +ulong sum32(ulong, void*, long); +uchar *odat; +int nodat; +int nraw; +int rawstart; +int acct; +int zlength; +int maxchain; +int maxrle[256]; +int nnew; + +typedef struct Node Node; +struct Node { + Node *link; + ulong key; + ulong offset[NOFF]; +}; + +Node *nodepool; +int nnodepool; + +Node **hash; +uint nhash; + +uint maxlen; +uint maxsame; +uint replacesame = 8*1024*1024; + +Node *freelist, **freeend; +uint nalloc; + +Node* +allocnode(void) +{ + int i; + Node *n; + + if(nnodepool == 0){ + nnodepool = 256*1024; + nodepool = malloc(sizeof(Node)*nnodepool); + } + if(freelist){ + n = freelist; + freelist = n->link; + return n; + } + assert(nnodepool > 0); + nalloc++; + n = &nodepool[--nnodepool]; + for(i=0; i<NOFF; i++) + n->offset[i] = -1; + + return n; +} + +void +freenode(Node *n) +{ + if(freelist == nil) + freelist = n; + else + *freeend = n; + freeend = &n->link; + n->link = nil; +} + +Node** +llookup(ulong key) +{ + uint c; + Node **l, **top, *n; + + if(nhash == 0){ + uint x; + + x = length/8; + for(nhash=1; nhash<x; nhash<<=1) + ; + hash = sbrk(sizeof(Node*)*nhash); + } + + top = &hash[key&(nhash-1)]; + c = 0; + for(l=top; *l; l=&(*l)->link){ + c++; + if((*l)->key == key){ + /* move to front */ + n = *l; + *l = n->link; + n->link = *top; + *top = n; + return top; + } + } + if(c > maxlen) + maxlen = c; + return l; +} + +Node* +lookup(ulong key) +{ + return *llookup(key); +} + +void +insertnode(ulong key, ulong offset) +{ + int i; + Node *n, **l; + + l = llookup(key); + if(*l == nil){ + if(l==&hash[key&(nhash-1)]) + nnew++; + *l = allocnode(); + (*l)->key = key; + } + n = *l; + + /* add or replace last */ + for(i=0; i<NOFF-1 && n->offset[i]!=-1; i++) + ; + n->offset[i] = offset; +} + +void +Bputint(Biobufhdr *b, int n) +{ + uchar p[4]; + + p[0] = n>>24; + p[1] = n>>16; + p[2] = n>>8; + p[3] = n; + Bwrite(b, p, 4); +} + +void +flushraw(void) +{ + if(nraw){ + if(verbose) + fprint(2, "Raw %d+%d\n", rawstart, nraw); + zlength += 4+nraw; + Bputint(&bout, (1<<31)|nraw); + memmove(odat+nodat, data+rawstart, nraw); + nodat += nraw; + nraw = 0; + } +} + +int +rawbyte(int i) +{ + assert(acct == i); + if(nraw == 0) + rawstart = i; + acct++; + nraw++; + return 1; +} + +int +refblock(int i, int len, int off) +{ + assert(acct == i); + acct += len; + if(nraw) + flushraw(); + if(verbose) + fprint(2, "Copy %d+%d from %d\n", i, len, off); + Bputint(&bout, len); + Bputint(&bout, off); + zlength += 4+4; + return len; +} + +int +cmprun(uchar *a, uchar *b, int len) +{ + int i; + + if(a==b) + return 0; + for(i=0; i<len && a[i]==b[i]; i++) + ; + return i; +} + +int +countrle(uchar *a) +{ + int i; + + for(i=0; a[i]==a[0]; i++) + ; + return i; +} + +void +compress(void) +{ + int best, i, j, o, rle, run, maxrun, maxoff; + ulong sum; + Node *n; + + sum = 0; + for(i=0; i<win && i<length; i++) + sum = (sum*256+data[i])%Prime; + for(i=0; i<length-win; ){ + maxrun = 0; + maxoff = 0; + if(verbose) + fprint(2, "look %.6lux\n", sum); + n = lookup(sum); + if(n){ + best = -1; + for(o=0; o<NOFF; o++){ + if(n->offset[o] == -1) + break; + run = cmprun(data+i, data+n->offset[o], length-i); + if(run > maxrun && n->offset[o]+mindist < i){ + maxrun = run; + maxoff = n->offset[o]; + best = o; + } + } + if(best > 0){ + o = n->offset[best]; + for(j=best; j>0; j--) + n->offset[j] = n->offset[j-1]; + n->offset[0] = o; + } + } + + if(maxrun >= minrun) + j = i+refblock(i, maxrun, maxoff); + else + j = i+rawbyte(i); + for(; i<j; i++){ + /* avoid huge chains from large runs of same byte */ + rle = countrle(data+i); + if(rle<4) + insertnode(sum, i); + else if(rle>maxrle[data[i]]){ + maxrle[data[i]] = rle; + insertnode(sum, i); + } + sum = (sum*256+data[i+win]) % Prime; + sum = (sum + data[i]*outn) % Prime; + } + } + /* could do better here */ + for(; i<length; i++) + rawbyte(i); + flushraw(); +} + +void +usage(void) +{ + fprint(2, "usage: bflz [-n winsize] [file]\n"); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + int fd, i, n; + char buf[10485760]; + + ARGBEGIN{ + case 'd': + verbose = 1; + break; + case 's': + replacesame = atoi(EARGF(usage())); + break; + case 'm': + mindist = atoi(EARGF(usage())); + break; + case 'n': + win = atoi(EARGF(usage())); + minrun = win; + break; + default: + usage(); + }ARGEND + + switch(argc){ + default: + usage(); + case 0: + fd = 0; + break; + case 1: + if((fd = open(argv[0], OREAD)) < 0) + sysfatal("open %s: %r", argv[0]); + break; + } + + while((n = readn(fd, buf, sizeof buf)) > 0){ + data = realloc(data, length+n); + if(data == nil) + sysfatal("realloc: %r"); + memmove(data+length, buf, n); + length += n; + if(n < sizeof buf) + break; + } + odat = malloc(length); + if(odat == nil) + sysfatal("malloc: %r"); + + Binit(&bout, 1, OWRITE); + Bprint(&bout, "BLZ\n"); + Bputint(&bout, length); + outn = 1; + for(i=0; i<win; i++) + outn = (outn * 256) % Prime; + + if(verbose) + fprint(2, "256^%d = %.6lux\n", win, outn); + outn = Prime - outn; + if(verbose) + fprint(2, "outn = %.6lux\n", outn); + + compress(); + Bwrite(&bout, odat, nodat); + Bterm(&bout); + fprint(2, "brk %p\n", sbrk(1)); + fprint(2, "%d nodes used; %d of %d hash slots used\n", nalloc, nnew, nhash); + exits(nil); +} |