diff options
author | Jacob Moody <moody@posixcafe.org> | 2023-04-01 18:05:27 +0000 |
---|---|---|
committer | Jacob Moody <moody@posixcafe.org> | 2023-04-01 18:05:27 +0000 |
commit | 218f7a9ec7773484202d6fffb43b53f33524104c (patch) | |
tree | 2189d53481e990e9ebe239c15241861b78fcda71 | |
parent | 75d6267a5f788162d92e7a5ae126cd8b0770aa8a (diff) |
qcowfs(8)
-rw-r--r-- | sys/lib/dist/mkfile | 27 | ||||
-rw-r--r-- | sys/man/8/qcowfs | 63 | ||||
-rw-r--r-- | sys/src/cmd/disk/mkfile | 1 | ||||
-rw-r--r-- | sys/src/cmd/disk/qcowfs.c | 603 |
4 files changed, 692 insertions, 2 deletions
diff --git a/sys/lib/dist/mkfile b/sys/lib/dist/mkfile index 045f4560c..07097a4e9 100644 --- a/sys/lib/dist/mkfile +++ b/sys/lib/dist/mkfile @@ -21,6 +21,17 @@ cd:V: /tmp/9front.386.iso.gz mv $target.$pid.pc.iso $target } +%.amd64.qcow2: + @{ + objtype=amd64 + kernel=/n/src9/$objtype/9pc64 + echo 'bootfile='^`{basename $kernel} > /env/plan9.ini + fatfiles=(/386/9bootfat /env/plan9.ini $kernel) + mb=3770 + mk $target.$pid.disk + mv $target.$pid.disk $target + } + %.pi.img: @{ objtype=arm @@ -136,9 +147,15 @@ cd:V: /tmp/9front.386.iso.gz @{rfork n mk binds rm -f $target - dd -if /dev/zero -of $target -bs 1048576 -oseek $mb -count 1 s=`{basename $target} - disk/partfs -m /n/$s $target + if(~ $target *.amd64.qcow2.*){ + disk/qcowfs -n `{echo $mb '*1048576' | pc} $target + disk/partfs -m /n/$s /mnt/qcow/data + } + if not { + dd -if /dev/zero -of $target -bs 1048576 -oseek $mb -count 1 + disk/partfs -m /n/$s $target + } d=/n/$s/sdXX disk/mbr $d/data if(~ $target *.pi.img.* *.pi3.img.*){ @@ -169,6 +186,12 @@ cd:V: /tmp/9front.386.iso.gz disk/prep -bw -a^(nvram fs) $d/plan9 disk/format -d $d/dos $fatfiles } + if not if(~ $target *.amd64.qcow2.*){ + disk/mbr -m /386/mbr $d/data + disk/fdisk -baw $d/data + disk/prep -bw -a^(9fat nvram fs) $d/plan9 + disk/format -b /386/pbs -d -r 2 $d/9fat $fatfiles + } if not { disk/fdisk -baw $d/data disk/prep -bw -a^(9fat nvram fs) $d/plan9 diff --git a/sys/man/8/qcowfs b/sys/man/8/qcowfs new file mode 100644 index 000000000..3ce2b3720 --- /dev/null +++ b/sys/man/8/qcowfs @@ -0,0 +1,63 @@ +.TH QCOWFS 8 +.SH NAME +qcowfs \- QCOW2 file system +.SH SYNOPSIS +.B disk/qcowfs +[ +.B -n +.I size +] +[ +.B -m +.I mtpt +] +[ +.B -s +.I service +] +.I diskimage +.SH DESCRIPTION +.I Qcowfs +exposes a +.B data +file using the provided +.I diskimage +as the backing store. +.PP +The +.B -n +flag creates a new, empty +.I diskimage +with the specified +.I size +(in bytes) before using it. +The +.B -m +flag sets the +.I mtpt +(default /mnt/qcow). 
+The
+.B -s
+flag causes
+.I qcowfs
+to post its 9P service as
+.BI /srv/ service \fR.
+.SH EXAMPLES
+Create a new QCOW2 diskimage and partition it
+.IP
+.EX
+disk/qcowfs -n $size image.qcow2
+disk/partfs /mnt/qcow/data
+disk/mbr -m /386/mbr /dev/sdXX/data
+disk/fdisk -baw /dev/sdXX/data
+disk/prep /dev/sdXX/plan9
+.EE
+.SH SOURCE
+.B /sys/src/cmd/disk/qcowfs.c
+.SH SEE ALSO
+.IR partfs (8),
+.IR disksim (8),
+.IR prep (8)
+.SH HISTORY
+.I qcowfs
+first appeared in 9front (April, 2023).
diff --git a/sys/src/cmd/disk/mkfile b/sys/src/cmd/disk/mkfile
index e91b000d7..a6ee11a54 100644
--- a/sys/src/cmd/disk/mkfile
+++ b/sys/src/cmd/disk/mkfile
@@ -7,6 +7,7 @@ TARG=exsort\
 	mkfs\
 	partfs\
 	cryptsetup\
+	qcowfs\
 
 DIRS=\
 	9660\
diff --git a/sys/src/cmd/disk/qcowfs.c b/sys/src/cmd/disk/qcowfs.c
new file mode 100644
index 000000000..d26c7eb54
--- /dev/null
+++ b/sys/src/cmd/disk/qcowfs.c
@@ -0,0 +1,739 @@
+/* Adapted from OpenBSD's src/usr.sbin/vmd/vioqcow2.c */
+#include <u.h>
+#include <libc.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+
+typedef struct Header Header;
+typedef struct Disk Disk;
+
+/*
+ * In-memory copy of the QCOW2 image header.
+ * readhdr and writehdr convert each field from and to
+ * the big-endian layout at the start of the image.
+ */
+struct Header {
+	char magic[4];
+	u32int version;
+	u64int backingoff;
+	u32int backingsz;
+	u32int clustershift;
+	u64int disksz;
+	u32int cryptmethod;
+	u32int l1sz;
+	u64int l1off;
+	u64int refoff;
+	u32int refsz;
+	u32int snapcount;
+	u64int snapsz;
+	/* v3 additions */
+	u64int incompatfeatures;
+	u64int compatfeatures;
+	u64int autoclearfeatures;
+	u32int reforder; /* Bits = 1 << reforder */
+	u32int headersz;
+};
+
+#define QCOW2_COMPRESSED 0x4000000000000000ull
+#define QCOW2_INPLACE 0x8000000000000000ull
+char *MAGIC_QCOW = "QFI\xfb";
+enum{
+	QCOW2_DIRTY = 1 << 0,
+	QCOW2_CORRUPT = 1 << 1,
+
+	ICFEATURE_DIRTY = 1 << 0,
+	ICFEATURE_CORRUPT = 1 << 1,
+
+	ACFEATURE_BITEXT = 1 << 0,
+
+	HDRSZ = 4 + 4 + 8 + 4 + 4 + 8 + 4 + 4 + 8 + 8 + 4 + 4 + 8 + 8 + 8 + 8 + 4 + 4,
+	HDRSZV2 = 72,	/* header length implied by a version 2 image */
+
+	/* accepted clustershift range: format minimum, QEMU's maximum */
+	MINSHIFT = 9,
+	MAXSHIFT = 21,
+};
+
+/* State for one open image. */
+struct Disk {
+	RWLock lock;
+	Disk *base;
+	Header h;
+
+	int fd;
+	u64int *l1;	/* L1 table, converted to host byte order by qc2open */
+	s64int end;	/* next allocation offset; starts as the file length */
+	s64int clustersz;
+	s64int disksz; /* In bytes */
+	u32int cryptmethod;
+
+	u32int l1sz;
+	s64int l1off;
+
+	s64int refoff;
+	s64int refsz;
+
+	u32int nsnap;
+	s64int snapoff;
+
+	/* v3 features */
+	u64int incompatfeatures;
+	u64int autoclearfeatures;
+	u32int refssz;
+	u32int headersz;
+};
+
+/*
+ * Big-endian store and load helpers; p is a uchar pointer.
+ * Every expansion is parenthesized so that it can be used
+ * inside a larger expression such as GET2(buf) + 1.
+ */
+#define PUT2(p, u) ((p)[0] = (u)>>8, (p)[1] = (u))
+#define GET2(p) ((u16int)(p)[1] | (u16int)(p)[0]<<8)
+#define PUT4(p, u) ((p)[0] = (u)>>24, (p)[1] = (u)>>16, (p)[2] = (u)>>8, (p)[3] = (u))
+#define GET4(p) ((u32int)(p)[3] | (u32int)(p)[2]<<8 | (u32int)(p)[1]<<16 | (u32int)(p)[0]<<24)
+
+#define PUT8(p, u) ((p)[0] = (u)>>56, (p)[1] = (u)>>48, (p)[2] = (u)>>40, (p)[3] = (u)>>32, \
+	(p)[4] = (u)>>24, (p)[5] = (u)>>16, (p)[6] = (u)>>8, (p)[7] = (u))
+
+#define GET8(p) ((u64int)(p)[7] | (u64int)(p)[6]<<8 | (u64int)(p)[5]<<16 | (u64int)(p)[4]<<24 | \
+	(u64int)(p)[3]<<32 | (u64int)(p)[2]<<40 | (u64int)(p)[1]<<48 | (u64int)(p)[0]<<56)
+
+/*
+ * Set the length of the file open on fd by writing a
+ * directory entry in which only the length is set.
+ * Returns 0 on success, -1 if length is negative or
+ * the wstat fails.
+ */
+int
+ftruncate(int fd, s64int length)
+{
+	Dir d;
+
+	if(length < 0)
+		return -1;
+	nulldir(&d);
+	d.length = length;
+	if(dirfwstat(fd, &d) < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Encode src and write it at the current offset of fd.
+ * Any failure is fatal.
+ */
+static void
+writehdr(Header *src, int fd)
+{
+	uchar store[HDRSZ];
+	uchar *buf = store;
+
+	/* magic holds four bytes and no terminating NUL, so it cannot be measured with strlen */
+	memcpy(buf, src->magic, sizeof src->magic); buf += 4;
+	PUT4(buf, src->version); buf += 4;
+
+	PUT8(buf, src->backingoff); buf += 8;
+	PUT4(buf, src->backingsz); buf += 4;
+	PUT4(buf, src->clustershift); buf += 4;
+	PUT8(buf, src->disksz); buf += 8;
+	PUT4(buf, src->cryptmethod); buf += 4;
+	PUT4(buf, src->l1sz); buf += 4;
+	PUT8(buf, src->l1off); buf += 8;
+	PUT8(buf, src->refoff); buf += 8;
+	PUT4(buf, src->refsz); buf += 4;
+	PUT4(buf, src->snapcount); buf += 4;
+	PUT8(buf, src->snapsz); buf += 8;
+	PUT8(buf, src->incompatfeatures); buf += 8;
+	PUT8(buf, src->compatfeatures); buf += 8;
+	PUT8(buf, src->autoclearfeatures); buf += 8;
+	PUT4(buf, src->reforder); buf += 4;
+	PUT4(buf, src->headersz);
+
+	if(write(fd, store, sizeof store) != sizeof store)
+		sysfatal("writehdr: %r");
+}
+
+/*
+ * Read and decode the header at the current offset of fd.
+ * A short read, a bad magic or a version other than 2 or 3
+ * is fatal.
+ */
+static void
+readhdr(Header *dst, int fd)
+{
+	uchar store[HDRSZ];
+	uchar *buf = store;
+
+	if(readn(fd, store, sizeof store) != sizeof store)
+		sysfatal("short read on header: %r");
+	if(memcmp(MAGIC_QCOW, buf, strlen(MAGIC_QCOW)) != 0)
+		sysfatal("invalid magic");
+	memcpy(dst->magic, buf, sizeof dst->magic);
+	buf += 4;
+
+	dst->version = GET4(buf);
+	if(dst->version != 2 && dst->version != 3)
+		sysfatal("unsupported version: %d", dst->version);
+	buf += 4;
+
+	dst->backingoff = GET8(buf); buf += 8;
+	dst->backingsz = GET4(buf); buf += 4;
+	dst->clustershift = GET4(buf); buf += 4;
+	dst->disksz = GET8(buf); buf += 8;
+	dst->cryptmethod = GET4(buf); buf += 4;
+	dst->l1sz = GET4(buf); buf += 4;
+	dst->l1off = GET8(buf); buf += 8;
+	dst->refoff = GET8(buf); buf += 8;
+	dst->refsz = GET4(buf); buf += 4;
+	dst->snapcount = GET4(buf); buf += 4;
+	dst->snapsz = GET8(buf); buf += 8;
+
+	if(dst->version == 2){
+		/*
+		 * A version 2 header stops here; the bytes that follow
+		 * are not header fields.  Use the values the format
+		 * assigns to such images instead of decoding them.
+		 */
+		dst->incompatfeatures = 0;
+		dst->compatfeatures = 0;
+		dst->autoclearfeatures = 0;
+		dst->reforder = 4;
+		dst->headersz = HDRSZV2;
+		return;
+	}
+	dst->incompatfeatures = GET8(buf); buf += 8;
+	dst->compatfeatures = GET8(buf); buf += 8;
+	dst->autoclearfeatures = GET8(buf); buf += 8;
+	dst->reforder = GET4(buf); buf += 4;
+	dst->headersz = GET4(buf);
+}
+
+/* Round sz up to a multiple of align; align must be a power of two. */
+#define ALIGNSZ(sz, align) (((sz) + (align) - 1) & ~((align) - 1))
+
+/*
+ * Write an empty version 3 image of disksz bytes to fd,
+ * using 64K clusters and 16 bit reference counts.
+ * The layout is: header, L1 table, refcount table and one
+ * refcount block, each starting on a cluster boundary.
+ * Any failure is fatal.
+ */
+static void
+qc2create(int fd, u64int disksz)
+{
+	Header hdr;
+	u64int l1sz, refsz, initsz, clustersz;
+	u64int l1off, refoff, i, l1entrysz, refentrysz;
+	uchar v[8], v2[2];
+
+	clustersz = 1<<16;
+	l1off = ALIGNSZ(HDRSZ, clustersz);
+
+	l1entrysz = clustersz * clustersz / 8;
+	l1sz = (disksz + l1entrysz - 1) / l1entrysz;
+
+	refoff = ALIGNSZ(l1off + 8*l1sz, clustersz);
+	refentrysz = clustersz * clustersz * clustersz / 2;
+	refsz = (disksz + refentrysz - 1) / refentrysz;
+	/*
+	 * A zero sized disk would otherwise get no refcount table
+	 * cluster, and the refcount block written below would land
+	 * on top of the table entry that points to it.
+	 */
+	if(refsz == 0)
+		refsz = 1;
+
+	initsz = ALIGNSZ(refoff + refsz*clustersz, clustersz);
+
+	memcpy(hdr.magic, MAGIC_QCOW, sizeof hdr.magic);
+	hdr.version = 3;
+	hdr.backingoff = 0;
+	hdr.backingsz = 0;
+	hdr.clustershift = 16;
+	hdr.disksz = disksz;
+	hdr.cryptmethod = 0;
+	hdr.l1sz = l1sz;
+	hdr.l1off = l1off;
+	hdr.refoff = refoff;
+	hdr.refsz = refsz;
+	hdr.snapcount = 0;
+	hdr.snapsz = 0;
+	hdr.incompatfeatures = 0;
+	hdr.compatfeatures = 0;
+	hdr.autoclearfeatures = 0;
+	hdr.reforder = 4;
+	hdr.headersz = HDRSZ;
+
+	writehdr(&hdr, fd);
+	if(ftruncate(fd, (s64int)initsz + clustersz) == -1)
+		sysfatal("ftruncate: %r");
+
+	/* every cluster in use must fit in the single refcount block */
+	assert(initsz/clustersz < clustersz/2);
+
+	/* first refcount table entry: the refcount block at initsz */
+	PUT8(v, initsz);
+	if(pwrite(fd, v, sizeof v, refoff) != sizeof v)
+		sysfatal("qc2create: pwrite: %r");
+
+	/* one reference for each cluster up to and including the block itself */
+	PUT2(v2, 1);
+	for(i=0; i < initsz/clustersz + 1; i++){
+		if(pwrite(fd, v2, sizeof v2, initsz + 2*i) != sizeof v2)
+			sysfatal("qc2create: pwrite: %r");
+	}
+}
+
+/*
+ * Read the header of the image open on fd and fill in disk,
+ * loading the L1 table into memory.  Any failure is fatal.
+ */
+static void
+qc2open(Disk *disk, int fd)
+{
+	u32int i;
+	long n;
+	Dir *d;
+	uchar buf[8];
+
+	disk->fd = fd;
+	disk->base = nil;
+	disk->l1 = nil;
+	readhdr(&disk->h, disk->fd);
+
+	/* clustersz is a divisor everywhere; refuse shifts outside the supported range */
+	if(disk->h.clustershift < MINSHIFT || disk->h.clustershift > MAXSHIFT)
+		sysfatal("unsupported cluster shift %ud", disk->h.clustershift);
+
+	disk->clustersz = 1ull << disk->h.clustershift;
+	disk->disksz = disk->h.disksz;
+	disk->cryptmethod = disk->h.cryptmethod;
+	disk->l1sz = disk->h.l1sz;
+	disk->l1off = disk->h.l1off;
+	disk->refsz = disk->h.refsz;
+	disk->refoff = disk->h.refoff;
+	disk->nsnap = disk->h.snapcount;
+	disk->snapoff = disk->h.snapsz;
+
+	disk->incompatfeatures = disk->h.incompatfeatures;
+	disk->autoclearfeatures = disk->h.autoclearfeatures;
+	disk->refssz = disk->h.refsz;
+	disk->headersz = disk->h.headersz;
+
+	if(disk->h.reforder != 4)
+		sysfatal("unsupported refcount size %ud", disk->h.reforder);
+
+	/* the table is read with one pread, whose count is a long */
+	if(disk->l1sz >= 1<<28)
+		sysfatal("l1 table too large");
+	n = disk->l1sz * 8;
+	if(n > 0){
+		disk->l1 = mallocz(n, 1);
+		if(disk->l1 == nil)
+			sysfatal("mallocz: %r");
+		if(pread(disk->fd, disk->l1, n, disk->l1off) != n)
+			sysfatal("qc2open: could not read l1 table: %r");
+	}
+	for(i = 0; i < disk->l1sz; i++){
+		memcpy(buf, disk->l1 + i, sizeof buf);
+		disk->l1[i] = GET8(buf);
+	}
+
+	d = dirfstat(fd);
+	if(d == nil)
+		sysfatal("dirfstat: %r");
+	disk->end = d->length;
+	free(d);
+}
+
+/*
+ * Translate the virtual offset off into an offset in the image file.
+ * Returns 0 if nothing is allocated there and -1 (as a u64int) if
+ * off is outside the disk or the L2 entry cannot be read.
+ * If inplace is not nil it is set to whether the cluster carries
+ * the QCOW2_INPLACE bit, and cleared on a miss.
+ */
+static u64int
+xlate(Disk *disk, s64int off, int *inplace)
+{
+	s64int l2sz, l1off, l2tab, l2off, cluster, clusteroff;
+	uchar buf[8];
+
+	/*
+	 * Clear out inplace flag -- xlate misses should not
+	 * be flagged as updatable in place. We will still
+	 * return 0 from them, but this leaves less surprises
+	 * in the API.
+	 */
+	if (inplace)
+		*inplace = 0;
+	rlock(&disk->lock);
+	if (off < 0)
+		goto err;
+
+	l2sz = disk->clustersz / 8;
+	l1off = (off / disk->clustersz) / l2sz;
+	if (l1off >= disk->l1sz)
+		goto err;
+
+	l2tab = disk->l1[l1off];
+	l2tab &= ~QCOW2_INPLACE;
+	if (l2tab == 0) {
+		runlock(&disk->lock);
+		return 0;
+	}
+	l2off = (off / disk->clustersz) % l2sz;
+	if (pread(disk->fd, buf, sizeof(buf), l2tab + l2off * 8) != sizeof(buf))
+		goto err;
+	cluster = GET8(buf);
+	/*
+	 * cluster may be 0, but all future operations don't affect
+	 * the return value.
+	 */
+	if (inplace)
+		*inplace = !!(cluster & QCOW2_INPLACE);
+	if (cluster & QCOW2_COMPRESSED)
+		sysfatal("xlate: compressed clusters unsupported");
+	runlock(&disk->lock);
+	clusteroff = 0;
+	cluster &= ~QCOW2_INPLACE;
+	if (cluster)
+		clusteroff = off % disk->clustersz;
+	return cluster + clusteroff;
+err:
+	runlock(&disk->lock);
+	return -1;
+}
+
+/*
+ * Record a reference to the cluster containing file offset off.
+ * With newcluster set the count is written as 1; otherwise the
+ * stored count is read and incremented.  A refcount block is
+ * appended to the image when the table has none for the cluster.
+ * Any failure is fatal.
+ */
+static void
+inc_refs(Disk *disk, s64int off, int newcluster)
+{
+	s64int l1off, l1idx, l2idx, l2cluster;
+	u64int nper;
+	u16int refs;
+	uchar buf[8], buf2[2];
+
+	off &= ~QCOW2_INPLACE;
+	nper = disk->clustersz / 2;
+	l1idx = (off / disk->clustersz) / nper;
+	l2idx = (off / disk->clustersz) % nper;
+	l1off = disk->refoff + 8 * l1idx;
+	if (pread(disk->fd, buf, sizeof(buf), l1off) != 8)
+		sysfatal("could not read refs");
+
+	l2cluster = GET8(buf);
+	if (l2cluster == 0) {
+		l2cluster = disk->end;
+		disk->end += disk->clustersz;
+		if (ftruncate(disk->fd, disk->end) < 0)
+			sysfatal("inc_refs: failed to allocate ref block");
+		PUT8(buf, l2cluster);
+		if (pwrite(disk->fd, buf, sizeof(buf), l1off) != 8)
+			sysfatal("inc_refs: failed to write ref block");
+	}
+
+	refs = 1;
+	if (!newcluster) {
+		if (pread(disk->fd, buf2, sizeof buf2,
+		    l2cluster + 2 * l2idx) != 2)
+			sysfatal("could not read ref cluster");
+		refs = GET2(buf2) + 1;
+	}
+	PUT2(buf2, refs);
+	if (pwrite(disk->fd, buf2, sizeof buf2, l2cluster + 2 * l2idx) != 2)
+		sysfatal("inc_refs: could not write ref block");
+}
+
+/*
+ * Copy the cluster containing src in base to the cluster
+ * containing dst in disk.  Any failure is fatal.
+ */
+static void
+copy_cluster(Disk *disk, Disk *base, u64int dst, u64int src)
+{
+	char *scratch;
+
+	/* zeroed so a short read cannot put stale heap bytes into the image */
+	scratch = mallocz(disk->clustersz, 1);
+	if(!scratch)
+		sysfatal("out of memory");
+	src &= ~(disk->clustersz - 1);
+	dst &= ~(disk->clustersz - 1);
+	if(pread(base->fd, scratch, disk->clustersz, src) == -1)
+		sysfatal("copy_cluster: could not read cluster");
+	if(pwrite(disk->fd, scratch, disk->clustersz, dst) != disk->clustersz)
+		sysfatal("copy_cluster: could not write cluster");
+	free(scratch);
+}
+
+/*
+ * Allocates a new cluster on disk, creating a new L2 table
+ * if needed. The cluster starts off with a refs of one,
+ * and the writable bit set. If src_phys is positive, the
+ * cluster at that offset in base is copied into the new one.
+ *
+ * Returns the physical address within the new cluster of
+ * the write offset. Any failure is fatal.
+ */
+static s64int
+mkcluster(Disk *disk, Disk *base, s64int off, s64int src_phys)
+{
+	s64int l2sz, l1off, l2tab, l2off, cluster, clusteroff, orig;
+	uchar buf[8];
+
+	wlock(&disk->lock);
+
+	/* L1 entries always exist */
+	l2sz = disk->clustersz / 8;
+	l1off = off / (disk->clustersz * l2sz);
+	if (l1off >= disk->l1sz)
+		sysfatal("l1 offset outside disk");
+
+	disk->end = (disk->end + disk->clustersz - 1) & ~(disk->clustersz - 1);
+
+	l2tab = disk->l1[l1off];
+	l2off = (off / disk->clustersz) % l2sz;
+	/* We may need to create or clone an L2 entry to map the block */
+	if (l2tab == 0 || (l2tab & QCOW2_INPLACE) == 0) {
+		orig = l2tab & ~QCOW2_INPLACE;
+		l2tab = disk->end;
+		disk->end += disk->clustersz;
+		if (ftruncate(disk->fd, disk->end) == -1)
+			sysfatal("mkcluster: ftruncate failed");
+
+		/*
+		 * If we translated, found a L2 entry, but it needed to
+		 * be copied, copy it.
+		 */
+		if (orig != 0)
+			copy_cluster(disk, disk, l2tab, orig);
+		/* Update l1 -- we flush it later */
+		disk->l1[l1off] = l2tab | QCOW2_INPLACE;
+		inc_refs(disk, l2tab, 1);
+	}
+	l2tab &= ~QCOW2_INPLACE;
+
+	/* Grow the disk */
+	if (ftruncate(disk->fd, disk->end + disk->clustersz) < 0)
+		sysfatal("mkcluster: could not grow disk");
+	if (src_phys > 0)
+		copy_cluster(disk, base, disk->end, src_phys);
+	cluster = disk->end;
+	disk->end += disk->clustersz;
+	PUT8(buf, cluster | QCOW2_INPLACE);
+	if (pwrite(disk->fd, buf, sizeof(buf), l2tab + l2off * 8) != 8)
+		sysfatal("mkcluster: could not write cluster");
+
+	PUT8(buf, disk->l1[l1off]);
+	if (pwrite(disk->fd, buf, sizeof(buf), disk->l1off + 8 * l1off) != 8)
+		sysfatal("mkcluster: could not write l1");
+	inc_refs(disk, cluster, 1);
+
+	wunlock(&disk->lock);
+	clusteroff = off % disk->clustersz;
+	if (cluster + clusteroff < disk->clustersz)
+		sysfatal("write would clobber header");
+	return cluster + clusteroff;
+}
+
+/*
+ * 9P read: return image data from the request offset, clipped
+ * to the size of the disk. Unallocated clusters read as zeros.
+ */
+static void
+fsread(Req *r)
+{
+	char *buf;
+	Disk *d;
+	s64int off, phys_off, cluster_off;
+	u64int len, sz, rem;
+	long n;
+
+	off = r->ifcall.offset;
+	buf = r->ofcall.data;
+	len = r->ifcall.count;
+	d = r->fid->file->aux;
+
+	/* a read that starts at or past the end returns no data */
+	if(off < 0 || off >= d->disksz)
+		len = 0;
+	else if(len > d->disksz - off)
+		len = d->disksz - off;
+
+	rem = len;
+	while(rem != 0){
+		cluster_off = off % d->clustersz;
+		sz = d->clustersz - cluster_off;
+		if(sz > rem)
+			sz = rem;
+
+		/* looked up for each cluster: a hole says nothing about the next one */
+		phys_off = xlate(d, off, nil);
+		if(phys_off == -1){
+			respond(r, "xlate error");
+			return;
+		}
+		if(phys_off == 0)
+			memset(buf, 0, sz);
+		else{
+			n = pread(d->fd, buf, sz, phys_off);
+			if(n < 0){
+				responderror(r);
+				return;
+			}
+			if(n == 0){
+				respond(r, "unexpected end of image");
+				return;
+			}
+			sz = n;
+		}
+		off += sz;
+		buf += sz;
+		rem -= sz;
+	}
+	r->ofcall.count = len;
+	respond(r, nil);
+}
+
+/*
+ * 9P write: store the request data at the request offset.
+ * A cluster that is unallocated, or that xlate does not report
+ * as writable in place, is first replaced with a new cluster
+ * from mkcluster. Writes reaching past the disk are refused.
+ */
+static void
+fswrite(Req *r)
+{
+	char *buf;
+	Disk *d;
+	s64int off, phys_off, end, cluster_off;
+	u64int len, sz, rem;
+	int inplace;
+
+	off = r->ifcall.offset;
+	buf = r->ifcall.data;
+	len = r->ifcall.count;
+	d = r->fid->file->aux;
+
+	end = off + len;
+	if(off < 0 || end > d->disksz){
+		respond(r, "end of device");
+		return;
+	}
+
+	rem = len;
+	while(off != end){
+		cluster_off = off % d->clustersz;
+		sz = d->clustersz - cluster_off;
+		if(sz > rem)
+			sz = rem;
+		/* ask xlate for the flag rather than assuming every cluster is writable */
+		phys_off = xlate(d, off, &inplace);
+		if(phys_off == -1){
+			respond(r, "xlate error");
+			return;
+		}
+
+		if(!inplace || phys_off == 0)
+			phys_off = mkcluster(d, d, off, phys_off);
+		if(phys_off == -1){
+			respond(r, "mkcluster error");
+			return;
+		}
+		if(phys_off < d->clustersz)
+			sysfatal("fswrite: writing reserved cluster");
+		if(pwrite(d->fd, buf, sz, phys_off) != sz){
+			respond(r, "phase error");
+			return;
+		}
+		off += sz;
+		buf += sz;
+		rem -= sz;
+	}
+
+	r->ofcall.count = len;
+	respond(r, nil);
+}
+
+Srv fs = {
+.read = fsread,
+.write = fswrite,
+};
+
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-s srv] [-m mntpt ] [-n size] file\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Open the image named on the command line, creating it
+ * first when -n is given, and serve it as a file named data.
+ */
+void
+main(int argc, char **argv)
+{
+	int fd;
+	char *uid;
+	File *f;
+	Disk *d;
+	uvlong size;
+	int nflag;
+	char *mntpt = "/mnt/qcow";
+	char *srvname = nil;
+
+	size = 0;
+	nflag = 0;
+	ARGBEGIN{
+	case 'm':
+		mntpt = EARGF(usage());
+		break;
+	case 'n':
+		size = strtoull(EARGF(usage()), nil, 0);
+		nflag++;
+		break;
+	case 's':
+		srvname = EARGF(usage());
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+	if(argc < 1)
+		usage();
+
+	if(nflag){
+		if((fd = create(argv[0], ORDWR, 0666)) < 0)
+			sysfatal("create: %r");
+		qc2create(fd, size);
+		seek(fd, 0, 0);
+	} else if((fd = open(argv[0], ORDWR)) < 0)
+		sysfatal("open: %r");
+
+	uid = getuser();
+	fs.tree = alloctree(uid, uid, 0755, nil);
+	if(fs.tree == nil)
+		sysfatal("alloctree: %r");
+
+	f = createfile(fs.tree->root, "data", uid, 0666, nil);
+	if(f == nil)
+		sysfatal("createfile: %r");
+	d = mallocz(sizeof(Disk), 1);
+	if(d == nil)
+		sysfatal("mallocz: %r");
+	qc2open(d, fd);
+	f->aux = d;
+	f->length = d->disksz;
+	postmountsrv(&fs, srvname, mntpt, MREPL);
+	exits(nil);
+}