# HG changeset patch # User Ron Minnich # Date 1317421452 25200 # Node ID e1f6266f79c159d13097c3c174367c22c0573f5f # Parent 9c93e936f28e4066db82d2198b1b30e293ac97b9 another go. Get rid of the .h files. Very few C files. It actually works as a venti and as a vac. It's not pretty but it will let me experiment. R=nix-dev, noah.evans CC=nix-dev http://codereview.appspot.com/5154049 diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/dat.h --- a/sys/src/cmd/vtvacfs/dat.h Thu Sep 29 20:05:50 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -typedef struct MetaBlock MetaBlock; -typedef struct MetaEntry MetaEntry; - -#define MaxBlock (1UL<<31) - -enum { - BytesPerEntry = 100, /* estimate of bytes per dir entries - determines number of index entries in the block */ - FullPercentage = 80, /* don't allocate in block if more than this percentage full */ - FlushSize = 200, /* number of blocks to flush */ - DirtyPercentage = 50 /* maximum percentage of dirty blocks */ -}; - - -struct MetaEntry -{ - uchar *p; - ushort size; -}; - -struct MetaBlock -{ - int maxsize; /* size of block */ - int size; /* size used */ - int free; /* free space within used size */ - int maxindex; /* entries allocated for table */ - int nindex; /* amount of table used */ - int unbotch; - uchar *buf; -}; - -struct VacDirEnum -{ - VacFile *file; - u32int boff; - int i, n; - VacDir *buf; -}; - diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/error.h --- a/sys/src/cmd/vtvacfs/error.h Thu Sep 29 20:05:50 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -/* - * Somehow has been included on Mac OS X - */ -#undef EIO - -extern char ENoDir[]; -extern char EBadDir[]; -extern char EBadMeta[]; -extern char ENilBlock[]; -extern char ENotDir[]; -extern char ENotFile[]; -extern char EIO[]; -extern char EBadOffset[]; -extern char ETooBig[]; -extern char EReadOnly[]; -extern char ERemoved[]; -extern char ENotEmpty[]; -extern char EExists[]; -extern char 
ERoot[]; -extern char ENoFile[]; -extern char EBadPath[]; diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/file.c --- a/sys/src/cmd/vtvacfs/file.c Thu Sep 29 20:05:50 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2099 +0,0 @@ -#include "stdinc.h" -#include "vac.h" -#include "dat.h" -#include "fns.h" -#include "error.h" - -#define debug 0 - -/* - * Vac file system. This is a simplified version of the same code in Fossil. - * - * The locking order in the tree is upward: a thread can hold the lock - * for a VacFile and then acquire the lock of f->up (the parent), - * but not vice-versa. - * - * A vac file is one or two venti files. Plain data files are one venti file, - * while directores are two: a venti data file containing traditional - * directory entries, and a venti directory file containing venti - * directory entries. The traditional directory entries in the data file - * contain integers indexing into the venti directory entry file. - * It's a little complicated, but it makes the data usable by standard - * tools like venti/copy. 
- * - */ - -static int filemetaflush(VacFile*, char*); - -struct VacFile -{ - VacFs *fs; /* immutable */ - - /* meta data for file: protected by the lk in the parent */ - int ref; /* holds this data structure up */ - - int partial; /* file was never really open */ - int removed; /* file has been removed */ - int dirty; /* dir is dirty with respect to meta data in block */ - u32int boff; /* block offset within msource for this file's metadata */ - VacDir dir; /* metadata for this file */ - VacFile *up; /* parent file */ - VacFile *next; /* sibling */ - - RWLock lk; /* lock for the following */ - VtFile *source; /* actual data */ - VtFile *msource; /* metadata for children in a directory */ - VacFile *down; /* children */ - int mode; - - uvlong qidoffset; /* qid offset */ -}; - -static VacFile* -filealloc(VacFs *fs) -{ - VacFile *f; - - f = vtmallocz(sizeof(VacFile)); - f->ref = 1; - f->fs = fs; - f->boff = NilBlock; - f->mode = fs->mode; - return f; -} - -static void -filefree(VacFile *f) -{ - vtfileclose(f->source); - vtfileclose(f->msource); - vdcleanup(&f->dir); - memset(f, ~0, sizeof *f); /* paranoia */ - vtfree(f); -} - -static int -chksource(VacFile *f) -{ - if(f->partial) - return 0; - - if(f->source == nil - || ((f->dir.mode & ModeDir) && f->msource == nil)){ - werrstr(ERemoved); - return -1; - } - return 0; -} - -static int -filelock(VacFile *f) -{ - wlock(&f->lk); - if(chksource(f) < 0){ - wunlock(&f->lk); - return -1; - } - return 0; -} - -static void -fileunlock(VacFile *f) -{ - wunlock(&f->lk); -} - -static int -filerlock(VacFile *f) -{ - rlock(&f->lk); - if(chksource(f) < 0){ - runlock(&f->lk); - return -1; - } - return 0; -} - -static void -filerunlock(VacFile *f) -{ - runlock(&f->lk); -} - -/* - * The file metadata, like f->dir and f->ref, - * are synchronized via the parent's lock. - * This is why locking order goes up. 
- */ -static void -filemetalock(VacFile *f) -{ - assert(f->up != nil); - wlock(&f->up->lk); -} - -static void -filemetaunlock(VacFile *f) -{ - wunlock(&f->up->lk); -} - -uvlong -vacfilegetid(VacFile *f) -{ - /* immutable */ - return f->qidoffset + f->dir.qid; -} - -uvlong -vacfilegetqidoffset(VacFile *f) -{ - return f->qidoffset; -} - -ulong -vacfilegetmcount(VacFile *f) -{ - ulong mcount; - - filemetalock(f); - mcount = f->dir.mcount; - filemetaunlock(f); - return mcount; -} - -ulong -vacfilegetmode(VacFile *f) -{ - ulong mode; - - filemetalock(f); - mode = f->dir.mode; - filemetaunlock(f); - return mode; -} - -int -vacfileisdir(VacFile *f) -{ - /* immutable */ - return (f->dir.mode & ModeDir) != 0; -} - -int -vacfileisroot(VacFile *f) -{ - return f == f->fs->root; -} - -/* - * The files are reference counted, and while the reference - * is bigger than zero, each file can be found in its parent's - * f->down list (chains via f->next), so that multiple threads - * end up sharing a VacFile* when referring to the same file. - * - * Each VacFile holds a reference to its parent. - */ -VacFile* -vacfileincref(VacFile *vf) -{ - filemetalock(vf); - assert(vf->ref > 0); - vf->ref++; - filemetaunlock(vf); - return vf; -} - -int -vacfiledecref(VacFile *f) -{ - VacFile *p, *q, **qq; - - if(f->up == nil){ - /* never linked in */ - assert(f->ref == 1); - filefree(f); - return 0; - } - - filemetalock(f); - f->ref--; - if(f->ref > 0){ - filemetaunlock(f); - return -1; - } - assert(f->ref == 0); - assert(f->down == nil); - - if(f->source && vtfilelock(f->source, -1) >= 0){ - vtfileflush(f->source); - vtfileunlock(f->source); - } - if(f->msource && vtfilelock(f->msource, -1) >= 0){ - vtfileflush(f->msource); - vtfileunlock(f->msource); - } - - /* - * Flush f's directory information to the cache. 
- */ - filemetaflush(f, nil); - - p = f->up; - qq = &p->down; - for(q = *qq; q; q = *qq){ - if(q == f) - break; - qq = &q->next; - } - assert(q != nil); - *qq = f->next; - - filemetaunlock(f); - filefree(f); - vacfiledecref(p); - return 0; -} - - -/* - * Construct a vacfile for the root of a vac tree, given the - * venti file for the root information. That venti file is a - * directory file containing VtEntries for three more venti files: - * the two venti files making up the root directory, and a - * third venti file that would be the metadata half of the - * "root's parent". - * - * Fossil generates slightly different vac files, due to a now - * impossible-to-change bug, which contain a VtEntry - * for just one venti file, that itself contains the expected - * three directory entries. Sigh. - */ -VacFile* -_vacfileroot(VacFs *fs, VtFile *r) -{ - int redirected; - char err[ERRMAX]; - VtBlock *b; - VtFile *r0, *r1, *r2; - MetaBlock mb; - MetaEntry me; - VacFile *root, *mr; - - redirected = 0; -Top: - b = nil; - root = nil; - mr = nil; - r1 = nil; - r2 = nil; - - if(vtfilelock(r, -1) < 0) - return nil; - r0 = vtfileopen(r, 0, fs->mode); - if(debug) - fprint(2, "r0 %p\n", r0); - if(r0 == nil) - goto Err; - r2 = vtfileopen(r, 2, fs->mode); - if(debug) - fprint(2, "r2 %p\n", r2); - if(r2 == nil){ - /* - * some vac files (e.g., from fossil) - * have an extra layer of indirection. 
- */ - rerrstr(err, sizeof err); - if(!redirected && strstr(err, "not active")){ - redirected = 1; - vtfileunlock(r); - r = r0; - goto Top; - } - goto Err; - } - r1 = vtfileopen(r, 1, fs->mode); - if(debug) - fprint(2, "r1 %p\n", r1); - if(r1 == nil) - goto Err; - - mr = filealloc(fs); - mr->msource = r2; - r2 = nil; - - root = filealloc(fs); - root->boff = 0; - root->up = mr; - root->source = r0; - r0 = nil; - root->msource = r1; - r1 = nil; - - mr->down = root; - vtfileunlock(r); - - if(vtfilelock(mr->msource, VtOREAD) < 0) - goto Err1; - b = vtfileblock(mr->msource, 0, VtOREAD); - vtfileunlock(mr->msource); - if(b == nil) - goto Err1; - - if(mbunpack(&mb, b->data, mr->msource->dsize) < 0) - goto Err1; - - meunpack(&me, &mb, 0); - if(vdunpack(&root->dir, &me) < 0) - goto Err1; - vtblockput(b); - - return root; -Err: - vtfileunlock(r); -Err1: - vtblockput(b); - if(r0) - vtfileclose(r0); - if(r1) - vtfileclose(r1); - if(r2) - vtfileclose(r2); - if(mr) - filefree(mr); - if(root) - filefree(root); - - return nil; -} - -/* - * Vac directories are a sequence of metablocks, each of which - * contains a bunch of metaentries sorted by file name. - * The whole sequence isn't sorted, though, so you still have - * to look at every block to find a given name. - * Dirlookup looks in f for an element name elem. - * It returns a new VacFile with the dir, boff, and mode - * filled in, but the sources (venti files) are not, and f is - * not yet linked into the tree. These details must be taken - * care of by the caller. - * - * f must be locked, f->msource must not. 
- */ -static VacFile* -dirlookup(VacFile *f, char *elem) -{ - int i; - MetaBlock mb; - MetaEntry me; - VtBlock *b; - VtFile *meta; - VacFile *ff; - u32int bo, nb; - - meta = f->msource; - b = nil; - if(vtfilelock(meta, -1) < 0) - return nil; - nb = (vtfilegetsize(meta)+meta->dsize-1)/meta->dsize; - for(bo=0; bodata, meta->dsize) < 0) - goto Err; - if(mbsearch(&mb, elem, &i, &me) >= 0){ - ff = filealloc(f->fs); - if(vdunpack(&ff->dir, &me) < 0){ - filefree(ff); - goto Err; - } - ff->qidoffset = f->qidoffset + ff->dir.qidoffset; - vtfileunlock(meta); - vtblockput(b); - ff->boff = bo; - ff->mode = f->mode; - return ff; - } - vtblockput(b); - b = nil; - } - werrstr(ENoFile); - /* fall through */ -Err: - vtfileunlock(meta); - vtblockput(b); - return nil; -} - -/* - * Open the venti file at offset in the directory f->source. - * f is locked. - */ -static VtFile * -fileopensource(VacFile *f, u32int offset, u32int gen, int dir, uint mode) -{ - VtFile *r; - - if((r = vtfileopen(f->source, offset, mode)) == nil) - return nil; - if(r == nil) - return nil; - if(r->gen != gen){ - werrstr(ERemoved); - vtfileclose(r); - return nil; - } - if(r->dir != dir && r->mode != -1){ - werrstr(EBadMeta); - vtfileclose(r); - return nil; - } - return r; -} - -VacFile* -vacfilegetparent(VacFile *f) -{ - if(vacfileisroot(f)) - return vacfileincref(f); - return vacfileincref(f->up); -} - -/* - * Given an unlocked vacfile (directory) f, - * return the vacfile named elem in f. - * Interprets . and .. as a convenience to callers. 
- */ -VacFile* -vacfilewalk(VacFile *f, char *elem) -{ - VacFile *ff; - - if(elem[0] == 0){ - werrstr(EBadPath); - return nil; - } - - if(!vacfileisdir(f)){ - werrstr(ENotDir); - return nil; - } - - if(strcmp(elem, ".") == 0) - return vacfileincref(f); - - if(strcmp(elem, "..") == 0) - return vacfilegetparent(f); - - if(filelock(f) < 0) - return nil; - - for(ff = f->down; ff; ff=ff->next){ - if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){ - ff->ref++; - goto Exit; - } - } - - ff = dirlookup(f, elem); - if(ff == nil) - goto Err; - - if(ff->dir.mode & ModeSnapshot) - ff->mode = VtOREAD; - - if(vtfilelock(f->source, f->mode) < 0) - goto Err; - if(ff->dir.mode & ModeDir){ - ff->source = fileopensource(f, ff->dir.entry, ff->dir.gen, 1, ff->mode); - ff->msource = fileopensource(f, ff->dir.mentry, ff->dir.mgen, 0, ff->mode); - if(ff->source == nil || ff->msource == nil) - goto Err1; - }else{ - ff->source = fileopensource(f, ff->dir.entry, ff->dir.gen, 0, ff->mode); - if(ff->source == nil) - goto Err1; - } - vtfileunlock(f->source); - - /* link in and up parent ref count */ - ff->next = f->down; - f->down = ff; - ff->up = f; - vacfileincref(f); -Exit: - fileunlock(f); - return ff; - -Err1: - vtfileunlock(f->source); -Err: - fileunlock(f); - if(ff != nil) - vacfiledecref(ff); - return nil; -} - -/* - * Open a path in the vac file system: - * just walk each element one at a time. 
- */ -VacFile* -vacfileopen(VacFs *fs, char *path) -{ - VacFile *f, *ff; - char *p, elem[VtMaxStringSize], *opath; - int n; - - f = fs->root; - vacfileincref(f); - opath = path; - while(*path != 0){ - for(p = path; *p && *p != '/'; p++) - ; - n = p - path; - if(n > 0){ - if(n > VtMaxStringSize){ - werrstr("%s: element too long", EBadPath); - goto Err; - } - memmove(elem, path, n); - elem[n] = 0; - ff = vacfilewalk(f, elem); - if(ff == nil){ - werrstr("%.*s: %r", utfnlen(opath, p-opath), opath); - goto Err; - } - vacfiledecref(f); - f = ff; - } - if(*p == '/') - p++; - path = p; - } - return f; -Err: - vacfiledecref(f); - return nil; -} - -/* - * Extract the score for the bn'th block in f. - */ -int -vacfileblockscore(VacFile *f, u32int bn, u8int *score) -{ - VtFile *s; - uvlong size; - int dsize, ret; - - ret = -1; - if(filerlock(f) < 0) - return -1; - if(vtfilelock(f->source, VtOREAD) < 0) - goto out; - - s = f->source; - dsize = s->dsize; - size = vtfilegetsize(s); - if((uvlong)bn*dsize >= size) - goto out1; - ret = vtfileblockscore(f->source, bn, score); - -out1: - vtfileunlock(f->source); -out: - filerunlock(f); - return ret; -} - -/* - * Read data from f. - */ -int -vacfileread(VacFile *f, void *buf, int cnt, vlong offset) -{ - int n; - - if(offset < 0){ - werrstr(EBadOffset); - return -1; - } - if(filerlock(f) < 0) - return -1; - if(vtfilelock(f->source, VtOREAD) < 0){ - filerunlock(f); - return -1; - } - n = vtfileread(f->source, buf, cnt, offset); - vtfileunlock(f->source); - filerunlock(f); - return n; -} - -static int -getentry(VtFile *f, VtEntry *e) -{ - if(vtfilelock(f, VtOREAD) < 0) - return -1; - if(vtfilegetentry(f, e) < 0){ - vtfileunlock(f); - return -1; - } - vtfileunlock(f); - if(vtglobaltolocal(e->score) != NilBlock){ - werrstr("internal error - data not on venti"); - return -1; - } - return 0; -} - -/* - * Get the VtEntries for the data contained in f. 
- */ -int -vacfilegetentries(VacFile *f, VtEntry *e, VtEntry *me) -{ - if(filerlock(f) < 0) - return -1; - if(e && getentry(f->source, e) < 0){ - filerunlock(f); - return -1; - } - if(me){ - if(f->msource == nil) - memset(me, 0, sizeof *me); - else if(getentry(f->msource, me) < 0){ - filerunlock(f); - return -1; - } - } - filerunlock(f); - return 0; -} - -/* - * Get the file's size. - */ -int -vacfilegetsize(VacFile *f, uvlong *size) -{ - if(filerlock(f) < 0) - return -1; - if(vtfilelock(f->source, VtOREAD) < 0){ - filerunlock(f); - return -1; - } - *size = vtfilegetsize(f->source); - vtfileunlock(f->source); - filerunlock(f); - - return 0; -} - -/* - * Directory reading. - * - * A VacDirEnum is a buffer containing directory entries. - * Directory entries contain malloced strings and need to - * be cleaned up with vdcleanup. The invariant in the - * VacDirEnum is that the directory entries between - * vde->i and vde->n are owned by the vde and need to - * be cleaned up if it is closed. Those from 0 up to vde->i - * have been handed to the reader, and the reader must - * take care of calling vdcleanup as appropriate. - */ -VacDirEnum* -vdeopen(VacFile *f) -{ - VacDirEnum *vde; - VacFile *p; - - if(!vacfileisdir(f)){ - werrstr(ENotDir); - return nil; - } - - /* - * There might be changes to this directory's children - * that have not been flushed out into the cache yet. - * Those changes are only available if we look at the - * VacFile structures directory. But the directory reader - * is going to read the cache blocks directly, so update them. - */ - if(filelock(f) < 0) - return nil; - for(p=f->down; p; p=p->next) - filemetaflush(p, nil); - fileunlock(f); - - vde = vtmallocz(sizeof(VacDirEnum)); - vde->file = vacfileincref(f); - - return vde; -} - -/* - * Figure out the size of the directory entry at offset. 
- * The rest of the metadata is kept in the data half, - * but since venti has to track the data size anyway, - * we just use that one and avoid updating the directory - * each time the file size changes. - */ -static int -direntrysize(VtFile *s, ulong offset, ulong gen, uvlong *size) -{ - VtBlock *b; - ulong bn; - VtEntry e; - int epb; - - epb = s->dsize/VtEntrySize; - bn = offset/epb; - offset -= bn*epb; - - b = vtfileblock(s, bn, VtOREAD); - if(b == nil) - goto Err; - if(vtentryunpack(&e, b->data, offset) < 0) - goto Err; - - /* dangling entries are returned as zero size */ - if(!(e.flags & VtEntryActive) || e.gen != gen) - *size = 0; - else - *size = e.size; - vtblockput(b); - return 0; - -Err: - vtblockput(b); - return -1; -} - -/* - * Fill in vde with a new batch of directory entries. - */ -static int -vdefill(VacDirEnum *vde) -{ - int i, n; - VtFile *meta, *source; - MetaBlock mb; - MetaEntry me; - VacFile *f; - VtBlock *b; - VacDir *de; - - /* clean up first */ - for(i=vde->i; in; i++) - vdcleanup(vde->buf+i); - vtfree(vde->buf); - vde->buf = nil; - vde->i = 0; - vde->n = 0; - - f = vde->file; - - source = f->source; - meta = f->msource; - - b = vtfileblock(meta, vde->boff, VtOREAD); - if(b == nil) - goto Err; - if(mbunpack(&mb, b->data, meta->dsize) < 0) - goto Err; - - n = mb.nindex; - vde->buf = vtmalloc(n * sizeof(VacDir)); - - for(i=0; ibuf + i; - meunpack(&me, &mb, i); - if(vdunpack(de, &me) < 0) - goto Err; - vde->n++; - if(!(de->mode & ModeDir)) - if(direntrysize(source, de->entry, de->gen, &de->size) < 0) - goto Err; - } - vde->boff++; - vtblockput(b); - return 0; -Err: - vtblockput(b); - return -1; -} - -/* - * Read a single directory entry from vde into de. - * Returns -1 on error, 0 on EOF, and 1 on success. - * When it returns 1, it becomes the caller's responsibility - * to call vdcleanup(de) to free the strings contained - * inside, or else to call vdunread to give it back. 
- */ -int -vderead(VacDirEnum *vde, VacDir *de) -{ - int ret; - VacFile *f; - u32int nb; - - f = vde->file; - if(filerlock(f) < 0) - return -1; - - if(vtfilelock2(f->source, f->msource, VtOREAD) < 0){ - filerunlock(f); - return -1; - } - - nb = (vtfilegetsize(f->msource)+f->msource->dsize-1)/f->msource->dsize; - - while(vde->i >= vde->n){ - if(vde->boff >= nb){ - ret = 0; - goto Return; - } - if(vdefill(vde) < 0){ - ret = -1; - goto Return; - } - } - - memmove(de, vde->buf + vde->i, sizeof(VacDir)); - vde->i++; - ret = 1; - -Return: - vtfileunlock(f->source); - vtfileunlock(f->msource); - filerunlock(f); - - return ret; -} - -/* - * "Unread" the last directory entry that was read, - * so that the next vderead will return the same one. - * If the caller calls vdeunread(vde) it should not call - * vdcleanup on the entry being "unread". - */ -int -vdeunread(VacDirEnum *vde) -{ - if(vde->i > 0){ - vde->i--; - return 0; - } - return -1; -} - -/* - * Close the enumerator. - */ -void -vdeclose(VacDirEnum *vde) -{ - int i; - if(vde == nil) - return; - /* free the strings */ - for(i=vde->i; in; i++) - vdcleanup(vde->buf+i); - vtfree(vde->buf); - vacfiledecref(vde->file); - vtfree(vde); -} - - -/* - * On to mutation. If the vac file system has been opened - * read-write, then the files and directories can all be edited. - * Changes are kept in the in-memory cache until flushed out - * to venti, so we must be careful to explicitly flush data - * that we're not likely to modify again. - * - * Each VacFile has its own copy of its VacDir directory entry - * in f->dir, but otherwise the cache is the authoratative source - * for data. Thus, for the most part, it suffices if we just - * call vtfileflushbefore and vtfileflush when we modify things. - * There are a few places where we have to remember to write - * changed VacDirs back into the cache. If f->dir *is* out of sync, - * then f->dirty should be set. 
- * - * The metadata in a directory is, to venti, a plain data file, - * but as mentioned above it is actually a sequence of - * MetaBlocks that contain sorted lists of VacDir entries. - * The filemetaxxx routines manipulate that stream. - */ - -/* - * Find space in fp for the directory entry dir (not yet written to disk) - * and write it to disk, returning NilBlock on failure, - * or the block number on success. - * - * Start is a suggested block number to try. - * The caller must have filemetalock'ed f and have - * vtfilelock'ed f->up->msource. - */ -static u32int -filemetaalloc(VacFile *fp, VacDir *dir, u32int start) -{ - u32int nb, bo; - VtBlock *b; - MetaBlock mb; - int nn; - uchar *p; - int i, n; - MetaEntry me; - VtFile *ms; - - ms = fp->msource; - n = vdsize(dir, VacDirVersion); - - /* Look for a block with room for a new entry of size n. */ - nb = (vtfilegetsize(ms)+ms->dsize-1)/ms->dsize; - if(start == NilBlock){ - if(nb > 0) - start = nb - 1; - else - start = 0; - } - - if(start > nb) - start = nb; - for(bo=start; bodata, ms->dsize) < 0) - goto Err; - nn = (mb.maxsize*FullPercentage/100) - mb.size + mb.free; - if(n <= nn && mb.nindex < mb.maxindex){ - /* reopen for writing */ - vtblockput(b); - if((b = vtfileblock(ms, bo, VtORDWR)) == nil) - goto Err; - mbunpack(&mb, b->data, ms->dsize); - goto Found; - } - vtblockput(b); - } - - /* No block found, extend the file by one metablock. */ - vtfileflushbefore(ms, nb*(uvlong)ms->dsize); - if((b = vtfileblock(ms, nb, VtORDWR)) == nil) - goto Err; - vtfilesetsize(ms, (nb+1)*ms->dsize); - mbinit(&mb, b->data, ms->dsize, ms->dsize/BytesPerEntry); - -Found: - /* Now we have a block; allocate space to write the entry. */ - p = mballoc(&mb, n); - if(p == nil){ - /* mballoc might have changed block */ - mbpack(&mb); - werrstr(EBadMeta); - goto Err; - } - - /* Figure out where to put the index entry, and write it. 
*/ - mbsearch(&mb, dir->elem, &i, &me); - assert(me.p == nil); /* not already there */ - me.p = p; - me.size = n; - vdpack(dir, &me, VacDirVersion); - mbinsert(&mb, i, &me); - mbpack(&mb); - vtblockput(b); - return bo; - -Err: - vtblockput(b); - return NilBlock; -} - -/* - * Update f's directory entry in the block cache. - * We look for the directory entry by name; - * if we're trying to rename the file, oelem is the old name. - * - * Assumes caller has filemetalock'ed f. - */ -static int -filemetaflush(VacFile *f, char *oelem) -{ - int i, n; - MetaBlock mb; - MetaEntry me, me2; - VacFile *fp; - VtBlock *b; - u32int bo; - - if(!f->dirty) - return 0; - - if(oelem == nil) - oelem = f->dir.elem; - - /* - * Locate f's old metadata in the parent's metadata file. - * We know which block it was in, but not exactly where - * in the block. - */ - fp = f->up; - if(vtfilelock(fp->msource, -1) < 0) - return -1; - /* can happen if source is clri'ed out from under us */ - if(f->boff == NilBlock) - goto Err1; - b = vtfileblock(fp->msource, f->boff, VtORDWR); - if(b == nil) - goto Err1; - if(mbunpack(&mb, b->data, fp->msource->dsize) < 0) - goto Err; - if(mbsearch(&mb, oelem, &i, &me) < 0) - goto Err; - - /* - * Check whether we can resize the entry and keep it - * in this block. - */ - n = vdsize(&f->dir, VacDirVersion); - if(mbresize(&mb, &me, n) >= 0){ - /* Okay, can be done without moving to another block. */ - - /* Remove old data */ - mbdelete(&mb, i, &me); - - /* Find new location if renaming */ - if(strcmp(f->dir.elem, oelem) != 0) - mbsearch(&mb, f->dir.elem, &i, &me2); - - /* Pack new data into new location. */ - vdpack(&f->dir, &me, VacDirVersion); -vdunpack(&f->dir, &me); - mbinsert(&mb, i, &me); - mbpack(&mb); - - /* Done */ - vtblockput(b); - vtfileunlock(fp->msource); - f->dirty = 0; - return 0; - } - - /* - * The entry must be moved to another block. - * This can only really happen on renames that - * make the name very long. 
- */ - - /* Allocate a spot in a new block. */ - if((bo = filemetaalloc(fp, &f->dir, f->boff+1)) == NilBlock){ - /* mbresize above might have modified block */ - mbpack(&mb); - goto Err; - } - f->boff = bo; - - /* Now we're committed. Delete entry in old block. */ - mbdelete(&mb, i, &me); - mbpack(&mb); - vtblockput(b); - vtfileunlock(fp->msource); - - f->dirty = 0; - return 0; - -Err: - vtblockput(b); -Err1: - vtfileunlock(fp->msource); - return -1; -} - -/* - * Remove the directory entry for f. - */ -static int -filemetaremove(VacFile *f) -{ - VtBlock *b; - MetaBlock mb; - MetaEntry me; - int i; - VacFile *fp; - - b = nil; - fp = f->up; - filemetalock(f); - - if(vtfilelock(fp->msource, VtORDWR) < 0) - goto Err; - b = vtfileblock(fp->msource, f->boff, VtORDWR); - if(b == nil) - goto Err; - - if(mbunpack(&mb, b->data, fp->msource->dsize) < 0) - goto Err; - if(mbsearch(&mb, f->dir.elem, &i, &me) < 0) - goto Err; - mbdelete(&mb, i, &me); - mbpack(&mb); - vtblockput(b); - vtfileunlock(fp->msource); - - f->removed = 1; - f->boff = NilBlock; - f->dirty = 0; - - filemetaunlock(f); - return 0; - -Err: - vtfileunlock(fp->msource); - vtblockput(b); - filemetaunlock(f); - return -1; -} - -/* - * That was far too much effort for directory entries. - * Now we can write code that *does* things. - */ - -/* - * Flush all data associated with f out of the cache and onto venti. - * If recursive is set, flush f's children too. - * Vacfiledecref knows how to flush source and msource too. - */ -int -vacfileflush(VacFile *f, int recursive) -{ - int ret; - VacFile **kids, *p; - int i, nkids; - - if(f->mode == VtOREAD) - return 0; - - ret = 0; - filemetalock(f); - if(filemetaflush(f, nil) < 0) - ret = -1; - filemetaunlock(f); - - if(filelock(f) < 0) - return -1; - - /* - * Lock order prevents us from flushing kids while holding - * lock, so make a list and then flush without the lock. 
- */ - nkids = 0; - kids = nil; - if(recursive){ - nkids = 0; - for(p=f->down; p; p=p->next) - nkids++; - kids = vtmalloc(nkids*sizeof(VacFile*)); - i = 0; - for(p=f->down; p; p=p->next){ - kids[i++] = p; - p->ref++; - } - } - if(nkids > 0){ - fileunlock(f); - for(i=0; isource, -1); - if(vtfileflush(f->source) < 0) - ret = -1; - vtfileunlock(f->source); - if(f->msource){ - vtfilelock(f->msource, -1); - if(vtfileflush(f->msource) < 0) - ret = -1; - vtfileunlock(f->msource); - } - fileunlock(f); - - return ret; -} - -/* - * Create a new file named elem in fp with the given mode. - * The mode can be changed later except for the ModeDir bit. - */ -VacFile* -vacfilecreate(VacFile *fp, char *elem, ulong mode) -{ - VacFile *ff; - VacDir *dir; - VtFile *pr, *r, *mr; - int type; - u32int bo; - - if(filelock(fp) < 0) - return nil; - - /* - * First, look to see that there's not a file in memory - * with the same name. - */ - for(ff = fp->down; ff; ff=ff->next){ - if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){ - ff = nil; - werrstr(EExists); - goto Err1; - } - } - - /* - * Next check the venti blocks. - */ - ff = dirlookup(fp, elem); - if(ff != nil){ - werrstr(EExists); - goto Err1; - } - - /* - * By the way, you can't create in a read-only file system. - */ - pr = fp->source; - if(pr->mode != VtORDWR){ - werrstr(EReadOnly); - goto Err1; - } - - /* - * Okay, time to actually create something. Lock the two - * halves of the directory and create a file. - */ - if(vtfilelock2(fp->source, fp->msource, -1) < 0) - goto Err1; - ff = filealloc(fp->fs); - ff->qidoffset = fp->qidoffset; /* hopefully fp->qidoffset == 0 */ - type = VtDataType; - if(mode & ModeDir) - type = VtDirType; - mr = nil; - if((r = vtfilecreate(pr, pr->psize, pr->dsize, type)) == nil) - goto Err; - if(mode & ModeDir) - if((mr = vtfilecreate(pr, pr->psize, pr->dsize, VtDataType)) == nil) - goto Err; - - /* - * Fill in the directory entry and write it to disk. 
- */ - dir = &ff->dir; - dir->elem = vtstrdup(elem); - dir->entry = r->offset; - dir->gen = r->gen; - if(mode & ModeDir){ - dir->mentry = mr->offset; - dir->mgen = mr->gen; - } - dir->size = 0; - if(_vacfsnextqid(fp->fs, &dir->qid) < 0) - goto Err; - dir->uid = vtstrdup(fp->dir.uid); - dir->gid = vtstrdup(fp->dir.gid); - dir->mid = vtstrdup(""); - dir->mtime = time(0L); - dir->mcount = 0; - dir->ctime = dir->mtime; - dir->atime = dir->mtime; - dir->mode = mode; - if((bo = filemetaalloc(fp, &ff->dir, NilBlock)) == NilBlock) - goto Err; - - /* - * Now we're committed. - */ - vtfileunlock(fp->source); - vtfileunlock(fp->msource); - ff->source = r; - ff->msource = mr; - ff->boff = bo; - - /* Link into tree. */ - ff->next = fp->down; - fp->down = ff; - ff->up = fp; - vacfileincref(fp); - - fileunlock(fp); - - filelock(ff); - vtfilelock(ff->source, -1); - vtfileunlock(ff->source); - fileunlock(ff); - - return ff; - -Err: - vtfileunlock(fp->source); - vtfileunlock(fp->msource); - if(r){ - vtfilelock(r, -1); - vtfileremove(r); - } - if(mr){ - vtfilelock(mr, -1); - vtfileremove(mr); - } -Err1: - if(ff) - vacfiledecref(ff); - fileunlock(fp); - return nil; -} - -/* - * Change the size of the file f. - */ -int -vacfilesetsize(VacFile *f, uvlong size) -{ - if(vacfileisdir(f)){ - werrstr(ENotFile); - return -1; - } - - if(filelock(f) < 0) - return -1; - - if(f->source->mode != VtORDWR){ - werrstr(EReadOnly); - goto Err; - } - if(vtfilelock(f->source, -1) < 0) - goto Err; - if(vtfilesetsize(f->source, size) < 0){ - vtfileunlock(f->source); - goto Err; - } - vtfileunlock(f->source); - fileunlock(f); - return 0; - -Err: - fileunlock(f); - return -1; -} - -/* - * Write data to f. 
- */ -int -vacfilewrite(VacFile *f, void *buf, int cnt, vlong offset) -{ - if(vacfileisdir(f)){ - werrstr(ENotFile); - return -1; - } - if(filelock(f) < 0) - return -1; - if(f->source->mode != VtORDWR){ - werrstr(EReadOnly); - goto Err; - } - if(offset < 0){ - werrstr(EBadOffset); - goto Err; - } - - if(vtfilelock(f->source, -1) < 0) - goto Err; - if(f->dir.mode & ModeAppend) - offset = vtfilegetsize(f->source); - if(vtfilewrite(f->source, buf, cnt, offset) != cnt - || vtfileflushbefore(f->source, offset) < 0){ - vtfileunlock(f->source); - goto Err; - } - vtfileunlock(f->source); - fileunlock(f); - return cnt; - -Err: - fileunlock(f); - return -1; -} - -/* - * Set (!) the VtEntry for the data contained in f. - * This let's us efficiently copy data from one file to another. - */ -int -vacfilesetentries(VacFile *f, VtEntry *e, VtEntry *me) -{ - int ret; - - vacfileflush(f, 0); /* flush blocks to venti, since we won't see them again */ - - if(!(e->flags&VtEntryActive)){ - werrstr("missing entry for source"); - return -1; - } - if(me && !(me->flags&VtEntryActive)) - me = nil; - if(f->msource && !me){ - werrstr("missing entry for msource"); - return -1; - } - if(me && !f->msource){ - werrstr("no msource to set"); - return -1; - } - - if(filelock(f) < 0) - return -1; - if(f->source->mode != VtORDWR - || (f->msource && f->msource->mode != VtORDWR)){ - werrstr(EReadOnly); - fileunlock(f); - return -1; - } - if(vtfilelock2(f->source, f->msource, -1) < 0){ - fileunlock(f); - return -1; - } - ret = 0; - if(vtfilesetentry(f->source, e) < 0) - ret = -1; - else if(me && vtfilesetentry(f->msource, me) < 0) - ret = -1; - - vtfileunlock(f->source); - if(f->msource) - vtfileunlock(f->msource); - fileunlock(f); - return ret; -} - -/* - * Get the directory entry for f. 
- */ -int -vacfilegetdir(VacFile *f, VacDir *dir) -{ - if(filerlock(f) < 0) - return -1; - - filemetalock(f); - vdcopy(dir, &f->dir); - filemetaunlock(f); - - if(!vacfileisdir(f)){ - if(vtfilelock(f->source, VtOREAD) < 0){ - filerunlock(f); - return -1; - } - dir->size = vtfilegetsize(f->source); - vtfileunlock(f->source); - } - filerunlock(f); - - return 0; -} - -/* - * Set the directory entry for f. - */ -int -vacfilesetdir(VacFile *f, VacDir *dir) -{ - VacFile *ff; - char *oelem; - u32int mask; - u64int size; - - /* can not set permissions for the root */ - if(vacfileisroot(f)){ - werrstr(ERoot); - return -1; - } - - if(filelock(f) < 0) - return -1; - filemetalock(f); - - if(f->source->mode != VtORDWR){ - werrstr(EReadOnly); - goto Err; - } - - /* On rename, check new name does not already exist */ - if(strcmp(f->dir.elem, dir->elem) != 0){ - for(ff = f->up->down; ff; ff=ff->next){ - if(strcmp(dir->elem, ff->dir.elem) == 0 && !ff->removed){ - werrstr(EExists); - goto Err; - } - } - ff = dirlookup(f->up, dir->elem); - if(ff != nil){ - vacfiledecref(ff); - werrstr(EExists); - goto Err; - } - werrstr(""); /* "failed" dirlookup poisoned it */ - } - - /* Get ready... */ - if(vtfilelock2(f->source, f->msource, -1) < 0) - goto Err; - if(!vacfileisdir(f)){ - size = vtfilegetsize(f->source); - if(size != dir->size){ - if(vtfilesetsize(f->source, dir->size) < 0){ - vtfileunlock(f->source); - if(f->msource) - vtfileunlock(f->msource); - goto Err; - } - } - } - /* ... now commited to changing it. 
*/ - vtfileunlock(f->source); - if(f->msource) - vtfileunlock(f->msource); - - oelem = nil; - if(strcmp(f->dir.elem, dir->elem) != 0){ - oelem = f->dir.elem; - f->dir.elem = vtstrdup(dir->elem); - } - - if(strcmp(f->dir.uid, dir->uid) != 0){ - vtfree(f->dir.uid); - f->dir.uid = vtstrdup(dir->uid); - } - - if(strcmp(f->dir.gid, dir->gid) != 0){ - vtfree(f->dir.gid); - f->dir.gid = vtstrdup(dir->gid); - } - - f->dir.mtime = dir->mtime; - f->dir.atime = dir->atime; - - mask = ~(ModeDir|ModeSnapshot); - f->dir.mode &= ~mask; - f->dir.mode |= mask & dir->mode; - f->dirty = 1; - - if(filemetaflush(f, oelem) < 0){ - vtfree(oelem); - goto Err; /* that sucks */ - } - vtfree(oelem); - - filemetaunlock(f); - fileunlock(f); - return 0; - -Err: - filemetaunlock(f); - fileunlock(f); - return -1; -} - -/* - * Set the qid space. - */ -int -vacfilesetqidspace(VacFile *f, u64int offset, u64int max) -{ - int ret; - - if(filelock(f) < 0) - return -1; - if(f->source->mode != VtORDWR){ - fileunlock(f); - werrstr(EReadOnly); - return -1; - } - filemetalock(f); - f->dir.qidspace = 1; - f->dir.qidoffset = offset; - f->dir.qidmax = max; - f->dirty = 1; - ret = filemetaflush(f, nil); - filemetaunlock(f); - fileunlock(f); - return ret; -} - -/* - * Check that the file is empty, returning 0 if it is. - * Returns -1 on error (and not being empty is an error). - */ -static int -filecheckempty(VacFile *f) -{ - u32int i, n; - VtBlock *b; - MetaBlock mb; - VtFile *r; - - r = f->msource; - n = (vtfilegetsize(r)+r->dsize-1)/r->dsize; - for(i=0; idata, r->dsize) < 0) - goto Err; - if(mb.nindex > 0){ - werrstr(ENotEmpty); - goto Err; - } - vtblockput(b); - } - return 0; - -Err: - vtblockput(b); - return -1; -} - -/* - * Remove the vac file f. 
- */ -int -vacfileremove(VacFile *f) -{ - VacFile *ff; - - /* Cannot remove the root */ - if(vacfileisroot(f)){ - werrstr(ERoot); - return -1; - } - - if(filelock(f) < 0) - return -1; - if(f->source->mode != VtORDWR){ - werrstr(EReadOnly); - goto Err1; - } - if(vtfilelock2(f->source, f->msource, -1) < 0) - goto Err1; - if(vacfileisdir(f) && filecheckempty(f)<0) - goto Err; - - for(ff=f->down; ff; ff=ff->next) - assert(ff->removed); - - vtfileremove(f->source); - f->source = nil; - if(f->msource){ - vtfileremove(f->msource); - f->msource = nil; - } - fileunlock(f); - - if(filemetaremove(f) < 0) - return -1; - return 0; - -Err: - vtfileunlock(f->source); - if(f->msource) - vtfileunlock(f->msource); -Err1: - fileunlock(f); - return -1; -} - -/* - * Vac file system format. - */ -static char EBadVacFormat[] = "bad format for vac file"; - -static VacFs * -vacfsalloc(VtConn *z, int bsize, int ncache, int mode) -{ - VacFs *fs; - - fs = vtmallocz(sizeof(VacFs)); - fs->z = z; - fs->bsize = bsize; - fs->mode = mode; - fs->cache = vtcachealloc(z, bsize, ncache); - return fs; -} - -static int -readscore(int fd, uchar score[VtScoreSize]) -{ - char buf[45], *pref; - int n; - - n = readn(fd, buf, sizeof(buf)-1); - if(n < sizeof(buf)-1) { - werrstr("short read"); - return -1; - } - buf[n] = 0; - - if(vtparsescore(buf, &pref, score) < 0){ - werrstr(EBadVacFormat); - return -1; - } - if(pref==nil || strcmp(pref, "vac") != 0) { - werrstr("not a vac file"); - return -1; - } - return 0; -} - -VacFs* -vacfsopen(VtConn *z, char *file, int mode, int ncache) -{ - int fd; - uchar score[VtScoreSize]; - char *prefix; - - if(vtparsescore(file, &prefix, score) >= 0){ - if(strcmp(prefix, "vac") != 0){ - werrstr("not a vac file"); - return nil; - } - }else{ - fd = open(file, OREAD); - if(fd < 0) - return nil; - if(readscore(fd, score) < 0){ - close(fd); - return nil; - } - close(fd); - } - return vacfsopenscore(z, score, mode, ncache); -} - -VacFs* -vacfsopenscore(VtConn *z, u8int *score, int 
mode, int ncache) -{ - VacFs *fs; - int n; - VtRoot rt; - uchar buf[VtRootSize]; - VacFile *root; - VtFile *r; - VtEntry e; - - n = vtread(z, score, VtRootType, buf, VtRootSize); - if(n < 0) - return nil; - if(n != VtRootSize){ - werrstr("vtread on root too short"); - return nil; - } - - if(vtrootunpack(&rt, buf) < 0) - return nil; - - if(strcmp(rt.type, "vac") != 0) { - werrstr("not a vac root"); - return nil; - } - - fs = vacfsalloc(z, rt.blocksize, ncache, mode); - memmove(fs->score, score, VtScoreSize); - fs->mode = mode; - - memmove(e.score, rt.score, VtScoreSize); - e.gen = 0; - e.psize = rt.blocksize; - e.dsize = rt.blocksize; - e.type = VtDirType; - e.flags = VtEntryActive; - e.size = 3*VtEntrySize; - - root = nil; - if((r = vtfileopenroot(fs->cache, &e)) == nil) - goto Err; - if(debug) - fprint(2, "r %p\n", r); - root = _vacfileroot(fs, r); - if(debug) - fprint(2, "root %p\n", root); - vtfileclose(r); - if(root == nil) - goto Err; - fs->root = root; - return fs; -Err: - if(root) - vacfiledecref(root); - vacfsclose(fs); - return nil; -} - -int -vacfsmode(VacFs *fs) -{ - return fs->mode; -} - -VacFile* -vacfsgetroot(VacFs *fs) -{ - return vacfileincref(fs->root); -} - -int -vacfsgetblocksize(VacFs *fs) -{ - return fs->bsize; -} - -int -vacfsgetscore(VacFs *fs, u8int *score) -{ - memmove(score, fs->score, VtScoreSize); - return 0; -} - -int -_vacfsnextqid(VacFs *fs, uvlong *qid) -{ - ++fs->qid; - *qid = fs->qid; - return 0; -} - -void -vacfsjumpqid(VacFs *fs, uvlong step) -{ - fs->qid += step; -} - -/* - * Set *maxqid to the maximum qid expected in this file system. - * In newer vac archives, the maximum qid is stored in the - * qidspace VacDir annotation. In older vac archives, the root - * got created last, so it had the maximum qid. 
- */ -int -vacfsgetmaxqid(VacFs *fs, uvlong *maxqid) -{ - VacDir vd; - - if(vacfilegetdir(fs->root, &vd) < 0) - return -1; - if(vd.qidspace) - *maxqid = vd.qidmax; - else - *maxqid = vd.qid; - vdcleanup(&vd); - return 0; -} - - -void -vacfsclose(VacFs *fs) -{ - if(fs->root) - vacfiledecref(fs->root); - fs->root = nil; - vtcachefree(fs->cache); - vtfree(fs); -} - -/* - * Create a fresh vac fs. - */ -VacFs * -vacfscreate(VtConn *z, int bsize, int ncache) -{ - VacFs *fs; - VtFile *f; - uchar buf[VtEntrySize], metascore[VtScoreSize]; - VtEntry e; - VtBlock *b; - MetaBlock mb; - VacDir vd; - MetaEntry me; - int psize; - int mbsize; - - if((fs = vacfsalloc(z, bsize, ncache, VtORDWR)) == nil) - return nil; - - /* - * Fake up an empty vac fs. - */ - psize = bsize; - f = vtfilecreateroot(fs->cache, psize, bsize, VtDirType); - vtfilelock(f, VtORDWR); - - /* Metablocks can't be too big -- they have 16-bit offsets in them. */ - mbsize = bsize; - if(mbsize >= 56*1024) - mbsize = 56*1024; - - /* Write metablock containing root directory VacDir. */ - b = vtcacheallocblock(fs->cache, VtDataType); - mbinit(&mb, b->data, mbsize, mbsize/BytesPerEntry); - memset(&vd, 0, sizeof vd); - vd.elem = "/"; - vd.mode = 0777|ModeDir; - vd.uid = "vac"; - vd.gid = "vac"; - vd.mid = ""; - me.size = vdsize(&vd, VacDirVersion); - me.p = mballoc(&mb, me.size); - vdpack(&vd, &me, VacDirVersion); - mbinsert(&mb, 0, &me); - mbpack(&mb); - vtblockwrite(b); - memmove(metascore, b->score, VtScoreSize); - vtblockput(b); - - /* First entry: empty venti directory stream. */ - memset(&e, 0, sizeof e); - e.flags = VtEntryActive; - e.psize = psize; - e.dsize = bsize; - e.type = VtDirType; - memmove(e.score, vtzeroscore, VtScoreSize); - vtentrypack(&e, buf, 0); - vtfilewrite(f, buf, VtEntrySize, 0); - - /* Second entry: empty metadata stream. 
*/ - e.type = VtDataType; - e.dsize = mbsize; - vtentrypack(&e, buf, 0); - vtfilewrite(f, buf, VtEntrySize, VtEntrySize); - - /* Third entry: metadata stream with root directory. */ - memmove(e.score, metascore, VtScoreSize); - e.size = mbsize; - vtentrypack(&e, buf, 0); - vtfilewrite(f, buf, VtEntrySize, VtEntrySize*2); - - vtfileflush(f); - vtfileunlock(f); - - /* Now open it as a vac fs. */ - fs->root = _vacfileroot(fs, f); - if(fs->root == nil){ - werrstr("vacfileroot: %r"); - vacfsclose(fs); - return nil; - } - - return fs; -} - -int -vacfssync(VacFs *fs) -{ - uchar buf[1024]; - VtEntry e; - VtFile *f; - VtRoot root; - - /* Sync the entire vacfs to disk. */ - if(vacfileflush(fs->root, 1) < 0) - return -1; - if(vtfilelock(fs->root->up->msource, -1) < 0) - return -1; - if(vtfileflush(fs->root->up->msource) < 0){ - vtfileunlock(fs->root->up->msource); - return -1; - } - vtfileunlock(fs->root->up->msource); - - /* Prepare the dir stream for the root block. */ - if(getentry(fs->root->source, &e) < 0) - return -1; - vtentrypack(&e, buf, 0); - if(getentry(fs->root->msource, &e) < 0) - return -1; - vtentrypack(&e, buf, 1); - if(getentry(fs->root->up->msource, &e) < 0) - return -1; - vtentrypack(&e, buf, 2); - - f = vtfilecreateroot(fs->cache, fs->bsize, fs->bsize, VtDirType); - vtfilelock(f, VtORDWR); - if(vtfilewrite(f, buf, 3*VtEntrySize, 0) < 0 - || vtfileflush(f) < 0){ - vtfileunlock(f); - vtfileclose(f); - return -1; - } - vtfileunlock(f); - if(getentry(f, &e) < 0){ - vtfileclose(f); - return -1; - } - vtfileclose(f); - - /* Build a root block. 
*/ - memset(&root, 0, sizeof root); - strcpy(root.type, "vac"); - strcpy(root.name, fs->name); - memmove(root.score, e.score, VtScoreSize); - root.blocksize = fs->bsize; - memmove(root.prev, fs->score, VtScoreSize); - vtrootpack(&root, buf); - if(vtwrite(fs->z, fs->score, VtRootType, buf, VtRootSize) < 0){ - werrstr("writing root: %r"); - return -1; - } - if(vtsync(fs->z) < 0) - return -1; - return 0; -} - -int -vacfiledsize(VacFile *f) -{ - VtEntry e; - - if(vacfilegetentries(f,&e,nil) < 0) - return -1; - return e.dsize; -} - -/* - * Does block b of f have the same SHA1 hash as the n bytes at buf? - */ -int -sha1matches(VacFile *f, ulong b, uchar *buf, int n) -{ - uchar fscore[VtScoreSize]; - uchar bufscore[VtScoreSize]; - - if(vacfileblockscore(f, b, fscore) < 0) - return 0; - n = vtzerotruncate(VtDataType, buf, n); - sha1(buf, n, bufscore, nil); - if(memcmp(bufscore, fscore, VtScoreSize) == 0) - return 1; - return 0; -} - diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/fns.h --- a/sys/src/cmd/vtvacfs/fns.h Thu Sep 29 20:05:50 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -int mbunpack(MetaBlock *mb, uchar *p, int n); -void mbinsert(MetaBlock *mb, int i, MetaEntry*); -void mbdelete(MetaBlock *mb, int i, MetaEntry*); -void mbpack(MetaBlock *mb); -uchar *mballoc(MetaBlock *mb, int n); -void mbinit(MetaBlock *mb, uchar *p, int n, int entries); -int mbsearch(MetaBlock*, char*, int*, MetaEntry*); -int mbresize(MetaBlock*, MetaEntry*, int); - -int meunpack(MetaEntry*, MetaBlock *mb, int i); -int mecmp(MetaEntry*, char *s); -int mecmpnew(MetaEntry*, char *s); - -enum { - VacDirVersion = 8, - FossilDirVersion = 9, -}; -int vdsize(VacDir *dir, int); -int vdunpack(VacDir *dir, MetaEntry*); -void vdpack(VacDir *dir, MetaEntry*, int); - -VacFile *_vacfileroot(VacFs *fs, VtFile *file); - -int _vacfsnextqid(VacFs *fs, uvlong *qid); -void vacfsjumpqid(VacFs*, uvlong step); - -Reprog* glob2regexp(char*); -void loadexcludefile(char*); -int 
includefile(char*); -void excludepattern(char*); - -/* mmventi */ -int getdata(u8int *score, u8int *data, u8int len, u8int blocktype); -int putdata(u8int *score, u8int *data, int len, uchar blocktype); diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/mkfile --- a/sys/src/cmd/vtvacfs/mkfile Thu Sep 29 20:05:50 2011 +0200 +++ b/sys/src/cmd/vtvacfs/mkfile Fri Sep 30 15:24:12 2011 -0700 @@ -1,19 +1,37 @@ = 9 */ - DirNTEntry, /* not valid in version >= 9 */ - DirQidSpaceEntry, - DirGenEntry /* not valid in version >= 9 */ -}; - -struct VacDir -{ - char *elem; /* path element */ - ulong entry; /* entry in directory for data */ - ulong gen; /* generation of data entry */ - ulong mentry; /* entry in directory for meta */ - ulong mgen; /* generation of meta entry */ - uvlong size; /* size of file */ - uvlong qid; /* unique file id */ - - char *uid; /* owner id */ - char *gid; /* group id */ - char *mid; /* last modified by */ - ulong mtime; /* last modified time */ - ulong mcount; /* number of modifications: can wrap! 
*/ - ulong ctime; /* directory entry last changed */ - ulong atime; /* last time accessed */ - ulong mode; /* various mode bits */ - - /* plan 9 */ - int plan9; - uvlong p9path; - ulong p9version; - - /* sub space of qid */ - int qidspace; - uvlong qidoffset; /* qid offset */ - uvlong qidmax; /* qid maximum */ -}; - -struct VacFs -{ - char name[128]; - uchar score[VtScoreSize]; - VacFile *root; - VtConn *z; - int mode; - int bsize; - uvlong qid; - VtCache *cache; -}; - -VacFs *vacfsopen(VtConn *z, char *file, int mode, int ncache); -VacFs *vacfsopenscore(VtConn *z, u8int *score, int mode, int ncache); -VacFs *vacfscreate(VtConn *z, int bsize, int ncache); -void vacfsclose(VacFs *fs); -int vacfssync(VacFs *fs); -int vacfssnapshot(VacFs *fs, char *src, char *dst); -int vacfsgetscore(VacFs *fs, u8int *score); -int vacfsgetmaxqid(VacFs*, uvlong*); -void vacfsjumpqid(VacFs*, uvlong); - -VacFile *vacfsgetroot(VacFs *fs); -VacFile *vacfileopen(VacFs *fs, char *path); -VacFile *vacfilecreate(VacFile *file, char *elem, ulong perm); -VacFile *vacfilewalk(VacFile *file, char *elem); -int vacfileremove(VacFile *file); -int vacfileread(VacFile *file, void *buf, int n, vlong offset); -int vacfileblockscore(VacFile *file, u32int, u8int*); -int vacfilewrite(VacFile *file, void *buf, int n, vlong offset); -uvlong vacfilegetid(VacFile *file); -ulong vacfilegetmcount(VacFile *file); -int vacfileisdir(VacFile *file); -int vacfileisroot(VacFile *file); -ulong vacfilegetmode(VacFile *file); -int vacfilegetsize(VacFile *file, uvlong *size); -int vacfilegetdir(VacFile *file, VacDir *dir); -int vacfilesetdir(VacFile *file, VacDir *dir); -VacFile *vacfilegetparent(VacFile *file); -int vacfileflush(VacFile*, int); -VacFile *vacfileincref(VacFile*); -int vacfiledecref(VacFile*); -int vacfilesetsize(VacFile *f, uvlong size); - -int vacfilegetentries(VacFile *f, VtEntry *e, VtEntry *me); -int vacfilesetentries(VacFile *f, VtEntry *e, VtEntry *me); - -void vdcleanup(VacDir *dir); -void 
vdcopy(VacDir *dst, VacDir *src); -int vacfilesetqidspace(VacFile*, u64int, u64int); -uvlong vacfilegetqidoffset(VacFile*); - -VacDirEnum *vdeopen(VacFile*); -int vderead(VacDirEnum*, VacDir *); -void vdeclose(VacDirEnum*); -int vdeunread(VacDirEnum*); - -int vacfiledsize(VacFile *f); -int sha1matches(VacFile *f, ulong b, uchar *buf, int n); - diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/vacfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/vtvacfs/vacfile.c Fri Sep 30 15:24:12 2011 -0700 @@ -0,0 +1,2090 @@ +#include "/sys/src/cmd/vac/stdinc.h" +#include "/sys/src/cmd/vac/vac.h" +#include "/sys/src/cmd/vac/dat.h" +#include "/sys/src/cmd/vac/fns.h" +#include "/sys/src/cmd/vac/error.h" + +#define debug 0 + +/* + * Vac file system. This is a simplified version of the same code in Fossil. + * + * The locking order in the tree is upward: a thread can hold the lock + * for a VacFile and then acquire the lock of f->up (the parent), + * but not vice-versa. + * + * A vac file is one or two venti files. Plain data files are one venti file, + * while directores are two: a venti data file containing traditional + * directory entries, and a venti directory file containing venti + * directory entries. The traditional directory entries in the data file + * contain integers indexing into the venti directory entry file. + * It's a little complicated, but it makes the data usable by standard + * tools like venti/copy. 
+ * + */ + +static int filemetaflush(VacFile*, char*); +int mmvtwrite(uchar score[VtScoreSize], uint type, uchar *buf, int n); +int mmvtread(uchar score[VtScoreSize], uint type, uchar *buf, int n); + +struct VacFile +{ + VacFs *fs; /* immutable */ + + /* meta data for file: protected by the lk in the parent */ + int ref; /* holds this data structure up */ + + int partial; /* file was never really open */ + int removed; /* file has been removed */ + int dirty; /* dir is dirty with respect to meta data in block */ + u32int boff; /* block offset within msource for this file's metadata */ + VacDir dir; /* metadata for this file */ + VacFile *up; /* parent file */ + VacFile *next; /* sibling */ + + RWLock lk; /* lock for the following */ + VtFile *source; /* actual data */ + VtFile *msource; /* metadata for children in a directory */ + VacFile *down; /* children */ + int mode; + + uvlong qidoffset; /* qid offset */ +}; + +static VacFile* +filealloc(VacFs *fs) +{ + VacFile *f; + + f = vtmallocz(sizeof(VacFile)); + f->ref = 1; + f->fs = fs; + f->boff = NilBlock; + f->mode = fs->mode; + return f; +} + +static void +filefree(VacFile *f) +{ + vtfileclose(f->source); + vtfileclose(f->msource); + vdcleanup(&f->dir); + memset(f, ~0, sizeof *f); /* paranoia */ + vtfree(f); +} + +static int +chksource(VacFile *f) +{ + if(f->partial) + return 0; + + if(f->source == nil + || ((f->dir.mode & ModeDir) && f->msource == nil)){ + werrstr(ERemoved); + return -1; + } + return 0; +} + +static int +filelock(VacFile *f) +{ + wlock(&f->lk); + if(chksource(f) < 0){ + wunlock(&f->lk); + return -1; + } + return 0; +} + +static void +fileunlock(VacFile *f) +{ + wunlock(&f->lk); +} + +static int +filerlock(VacFile *f) +{ + rlock(&f->lk); + if(chksource(f) < 0){ + runlock(&f->lk); + return -1; + } + return 0; +} + +static void +filerunlock(VacFile *f) +{ + runlock(&f->lk); +} + +/* + * The file metadata, like f->dir and f->ref, + * are synchronized via the parent's lock. 
+ * This is why locking order goes up. + */ +static void +filemetalock(VacFile *f) +{ + assert(f->up != nil); + wlock(&f->up->lk); +} + +static void +filemetaunlock(VacFile *f) +{ + wunlock(&f->up->lk); +} + +uvlong +vacfilegetid(VacFile *f) +{ + /* immutable */ + return f->qidoffset + f->dir.qid; +} + +uvlong +vacfilegetqidoffset(VacFile *f) +{ + return f->qidoffset; +} + +ulong +vacfilegetmcount(VacFile *f) +{ + ulong mcount; + + filemetalock(f); + mcount = f->dir.mcount; + filemetaunlock(f); + return mcount; +} + +ulong +vacfilegetmode(VacFile *f) +{ + ulong mode; + + filemetalock(f); + mode = f->dir.mode; + filemetaunlock(f); + return mode; +} + +int +vacfileisdir(VacFile *f) +{ + /* immutable */ + return (f->dir.mode & ModeDir) != 0; +} + +int +vacfileisroot(VacFile *f) +{ + return f == f->fs->root; +} + +/* + * The files are reference counted, and while the reference + * is bigger than zero, each file can be found in its parent's + * f->down list (chains via f->next), so that multiple threads + * end up sharing a VacFile* when referring to the same file. + * + * Each VacFile holds a reference to its parent. + */ +VacFile* +vacfileincref(VacFile *vf) +{ + filemetalock(vf); + assert(vf->ref > 0); + vf->ref++; + filemetaunlock(vf); + return vf; +} + +int +vacfiledecref(VacFile *f) +{ + VacFile *p, *q, **qq; + + if(f->up == nil){ + /* never linked in */ + assert(f->ref == 1); + filefree(f); + return 0; + } + + filemetalock(f); + f->ref--; + if(f->ref > 0){ + filemetaunlock(f); + return -1; + } + assert(f->ref == 0); + assert(f->down == nil); + + if(f->source && vtfilelock(f->source, -1) >= 0){ + vtfileflush(f->source); + vtfileunlock(f->source); + } + if(f->msource && vtfilelock(f->msource, -1) >= 0){ + vtfileflush(f->msource); + vtfileunlock(f->msource); + } + + /* + * Flush f's directory information to the cache. 
+ */ + filemetaflush(f, nil); + + p = f->up; + qq = &p->down; + for(q = *qq; q; q = *qq){ + if(q == f) + break; + qq = &q->next; + } + assert(q != nil); + *qq = f->next; + + filemetaunlock(f); + filefree(f); + vacfiledecref(p); + return 0; +} + + +/* + * Construct a vacfile for the root of a vac tree, given the + * venti file for the root information. That venti file is a + * directory file containing VtEntries for three more venti files: + * the two venti files making up the root directory, and a + * third venti file that would be the metadata half of the + * "root's parent". + * + * Fossil generates slightly different vac files, due to a now + * impossible-to-change bug, which contain a VtEntry + * for just one venti file, that itself contains the expected + * three directory entries. Sigh. + */ +VacFile* +_vacfileroot(VacFs *fs, VtFile *r) +{ + int redirected; + char err[ERRMAX]; + VtBlock *b; + VtFile *r0, *r1, *r2; + MetaBlock mb; + MetaEntry me; + VacFile *root, *mr; + + redirected = 0; +Top: + b = nil; + root = nil; + mr = nil; + r1 = nil; + r2 = nil; + + if(vtfilelock(r, -1) < 0) + return nil; + r0 = vtfileopen(r, 0, fs->mode); + if(debug) + fprint(2, "r0 %p\n", r0); + if(r0 == nil) + goto Err; + r2 = vtfileopen(r, 2, fs->mode); + if(debug) + fprint(2, "r2 %p\n", r2); + if(r2 == nil){ + /* + * some vac files (e.g., from fossil) + * have an extra layer of indirection. 
+ */ + rerrstr(err, sizeof err); + if(!redirected && strstr(err, "not active")){ + redirected = 1; + vtfileunlock(r); + r = r0; + goto Top; + } + goto Err; + } + r1 = vtfileopen(r, 1, fs->mode); + if(debug) + fprint(2, "r1 %p\n", r1); + if(r1 == nil) + goto Err; + + mr = filealloc(fs); + mr->msource = r2; + r2 = nil; + + root = filealloc(fs); + root->boff = 0; + root->up = mr; + root->source = r0; + r0 = nil; + root->msource = r1; + r1 = nil; + + mr->down = root; + vtfileunlock(r); + + if(vtfilelock(mr->msource, VtOREAD) < 0) + goto Err1; + b = vtfileblock(mr->msource, 0, VtOREAD); + vtfileunlock(mr->msource); + if(b == nil) + goto Err1; + + if(mbunpack(&mb, b->data, mr->msource->dsize) < 0) + goto Err1; + + meunpack(&me, &mb, 0); + if(vdunpack(&root->dir, &me) < 0) + goto Err1; + vtblockput(b); + + return root; +Err: + vtfileunlock(r); +Err1: + vtblockput(b); + if(r0) + vtfileclose(r0); + if(r1) + vtfileclose(r1); + if(r2) + vtfileclose(r2); + if(mr) + filefree(mr); + if(root) + filefree(root); + + return nil; +} + +/* + * Vac directories are a sequence of metablocks, each of which + * contains a bunch of metaentries sorted by file name. + * The whole sequence isn't sorted, though, so you still have + * to look at every block to find a given name. + * Dirlookup looks in f for an element name elem. + * It returns a new VacFile with the dir, boff, and mode + * filled in, but the sources (venti files) are not, and f is + * not yet linked into the tree. These details must be taken + * care of by the caller. + * + * f must be locked, f->msource must not. 
+ */ +static VacFile* +dirlookup(VacFile *f, char *elem) +{ + int i; + MetaBlock mb; + MetaEntry me; + VtBlock *b; + VtFile *meta; + VacFile *ff; + u32int bo, nb; + + meta = f->msource; + b = nil; + if(vtfilelock(meta, -1) < 0) + return nil; + nb = (vtfilegetsize(meta)+meta->dsize-1)/meta->dsize; + for(bo=0; bodata, meta->dsize) < 0) + goto Err; + if(mbsearch(&mb, elem, &i, &me) >= 0){ + ff = filealloc(f->fs); + if(vdunpack(&ff->dir, &me) < 0){ + filefree(ff); + goto Err; + } + ff->qidoffset = f->qidoffset + ff->dir.qidoffset; + vtfileunlock(meta); + vtblockput(b); + ff->boff = bo; + ff->mode = f->mode; + return ff; + } + vtblockput(b); + b = nil; + } + werrstr(ENoFile); + /* fall through */ +Err: + vtfileunlock(meta); + vtblockput(b); + return nil; +} + +/* + * Open the venti file at offset in the directory f->source. + * f is locked. + */ +static VtFile * +fileopensource(VacFile *f, u32int offset, u32int gen, int dir, uint mode) +{ + VtFile *r; + + if((r = vtfileopen(f->source, offset, mode)) == nil) + return nil; + if(r == nil) + return nil; + if(r->gen != gen){ + werrstr(ERemoved); + vtfileclose(r); + return nil; + } + if(r->dir != dir && r->mode != -1){ + werrstr(EBadMeta); + vtfileclose(r); + return nil; + } + return r; +} + +VacFile* +vacfilegetparent(VacFile *f) +{ + if(vacfileisroot(f)) + return vacfileincref(f); + return vacfileincref(f->up); +} + +/* + * Given an unlocked vacfile (directory) f, + * return the vacfile named elem in f. + * Interprets . and .. as a convenience to callers. 
+ */ +VacFile* +vacfilewalk(VacFile *f, char *elem) +{ + VacFile *ff; + + if(elem[0] == 0){ + werrstr(EBadPath); + return nil; + } + + if(!vacfileisdir(f)){ + werrstr(ENotDir); + return nil; + } + + if(strcmp(elem, ".") == 0) + return vacfileincref(f); + + if(strcmp(elem, "..") == 0) + return vacfilegetparent(f); + + if(filelock(f) < 0) + return nil; + + for(ff = f->down; ff; ff=ff->next){ + if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){ + ff->ref++; + goto Exit; + } + } + + ff = dirlookup(f, elem); + if(ff == nil) + goto Err; + + if(ff->dir.mode & ModeSnapshot) + ff->mode = VtOREAD; + + if(vtfilelock(f->source, f->mode) < 0) + goto Err; + if(ff->dir.mode & ModeDir){ + ff->source = fileopensource(f, ff->dir.entry, ff->dir.gen, 1, ff->mode); + ff->msource = fileopensource(f, ff->dir.mentry, ff->dir.mgen, 0, ff->mode); + if(ff->source == nil || ff->msource == nil) + goto Err1; + }else{ + ff->source = fileopensource(f, ff->dir.entry, ff->dir.gen, 0, ff->mode); + if(ff->source == nil) + goto Err1; + } + vtfileunlock(f->source); + + /* link in and up parent ref count */ + ff->next = f->down; + f->down = ff; + ff->up = f; + vacfileincref(f); +Exit: + fileunlock(f); + return ff; + +Err1: + vtfileunlock(f->source); +Err: + fileunlock(f); + if(ff != nil) + vacfiledecref(ff); + return nil; +} + +/* + * Open a path in the vac file system: + * just walk each element one at a time. 
+ */ +VacFile* +vacfileopen(VacFs *fs, char *path) +{ + VacFile *f, *ff; + char *p, elem[VtMaxStringSize], *opath; + int n; + + f = fs->root; + vacfileincref(f); + opath = path; + while(*path != 0){ + for(p = path; *p && *p != '/'; p++) + ; + n = p - path; + if(n > 0){ + if(n > VtMaxStringSize){ + werrstr("%s: element too long", EBadPath); + goto Err; + } + memmove(elem, path, n); + elem[n] = 0; + ff = vacfilewalk(f, elem); + if(ff == nil){ + werrstr("%.*s: %r", utfnlen(opath, p-opath), opath); + goto Err; + } + vacfiledecref(f); + f = ff; + } + if(*p == '/') + p++; + path = p; + } + return f; +Err: + vacfiledecref(f); + return nil; +} + +/* + * Extract the score for the bn'th block in f. + */ +int +vacfileblockscore(VacFile *f, u32int bn, u8int *score) +{ + VtFile *s; + uvlong size; + int dsize, ret; + + ret = -1; + if(filerlock(f) < 0) + return -1; + if(vtfilelock(f->source, VtOREAD) < 0) + goto out; + + s = f->source; + dsize = s->dsize; + size = vtfilegetsize(s); + if((uvlong)bn*dsize >= size) + goto out1; + ret = vtfileblockscore(f->source, bn, score); + +out1: + vtfileunlock(f->source); +out: + filerunlock(f); + return ret; +} + +/* + * Read data from f. + */ +int +vacfileread(VacFile *f, void *buf, int cnt, vlong offset) +{ + int n; + + if(offset < 0){ + werrstr(EBadOffset); + return -1; + } + if(filerlock(f) < 0) + return -1; + if(vtfilelock(f->source, VtOREAD) < 0){ + filerunlock(f); + return -1; + } + n = vtfileread(f->source, buf, cnt, offset); + vtfileunlock(f->source); + filerunlock(f); + return n; +} + +static int +getentry(VtFile *f, VtEntry *e) +{ + if(vtfilelock(f, VtOREAD) < 0) + return -1; + if(vtfilegetentry(f, e) < 0){ + vtfileunlock(f); + return -1; + } + vtfileunlock(f); + if(vtglobaltolocal(e->score) != NilBlock){ + werrstr("internal error - data not on venti"); + return -1; + } + return 0; +} + +/* + * Get the VtEntries for the data contained in f. 
+ */ +int +vacfilegetentries(VacFile *f, VtEntry *e, VtEntry *me) +{ + if(filerlock(f) < 0) + return -1; + if(e && getentry(f->source, e) < 0){ + filerunlock(f); + return -1; + } + if(me){ + if(f->msource == nil) + memset(me, 0, sizeof *me); + else if(getentry(f->msource, me) < 0){ + filerunlock(f); + return -1; + } + } + filerunlock(f); + return 0; +} + +/* + * Get the file's size. + */ +int +vacfilegetsize(VacFile *f, uvlong *size) +{ + if(filerlock(f) < 0) + return -1; + if(vtfilelock(f->source, VtOREAD) < 0){ + filerunlock(f); + return -1; + } + *size = vtfilegetsize(f->source); + vtfileunlock(f->source); + filerunlock(f); + + return 0; +} + +/* + * Directory reading. + * + * A VacDirEnum is a buffer containing directory entries. + * Directory entries contain malloced strings and need to + * be cleaned up with vdcleanup. The invariant in the + * VacDirEnum is that the directory entries between + * vde->i and vde->n are owned by the vde and need to + * be cleaned up if it is closed. Those from 0 up to vde->i + * have been handed to the reader, and the reader must + * take care of calling vdcleanup as appropriate. + */ +VacDirEnum* +vdeopen(VacFile *f) +{ + VacDirEnum *vde; + VacFile *p; + + if(!vacfileisdir(f)){ + werrstr(ENotDir); + return nil; + } + + /* + * There might be changes to this directory's children + * that have not been flushed out into the cache yet. + * Those changes are only available if we look at the + * VacFile structures directory. But the directory reader + * is going to read the cache blocks directly, so update them. + */ + if(filelock(f) < 0) + return nil; + for(p=f->down; p; p=p->next) + filemetaflush(p, nil); + fileunlock(f); + + vde = vtmallocz(sizeof(VacDirEnum)); + vde->file = vacfileincref(f); + + return vde; +} + +/* + * Figure out the size of the directory entry at offset. 
+ * The rest of the metadata is kept in the data half, + * but since venti has to track the data size anyway, + * we just use that one and avoid updating the directory + * each time the file size changes. + */ +static int +direntrysize(VtFile *s, ulong offset, ulong gen, uvlong *size) +{ + VtBlock *b; + ulong bn; + VtEntry e; + int epb; + + epb = s->dsize/VtEntrySize; + bn = offset/epb; + offset -= bn*epb; + + b = vtfileblock(s, bn, VtOREAD); + if(b == nil) + goto Err; + if(vtentryunpack(&e, b->data, offset) < 0) + goto Err; + + /* dangling entries are returned as zero size */ + if(!(e.flags & VtEntryActive) || e.gen != gen) + *size = 0; + else + *size = e.size; + vtblockput(b); + return 0; + +Err: + vtblockput(b); + return -1; +} + +/* + * Fill in vde with a new batch of directory entries. + */ +static int +vdefill(VacDirEnum *vde) +{ + int i, n; + VtFile *meta, *source; + MetaBlock mb; + MetaEntry me; + VacFile *f; + VtBlock *b; + VacDir *de; + + /* clean up first */ + for(i=vde->i; in; i++) + vdcleanup(vde->buf+i); + vtfree(vde->buf); + vde->buf = nil; + vde->i = 0; + vde->n = 0; + + f = vde->file; + + source = f->source; + meta = f->msource; + + b = vtfileblock(meta, vde->boff, VtOREAD); + if(b == nil) + goto Err; + if(mbunpack(&mb, b->data, meta->dsize) < 0) + goto Err; + + n = mb.nindex; + vde->buf = vtmalloc(n * sizeof(VacDir)); + + for(i=0; ibuf + i; + meunpack(&me, &mb, i); + if(vdunpack(de, &me) < 0) + goto Err; + vde->n++; + if(!(de->mode & ModeDir)) + if(direntrysize(source, de->entry, de->gen, &de->size) < 0) + goto Err; + } + vde->boff++; + vtblockput(b); + return 0; +Err: + vtblockput(b); + return -1; +} + +/* + * Read a single directory entry from vde into de. + * Returns -1 on error, 0 on EOF, and 1 on success. + * When it returns 1, it becomes the caller's responsibility + * to call vdcleanup(de) to free the strings contained + * inside, or else to call vdunread to give it back. 
+ */ +int +vderead(VacDirEnum *vde, VacDir *de) +{ + int ret; + VacFile *f; + u32int nb; + + f = vde->file; + if(filerlock(f) < 0) + return -1; + + if(vtfilelock2(f->source, f->msource, VtOREAD) < 0){ + filerunlock(f); + return -1; + } + + nb = (vtfilegetsize(f->msource)+f->msource->dsize-1)/f->msource->dsize; + + while(vde->i >= vde->n){ + if(vde->boff >= nb){ + ret = 0; + goto Return; + } + if(vdefill(vde) < 0){ + ret = -1; + goto Return; + } + } + + memmove(de, vde->buf + vde->i, sizeof(VacDir)); + vde->i++; + ret = 1; + +Return: + vtfileunlock(f->source); + vtfileunlock(f->msource); + filerunlock(f); + + return ret; +} + +/* + * "Unread" the last directory entry that was read, + * so that the next vderead will return the same one. + * If the caller calls vdeunread(vde) it should not call + * vdcleanup on the entry being "unread". + */ +int +vdeunread(VacDirEnum *vde) +{ + if(vde->i > 0){ + vde->i--; + return 0; + } + return -1; +} + +/* + * Close the enumerator. + */ +void +vdeclose(VacDirEnum *vde) +{ + int i; + if(vde == nil) + return; + /* free the strings */ + for(i=vde->i; in; i++) + vdcleanup(vde->buf+i); + vtfree(vde->buf); + vacfiledecref(vde->file); + vtfree(vde); +} + + +/* + * On to mutation. If the vac file system has been opened + * read-write, then the files and directories can all be edited. + * Changes are kept in the in-memory cache until flushed out + * to venti, so we must be careful to explicitly flush data + * that we're not likely to modify again. + * + * Each VacFile has its own copy of its VacDir directory entry + * in f->dir, but otherwise the cache is the authoratative source + * for data. Thus, for the most part, it suffices if we just + * call vtfileflushbefore and vtfileflush when we modify things. + * There are a few places where we have to remember to write + * changed VacDirs back into the cache. If f->dir *is* out of sync, + * then f->dirty should be set. 
+ * + * The metadata in a directory is, to venti, a plain data file, + * but as mentioned above it is actually a sequence of + * MetaBlocks that contain sorted lists of VacDir entries. + * The filemetaxxx routines manipulate that stream. + */ + +/* + * Find space in fp for the directory entry dir (not yet written to disk) + * and write it to disk, returning NilBlock on failure, + * or the block number on success. + * + * Start is a suggested block number to try. + * The caller must have filemetalock'ed f and have + * vtfilelock'ed f->up->msource. + */ +static u32int +filemetaalloc(VacFile *fp, VacDir *dir, u32int start) +{ + u32int nb, bo; + VtBlock *b; + MetaBlock mb; + int nn; + uchar *p; + int i, n; + MetaEntry me; + VtFile *ms; + + ms = fp->msource; + n = vdsize(dir, VacDirVersion); + + /* Look for a block with room for a new entry of size n. */ + nb = (vtfilegetsize(ms)+ms->dsize-1)/ms->dsize; + if(start == NilBlock){ + if(nb > 0) + start = nb - 1; + else + start = 0; + } + + if(start > nb) + start = nb; + for(bo=start; bodata, ms->dsize) < 0) + goto Err; + nn = (mb.maxsize*FullPercentage/100) - mb.size + mb.free; + if(n <= nn && mb.nindex < mb.maxindex){ + /* reopen for writing */ + vtblockput(b); + if((b = vtfileblock(ms, bo, VtORDWR)) == nil) + goto Err; + mbunpack(&mb, b->data, ms->dsize); + goto Found; + } + vtblockput(b); + } + + /* No block found, extend the file by one metablock. */ + vtfileflushbefore(ms, nb*(uvlong)ms->dsize); + if((b = vtfileblock(ms, nb, VtORDWR)) == nil) + goto Err; + vtfilesetsize(ms, (nb+1)*ms->dsize); + mbinit(&mb, b->data, ms->dsize, ms->dsize/BytesPerEntry); + +Found: + /* Now we have a block; allocate space to write the entry. */ + p = mballoc(&mb, n); + if(p == nil){ + /* mballoc might have changed block */ + mbpack(&mb); + werrstr(EBadMeta); + goto Err; + } + + /* Figure out where to put the index entry, and write it. 
*/ + mbsearch(&mb, dir->elem, &i, &me); + assert(me.p == nil); /* not already there */ + me.p = p; + me.size = n; + vdpack(dir, &me, VacDirVersion); + mbinsert(&mb, i, &me); + mbpack(&mb); + vtblockput(b); + return bo; + +Err: + vtblockput(b); + return NilBlock; +} + +/* + * Update f's directory entry in the block cache. + * We look for the directory entry by name; + * if we're trying to rename the file, oelem is the old name. + * + * Assumes caller has filemetalock'ed f. + */ +static int +filemetaflush(VacFile *f, char *oelem) +{ + int i, n; + MetaBlock mb; + MetaEntry me, me2; + VacFile *fp; + VtBlock *b; + u32int bo; + + if(!f->dirty) + return 0; + + if(oelem == nil) + oelem = f->dir.elem; + + /* + * Locate f's old metadata in the parent's metadata file. + * We know which block it was in, but not exactly where + * in the block. + */ + fp = f->up; + if(vtfilelock(fp->msource, -1) < 0) + return -1; + /* can happen if source is clri'ed out from under us */ + if(f->boff == NilBlock) + goto Err1; + b = vtfileblock(fp->msource, f->boff, VtORDWR); + if(b == nil) + goto Err1; + if(mbunpack(&mb, b->data, fp->msource->dsize) < 0) + goto Err; + if(mbsearch(&mb, oelem, &i, &me) < 0) + goto Err; + + /* + * Check whether we can resize the entry and keep it + * in this block. + */ + n = vdsize(&f->dir, VacDirVersion); + if(mbresize(&mb, &me, n) >= 0){ + /* Okay, can be done without moving to another block. */ + + /* Remove old data */ + mbdelete(&mb, i, &me); + + /* Find new location if renaming */ + if(strcmp(f->dir.elem, oelem) != 0) + mbsearch(&mb, f->dir.elem, &i, &me2); + + /* Pack new data into new location. */ + vdpack(&f->dir, &me, VacDirVersion); +vdunpack(&f->dir, &me); + mbinsert(&mb, i, &me); + mbpack(&mb); + + /* Done */ + vtblockput(b); + vtfileunlock(fp->msource); + f->dirty = 0; + return 0; + } + + /* + * The entry must be moved to another block. + * This can only really happen on renames that + * make the name very long. 
+ */ + + /* Allocate a spot in a new block. */ + if((bo = filemetaalloc(fp, &f->dir, f->boff+1)) == NilBlock){ + /* mbresize above might have modified block */ + mbpack(&mb); + goto Err; + } + f->boff = bo; + + /* Now we're committed. Delete entry in old block. */ + mbdelete(&mb, i, &me); + mbpack(&mb); + vtblockput(b); + vtfileunlock(fp->msource); + + f->dirty = 0; + return 0; + +Err: + vtblockput(b); +Err1: + vtfileunlock(fp->msource); + return -1; +} + +/* + * Remove the directory entry for f. + */ +static int +filemetaremove(VacFile *f) +{ + VtBlock *b; + MetaBlock mb; + MetaEntry me; + int i; + VacFile *fp; + + b = nil; + fp = f->up; + filemetalock(f); + + if(vtfilelock(fp->msource, VtORDWR) < 0) + goto Err; + b = vtfileblock(fp->msource, f->boff, VtORDWR); + if(b == nil) + goto Err; + + if(mbunpack(&mb, b->data, fp->msource->dsize) < 0) + goto Err; + if(mbsearch(&mb, f->dir.elem, &i, &me) < 0) + goto Err; + mbdelete(&mb, i, &me); + mbpack(&mb); + vtblockput(b); + vtfileunlock(fp->msource); + + f->removed = 1; + f->boff = NilBlock; + f->dirty = 0; + + filemetaunlock(f); + return 0; + +Err: + vtfileunlock(fp->msource); + vtblockput(b); + filemetaunlock(f); + return -1; +} + +/* + * That was far too much effort for directory entries. + * Now we can write code that *does* things. + */ + +/* + * Flush all data associated with f out of the cache and onto venti. + * If recursive is set, flush f's children too. + * Vacfiledecref knows how to flush source and msource too. + */ +int +vacfileflush(VacFile *f, int recursive) +{ + int ret; + VacFile **kids, *p; + int i, nkids; + + if(f->mode == VtOREAD) + return 0; + + ret = 0; + filemetalock(f); + if(filemetaflush(f, nil) < 0) + ret = -1; + filemetaunlock(f); + + if(filelock(f) < 0) + return -1; + + /* + * Lock order prevents us from flushing kids while holding + * lock, so make a list and then flush without the lock. 
+ */ + nkids = 0; + kids = nil; + if(recursive){ + nkids = 0; + for(p=f->down; p; p=p->next) + nkids++; + kids = vtmalloc(nkids*sizeof(VacFile*)); + i = 0; + for(p=f->down; p; p=p->next){ + kids[i++] = p; + p->ref++; + } + } + if(nkids > 0){ + fileunlock(f); + for(i=0; isource, -1); + if(vtfileflush(f->source) < 0) + ret = -1; + vtfileunlock(f->source); + if(f->msource){ + vtfilelock(f->msource, -1); + if(vtfileflush(f->msource) < 0) + ret = -1; + vtfileunlock(f->msource); + } + fileunlock(f); + + return ret; +} + +/* + * Create a new file named elem in fp with the given mode. + * The mode can be changed later except for the ModeDir bit. + */ +VacFile* +vacfilecreate(VacFile *fp, char *elem, ulong mode) +{ + VacFile *ff; + VacDir *dir; + VtFile *pr, *r, *mr; + int type; + u32int bo; + + if(filelock(fp) < 0) + return nil; + + /* + * First, look to see that there's not a file in memory + * with the same name. + */ + for(ff = fp->down; ff; ff=ff->next){ + if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){ + ff = nil; + werrstr(EExists); + goto Err1; + } + } + + /* + * Next check the venti blocks. + */ + ff = dirlookup(fp, elem); + if(ff != nil){ + werrstr(EExists); + goto Err1; + } + + /* + * By the way, you can't create in a read-only file system. + */ + pr = fp->source; + if(pr->mode != VtORDWR){ + werrstr(EReadOnly); + goto Err1; + } + + /* + * Okay, time to actually create something. Lock the two + * halves of the directory and create a file. + */ + if(vtfilelock2(fp->source, fp->msource, -1) < 0) + goto Err1; + ff = filealloc(fp->fs); + ff->qidoffset = fp->qidoffset; /* hopefully fp->qidoffset == 0 */ + type = VtDataType; + if(mode & ModeDir) + type = VtDirType; + mr = nil; + if((r = vtfilecreate(pr, pr->psize, pr->dsize, type)) == nil) + goto Err; + if(mode & ModeDir) + if((mr = vtfilecreate(pr, pr->psize, pr->dsize, VtDataType)) == nil) + goto Err; + + /* + * Fill in the directory entry and write it to disk. 
+ */ + dir = &ff->dir; + dir->elem = vtstrdup(elem); + dir->entry = r->offset; + dir->gen = r->gen; + if(mode & ModeDir){ + dir->mentry = mr->offset; + dir->mgen = mr->gen; + } + dir->size = 0; + if(_vacfsnextqid(fp->fs, &dir->qid) < 0) + goto Err; + dir->uid = vtstrdup(fp->dir.uid); + dir->gid = vtstrdup(fp->dir.gid); + dir->mid = vtstrdup(""); + dir->mtime = time(0L); + dir->mcount = 0; + dir->ctime = dir->mtime; + dir->atime = dir->mtime; + dir->mode = mode; + if((bo = filemetaalloc(fp, &ff->dir, NilBlock)) == NilBlock) + goto Err; + + /* + * Now we're committed. + */ + vtfileunlock(fp->source); + vtfileunlock(fp->msource); + ff->source = r; + ff->msource = mr; + ff->boff = bo; + + /* Link into tree. */ + ff->next = fp->down; + fp->down = ff; + ff->up = fp; + vacfileincref(fp); + + fileunlock(fp); + + filelock(ff); + vtfilelock(ff->source, -1); + vtfileunlock(ff->source); + fileunlock(ff); + + return ff; + +Err: + vtfileunlock(fp->source); + vtfileunlock(fp->msource); + if(r){ + vtfilelock(r, -1); + vtfileremove(r); + } + if(mr){ + vtfilelock(mr, -1); + vtfileremove(mr); + } +Err1: + if(ff) + vacfiledecref(ff); + fileunlock(fp); + return nil; +} + +/* + * Change the size of the file f. + */ +int +vacfilesetsize(VacFile *f, uvlong size) +{ + if(vacfileisdir(f)){ + werrstr(ENotFile); + return -1; + } + + if(filelock(f) < 0) + return -1; + + if(f->source->mode != VtORDWR){ + werrstr(EReadOnly); + goto Err; + } + if(vtfilelock(f->source, -1) < 0) + goto Err; + if(vtfilesetsize(f->source, size) < 0){ + vtfileunlock(f->source); + goto Err; + } + vtfileunlock(f->source); + fileunlock(f); + return 0; + +Err: + fileunlock(f); + return -1; +} + +/* + * Write data to f. 
+ */ +int +vacfilewrite(VacFile *f, void *buf, int cnt, vlong offset) +{ + if(vacfileisdir(f)){ + werrstr(ENotFile); + return -1; + } + if(filelock(f) < 0) + return -1; + if(f->source->mode != VtORDWR){ + werrstr(EReadOnly); + goto Err; + } + if(offset < 0){ + werrstr(EBadOffset); + goto Err; + } + + if(vtfilelock(f->source, -1) < 0) + goto Err; + if(f->dir.mode & ModeAppend) + offset = vtfilegetsize(f->source); + if(vtfilewrite(f->source, buf, cnt, offset) != cnt + || vtfileflushbefore(f->source, offset) < 0){ + vtfileunlock(f->source); + goto Err; + } + vtfileunlock(f->source); + fileunlock(f); + return cnt; + +Err: + fileunlock(f); + return -1; +} + +/* + * Set (!) the VtEntry for the data contained in f. + * This let's us efficiently copy data from one file to another. + */ +int +vacfilesetentries(VacFile *f, VtEntry *e, VtEntry *me) +{ + int ret; + + vacfileflush(f, 0); /* flush blocks to venti, since we won't see them again */ + + if(!(e->flags&VtEntryActive)){ + werrstr("missing entry for source"); + return -1; + } + if(me && !(me->flags&VtEntryActive)) + me = nil; + if(f->msource && !me){ + werrstr("missing entry for msource"); + return -1; + } + if(me && !f->msource){ + werrstr("no msource to set"); + return -1; + } + + if(filelock(f) < 0) + return -1; + if(f->source->mode != VtORDWR + || (f->msource && f->msource->mode != VtORDWR)){ + werrstr(EReadOnly); + fileunlock(f); + return -1; + } + if(vtfilelock2(f->source, f->msource, -1) < 0){ + fileunlock(f); + return -1; + } + ret = 0; + if(vtfilesetentry(f->source, e) < 0) + ret = -1; + else if(me && vtfilesetentry(f->msource, me) < 0) + ret = -1; + + vtfileunlock(f->source); + if(f->msource) + vtfileunlock(f->msource); + fileunlock(f); + return ret; +} + +/* + * Get the directory entry for f. 
+ */ +int +vacfilegetdir(VacFile *f, VacDir *dir) +{ + if(filerlock(f) < 0) + return -1; + + filemetalock(f); + vdcopy(dir, &f->dir); + filemetaunlock(f); + + if(!vacfileisdir(f)){ + if(vtfilelock(f->source, VtOREAD) < 0){ + filerunlock(f); + return -1; + } + dir->size = vtfilegetsize(f->source); + vtfileunlock(f->source); + } + filerunlock(f); + + return 0; +} + +/* + * Set the directory entry for f. + */ +int +vacfilesetdir(VacFile *f, VacDir *dir) +{ + VacFile *ff; + char *oelem; + u32int mask; + u64int size; + + /* can not set permissions for the root */ + if(vacfileisroot(f)){ + werrstr(ERoot); + return -1; + } + + if(filelock(f) < 0) + return -1; + filemetalock(f); + + if(f->source->mode != VtORDWR){ + werrstr(EReadOnly); + goto Err; + } + + /* On rename, check new name does not already exist */ + if(strcmp(f->dir.elem, dir->elem) != 0){ + for(ff = f->up->down; ff; ff=ff->next){ + if(strcmp(dir->elem, ff->dir.elem) == 0 && !ff->removed){ + werrstr(EExists); + goto Err; + } + } + ff = dirlookup(f->up, dir->elem); + if(ff != nil){ + vacfiledecref(ff); + werrstr(EExists); + goto Err; + } + werrstr(""); /* "failed" dirlookup poisoned it */ + } + + /* Get ready... */ + if(vtfilelock2(f->source, f->msource, -1) < 0) + goto Err; + if(!vacfileisdir(f)){ + size = vtfilegetsize(f->source); + if(size != dir->size){ + if(vtfilesetsize(f->source, dir->size) < 0){ + vtfileunlock(f->source); + if(f->msource) + vtfileunlock(f->msource); + goto Err; + } + } + } + /* ... now commited to changing it. 
*/ + vtfileunlock(f->source); + if(f->msource) + vtfileunlock(f->msource); + + oelem = nil; + if(strcmp(f->dir.elem, dir->elem) != 0){ + oelem = f->dir.elem; + f->dir.elem = vtstrdup(dir->elem); + } + + if(strcmp(f->dir.uid, dir->uid) != 0){ + vtfree(f->dir.uid); + f->dir.uid = vtstrdup(dir->uid); + } + + if(strcmp(f->dir.gid, dir->gid) != 0){ + vtfree(f->dir.gid); + f->dir.gid = vtstrdup(dir->gid); + } + + f->dir.mtime = dir->mtime; + f->dir.atime = dir->atime; + + mask = ~(ModeDir|ModeSnapshot); + f->dir.mode &= ~mask; + f->dir.mode |= mask & dir->mode; + f->dirty = 1; + + if(filemetaflush(f, oelem) < 0){ + vtfree(oelem); + goto Err; /* that sucks */ + } + vtfree(oelem); + + filemetaunlock(f); + fileunlock(f); + return 0; + +Err: + filemetaunlock(f); + fileunlock(f); + return -1; +} + +/* + * Set the qid space. + */ +int +vacfilesetqidspace(VacFile *f, u64int offset, u64int max) +{ + int ret; + + if(filelock(f) < 0) + return -1; + if(f->source->mode != VtORDWR){ + fileunlock(f); + werrstr(EReadOnly); + return -1; + } + filemetalock(f); + f->dir.qidspace = 1; + f->dir.qidoffset = offset; + f->dir.qidmax = max; + f->dirty = 1; + ret = filemetaflush(f, nil); + filemetaunlock(f); + fileunlock(f); + return ret; +} + +/* + * Check that the file is empty, returning 0 if it is. + * Returns -1 on error (and not being empty is an error). + */ +static int +filecheckempty(VacFile *f) +{ + u32int i, n; + VtBlock *b; + MetaBlock mb; + VtFile *r; + + r = f->msource; + n = (vtfilegetsize(r)+r->dsize-1)/r->dsize; + for(i=0; idata, r->dsize) < 0) + goto Err; + if(mb.nindex > 0){ + werrstr(ENotEmpty); + goto Err; + } + vtblockput(b); + } + return 0; + +Err: + vtblockput(b); + return -1; +} + +/* + * Remove the vac file f. 
+ */ +int +vacfileremove(VacFile *f) +{ + VacFile *ff; + + /* Cannot remove the root */ + if(vacfileisroot(f)){ + werrstr(ERoot); + return -1; + } + + if(filelock(f) < 0) + return -1; + if(f->source->mode != VtORDWR){ + werrstr(EReadOnly); + goto Err1; + } + if(vtfilelock2(f->source, f->msource, -1) < 0) + goto Err1; + if(vacfileisdir(f) && filecheckempty(f)<0) + goto Err; + + for(ff=f->down; ff; ff=ff->next) + assert(ff->removed); + + vtfileremove(f->source); + f->source = nil; + if(f->msource){ + vtfileremove(f->msource); + f->msource = nil; + } + fileunlock(f); + + if(filemetaremove(f) < 0) + return -1; + return 0; + +Err: + vtfileunlock(f->source); + if(f->msource) + vtfileunlock(f->msource); +Err1: + fileunlock(f); + return -1; +} + +/* + * Vac file system format. + */ +static char EBadVacFormat[] = "bad format for vac file"; + +static VacFs * +vacfsalloc(VtConn *z, int bsize, int ncache, int mode) +{ + VacFs *fs; + + fs = vtmallocz(sizeof(VacFs)); + fs->z = z; + fs->bsize = bsize; + fs->mode = mode; + fs->cache = vtcachealloc(z, bsize, ncache); + return fs; +} + +static int +readscore(int fd, uchar score[VtScoreSize]) +{ + char buf[45], *pref; + int n; + + n = readn(fd, buf, sizeof(buf)-1); + if(n < sizeof(buf)-1) { + werrstr("short read"); + return -1; + } + buf[n] = 0; + + if(vtparsescore(buf, &pref, score) < 0){ + werrstr(EBadVacFormat); + return -1; + } + if(pref==nil || strcmp(pref, "vac") != 0) { + werrstr("not a vac file"); + return -1; + } + return 0; +} + +VacFs* +vacfsopen(VtConn *z, char *scorez, int mode, int ncache) +{ + int fd; + uchar score[VtScoreSize]; + char *prefix; + + if(vtparsescore(scorez, &prefix, score)){ + werrstr("not a valid vac string"); + return nil; + } + return vacfsopenscore(z, score, mode, ncache); +} + +VacFs* +vacfsopenscore(VtConn *z, u8int *score, int mode, int ncache) +{ + VacFs *fs; + int n; + VtRoot rt; + uchar buf[VtRootSize]; + VacFile *root; + VtFile *r; + VtEntry e; + print("vacfsopenscore %d", VtRootSize); + n = 
mmvtread(score, VtRootType, buf, VtRootSize); + if(n < 0) + return nil; + if(n != VtRootSize){ + werrstr("mmvtread on root too short"); + return nil; + } + + if(vtrootunpack(&rt, buf) < 0) + return nil; + + if(strcmp(rt.type, "vac") != 0) { + werrstr("not a vac root"); + return nil; + } + + fs = vacfsalloc(z, rt.blocksize, ncache, mode); + memmove(fs->score, score, VtScoreSize); + fs->mode = mode; + + memmove(e.score, rt.score, VtScoreSize); + e.gen = 0; + e.psize = rt.blocksize; + e.dsize = rt.blocksize; + e.type = VtDirType; + e.flags = VtEntryActive; + e.size = 3*VtEntrySize; + + root = nil; + if((r = vtfileopenroot(fs->cache, &e)) == nil) + goto Err; + if(debug) + fprint(2, "r %p\n", r); + root = _vacfileroot(fs, r); + if(debug) + fprint(2, "root %p\n", root); + vtfileclose(r); + if(root == nil) + goto Err; + fs->root = root; + return fs; +Err: + if(root) + vacfiledecref(root); + vacfsclose(fs); + return nil; +} + +int +vacfsmode(VacFs *fs) +{ + return fs->mode; +} + +VacFile* +vacfsgetroot(VacFs *fs) +{ + return vacfileincref(fs->root); +} + +int +vacfsgetblocksize(VacFs *fs) +{ + return fs->bsize; +} + +int +vacfsgetscore(VacFs *fs, u8int *score) +{ + memmove(score, fs->score, VtScoreSize); + return 0; +} + +int +_vacfsnextqid(VacFs *fs, uvlong *qid) +{ + ++fs->qid; + *qid = fs->qid; + return 0; +} + +void +vacfsjumpqid(VacFs *fs, uvlong step) +{ + fs->qid += step; +} + +/* + * Set *maxqid to the maximum qid expected in this file system. + * In newer vac archives, the maximum qid is stored in the + * qidspace VacDir annotation. In older vac archives, the root + * got created last, so it had the maximum qid. 
+ */ +int +vacfsgetmaxqid(VacFs *fs, uvlong *maxqid) +{ + VacDir vd; + + if(vacfilegetdir(fs->root, &vd) < 0) + return -1; + if(vd.qidspace) + *maxqid = vd.qidmax; + else + *maxqid = vd.qid; + vdcleanup(&vd); + return 0; +} + + +void +vacfsclose(VacFs *fs) +{ + if(fs->root) + vacfiledecref(fs->root); + fs->root = nil; + vtcachefree(fs->cache); + vtfree(fs); +} + +/* + * Create a fresh vac fs. + */ +VacFs * +vacfscreate(VtConn *z, int bsize, int ncache) +{ + VacFs *fs; + VtFile *f; + uchar buf[VtEntrySize], metascore[VtScoreSize]; + VtEntry e; + VtBlock *b; + MetaBlock mb; + VacDir vd; + MetaEntry me; + int psize; + int mbsize; + + if((fs = vacfsalloc(z, bsize, ncache, VtORDWR)) == nil) + return nil; + + /* + * Fake up an empty vac fs. + */ + psize = bsize; + f = vtfilecreateroot(fs->cache, psize, bsize, VtDirType); + vtfilelock(f, VtORDWR); + + /* Metablocks can't be too big -- they have 16-bit offsets in them. */ + mbsize = bsize; + if(mbsize >= 56*1024) + mbsize = 56*1024; + + /* Write metablock containing root directory VacDir. */ + b = vtcacheallocblock(fs->cache, VtDataType); + mbinit(&mb, b->data, mbsize, mbsize/BytesPerEntry); + memset(&vd, 0, sizeof vd); + vd.elem = "/"; + vd.mode = 0777|ModeDir; + vd.uid = "vac"; + vd.gid = "vac"; + vd.mid = ""; + me.size = vdsize(&vd, VacDirVersion); + me.p = mballoc(&mb, me.size); + vdpack(&vd, &me, VacDirVersion); + mbinsert(&mb, 0, &me); + mbpack(&mb); + vtblockwrite(b); + memmove(metascore, b->score, VtScoreSize); + vtblockput(b); + + /* First entry: empty venti directory stream. */ + memset(&e, 0, sizeof e); + e.flags = VtEntryActive; + e.psize = psize; + e.dsize = bsize; + e.type = VtDirType; + memmove(e.score, vtzeroscore, VtScoreSize); + vtentrypack(&e, buf, 0); + vtfilewrite(f, buf, VtEntrySize, 0); + + /* Second entry: empty metadata stream. 
*/ + e.type = VtDataType; + e.dsize = mbsize; + vtentrypack(&e, buf, 0); + vtfilewrite(f, buf, VtEntrySize, VtEntrySize); + + /* Third entry: metadata stream with root directory. */ + memmove(e.score, metascore, VtScoreSize); + e.size = mbsize; + vtentrypack(&e, buf, 0); + vtfilewrite(f, buf, VtEntrySize, VtEntrySize*2); + + vtfileflush(f); + vtfileunlock(f); + + /* Now open it as a vac fs. */ + fs->root = _vacfileroot(fs, f); + if(fs->root == nil){ + werrstr("vacfileroot: %r"); + vacfsclose(fs); + return nil; + } + + return fs; +} + +int +vacfssync(VacFs *fs) +{ + uchar buf[1024]; + VtEntry e; + VtFile *f; + VtRoot root; + + /* Sync the entire vacfs to disk. */ + if(vacfileflush(fs->root, 1) < 0) + return -1; + if(vtfilelock(fs->root->up->msource, -1) < 0) + return -1; + if(vtfileflush(fs->root->up->msource) < 0){ + vtfileunlock(fs->root->up->msource); + return -1; + } + vtfileunlock(fs->root->up->msource); + + /* Prepare the dir stream for the root block. */ + if(getentry(fs->root->source, &e) < 0) + return -1; + vtentrypack(&e, buf, 0); + if(getentry(fs->root->msource, &e) < 0) + return -1; + vtentrypack(&e, buf, 1); + if(getentry(fs->root->up->msource, &e) < 0) + return -1; + vtentrypack(&e, buf, 2); + + f = vtfilecreateroot(fs->cache, fs->bsize, fs->bsize, VtDirType); + vtfilelock(f, VtORDWR); + if(vtfilewrite(f, buf, 3*VtEntrySize, 0) < 0 + || vtfileflush(f) < 0){ + vtfileunlock(f); + vtfileclose(f); + return -1; + } + vtfileunlock(f); + if(getentry(f, &e) < 0){ + vtfileclose(f); + return -1; + } + vtfileclose(f); + + /* Build a root block. 
*/ + memset(&root, 0, sizeof root); + strcpy(root.type, "vac"); + strcpy(root.name, fs->name); + memmove(root.score, e.score, VtScoreSize); + root.blocksize = fs->bsize; + memmove(root.prev, fs->score, VtScoreSize); + vtrootpack(&root, buf); + if(mmvtwrite(fs->score, VtRootType, buf, VtRootSize) < 0){ + werrstr("writing root: %r"); + return -1; + } + if(vtsync(fs->z) < 0) + return -1; + return 0; +} + +int +vacfiledsize(VacFile *f) +{ + VtEntry e; + + if(vacfilegetentries(f,&e,nil) < 0) + return -1; + return e.dsize; +} + +/* + * Does block b of f have the same SHA1 hash as the n bytes at buf? + */ +int +sha1matches(VacFile *f, ulong b, uchar *buf, int n) +{ + uchar fscore[VtScoreSize]; + uchar bufscore[VtScoreSize]; + + if(vacfileblockscore(f, b, fscore) < 0) + return 0; + n = vtzerotruncate(VtDataType, buf, n); + sha1(buf, n, bufscore, nil); + if(memcmp(bufscore, fscore, VtScoreSize) == 0) + return 1; + return 0; +} + diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/vacfs.c --- a/sys/src/cmd/vtvacfs/vacfs.c Thu Sep 29 20:05:50 2011 +0200 +++ b/sys/src/cmd/vtvacfs/vacfs.c Fri Sep 30 15:24:12 2011 -0700 @@ -1,6 +1,6 @@ -#include "stdinc.h" +#include "/sys/src/cmd/vac/stdinc.h" #include -#include "vac.h" +#include "/sys/src/cmd/vac/vac.h" #define convM2Su(a, b, c, d) convM2S(a, b, c) #define convS2Mu(a, b, c, d) convS2M(a, b, c) @@ -122,7 +122,6 @@ { char *defsrv, *srvname; int p[2], fd; - char *host = nil; long ncache; defsrv = "mmventi"; @@ -215,6 +214,13 @@ /* since the score is in the aname :-) */ VacFile *file; char err[80]; + char *score = rhdr.aname; + + fs = vacfsopen(nil, score, VtOREAD, 1024); + if (fs == nil) { + rerrstr(err, sizeof err); + return vtstrdup(err); + } file = vacfsgetroot(fs); if(file == nil) { @@ -633,6 +639,8 @@ { char *err; int n; + dflag=2; + fmtinstall('G', fcallfmt); for(;;){ n = read9pmsg(mfd[0], mdata, sizeof mdata); @@ -642,7 +650,7 @@ sysfatal("convM2S conversion error"); if(dflag) - fprint(2, "vacfs:<-%F\n", &rhdr); + 
fprint(2, "vacfs:<-%G\n", &rhdr); thdr.data = (char*)mdata + IOHDRSZ; if(!fcalls[rhdr.type]) @@ -658,7 +666,7 @@ } thdr.tag = rhdr.tag; if(dflag) - fprint(2, "vacfs:->%F\n", &thdr); + fprint(2, "vacfs:->%G\n", &thdr); n = convS2Mu(&thdr, mdata, messagesize, dotu); if(n <= BIT16SZ) sysfatal("convS2Mu conversion error"); diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/vtcache.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/vtvacfs/vtcache.c Fri Sep 30 15:24:12 2011 -0700 @@ -0,0 +1,606 @@ +/* + * Memory-only VtBlock cache. + * + * The cached Venti blocks are in the hash chains. + * The cached local blocks are only in the blocks array. + * The free blocks are in the heap, which is supposed to + * be indexed by second-to-last use but actually + * appears to be last use. + */ + +#include +#include +#include +int mmvtwrite(uchar score[VtScoreSize], uint type, uchar *buf, int n); +int mmvtread(uchar score[VtScoreSize], uint type, uchar *buf, int n); + +int vtcachenread; +int vtcachencopy; +int vtcachenwrite; +int vttracelevel; + +enum { + BioLocal = 1, + BioVenti, + BioReading, + BioWriting, + BioEmpty, + BioVentiError +}; +enum { + BadHeap = ~0 +}; +struct VtCache +{ + QLock lk; + VtConn *z; + u32int blocksize; + u32int now; /* ticks for usage time stamps */ + VtBlock **hash; /* hash table for finding addresses */ + int nhash; + VtBlock **heap; /* heap for finding victims */ + int nheap; + VtBlock *block; /* all allocated blocks */ + int nblock; + uchar *mem; /* memory for all blocks and data */ + int (*write)(VtConn*, uchar[VtScoreSize], uint, uchar*, int); +}; + +static void cachecheck(VtCache*); + +int +hackmmvtwrite(VtConn *, uchar *score, uint type, u8int *buf, int len) +{ + return mmvtwrite(score, type, buf, len); +} +VtCache* +vtcachealloc(VtConn *z, int blocksize, ulong nblock) +{ + uchar *p; + VtCache *c; + int i; + VtBlock *b; + + c = vtmallocz(sizeof(VtCache)); + + c->z = z; + c->blocksize = (blocksize + 127) & ~127; + c->nblock = 
nblock; + c->nhash = nblock; + c->hash = vtmallocz(nblock*sizeof(VtBlock*)); + c->heap = vtmallocz(nblock*sizeof(VtBlock*)); + c->block = vtmallocz(nblock*sizeof(VtBlock)); + c->mem = vtmallocz(nblock*c->blocksize); + c->write = hackmmvtwrite; + + p = c->mem; + for(i=0; iblock[i]; + b->addr = NilBlock; + b->c = c; + b->data = p; + b->heap = i; + c->heap[i] = b; + p += c->blocksize; + } + c->nheap = nblock; + cachecheck(c); + return c; +} + +/* + * BUG This is here so that vbackup can override it and do some + * pipelining of writes. Arguably vtwrite or vtwritepacket or the + * cache itself should be providing this functionality. + */ +void +vtcachesetwrite(VtCache *c, int (*write)(VtConn*, uchar[VtScoreSize], uint, uchar*, int)) +{ + if(write == nil) + write = vtwrite; + c->write = write; +} + +void +vtcachefree(VtCache *c) +{ + int i; + + qlock(&c->lk); + + cachecheck(c); + for(i=0; inblock; i++) + assert(c->block[i].ref == 0); + + vtfree(c->hash); + vtfree(c->heap); + vtfree(c->block); + vtfree(c->mem); + vtfree(c); +} + +static void +vtcachedump(VtCache *c) +{ + int i; + VtBlock *b; + + for(i=0; inblock; i++){ + b = &c->block[i]; + print("cache block %d: type %d score %V iostate %d addr %d ref %d nlock %d\n", + i, b->type, b->score, b->iostate, b->addr, b->ref, b->nlock); + } +} + +static void +cachecheck(VtCache *c) +{ + u32int size, now; + int i, k, refed; + VtBlock *b; + + size = c->blocksize; + now = c->now; + + for(i = 0; i < c->nheap; i++){ + if(c->heap[i]->heap != i) + sysfatal("mis-heaped at %d: %d", i, c->heap[i]->heap); + if(i > 0 && c->heap[(i - 1) >> 1]->used - now > c->heap[i]->used - now) + sysfatal("bad heap ordering"); + k = (i << 1) + 1; + if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now) + sysfatal("bad heap ordering"); + k++; + if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now) + sysfatal("bad heap ordering"); + } + + refed = 0; + for(i = 0; i < c->nblock; i++){ + b = &c->block[i]; + if(b->data != &c->mem[i * 
size]) + sysfatal("mis-blocked at %d", i); + if(b->ref && b->heap == BadHeap) + refed++; + else if(b->addr != NilBlock) + refed++; + } + assert(c->nheap + refed == c->nblock); + refed = 0; + for(i = 0; i < c->nblock; i++){ + b = &c->block[i]; + if(b->ref){ + refed++; + } + } +} + +static int +upheap(int i, VtBlock *b) +{ + VtBlock *bb; + u32int now; + int p; + VtCache *c; + + c = b->c; + now = c->now; + for(; i != 0; i = p){ + p = (i - 1) >> 1; + bb = c->heap[p]; + if(b->used - now >= bb->used - now) + break; + c->heap[i] = bb; + bb->heap = i; + } + c->heap[i] = b; + b->heap = i; + + return i; +} + +static int +downheap(int i, VtBlock *b) +{ + VtBlock *bb; + u32int now; + int k; + VtCache *c; + + c = b->c; + now = c->now; + for(; ; i = k){ + k = (i << 1) + 1; + if(k >= c->nheap) + break; + if(k + 1 < c->nheap && c->heap[k]->used - now > c->heap[k + 1]->used - now) + k++; + bb = c->heap[k]; + if(b->used - now <= bb->used - now) + break; + c->heap[i] = bb; + bb->heap = i; + } + c->heap[i] = b; + b->heap = i; + return i; +} + +/* + * Delete a block from the heap. + * Called with c->lk held. + */ +static void +heapdel(VtBlock *b) +{ + int i, si; + VtCache *c; + + c = b->c; + + si = b->heap; + if(si == BadHeap) + return; + b->heap = BadHeap; + c->nheap--; + if(si == c->nheap) + return; + b = c->heap[c->nheap]; + i = upheap(si, b); + if(i == si) + downheap(i, b); +} + +/* + * Insert a block into the heap. + * Called with c->lk held. + */ +static void +heapins(VtBlock *b) +{ + assert(b->heap == BadHeap); + upheap(b->c->nheap++, b); +} + +/* + * locate the vtBlock with the oldest second to last use. + * remove it from the heap, and fix up the heap. + */ +/* called with c->lk held */ +static VtBlock* +vtcachebumpblock(VtCache *c) +{ + VtBlock *b; + + /* + * locate the vtBlock with the oldest second to last use. + * remove it from the heap, and fix up the heap. 
+ */ + if(c->nheap == 0){ + vtcachedump(c); + fprint(2, "vtcachebumpblock: no free blocks in vtCache"); + abort(); + } + b = c->heap[0]; + heapdel(b); + + assert(b->heap == BadHeap); + assert(b->ref == 0); + + /* + * unchain the vtBlock from hash chain if any + */ + if(b->prev){ + *(b->prev) = b->next; + if(b->next) + b->next->prev = b->prev; + b->prev = nil; + } + + +if(0)fprint(2, "droping %x:%V\n", b->addr, b->score); + /* set vtBlock to a reasonable state */ + b->ref = 1; + b->iostate = BioEmpty; + return b; +} + +/* + * fetch a local block from the memory cache. + * if it's not there, load it, bumping some other Block. + * if we're out of free blocks, we're screwed. + */ +VtBlock* +vtcachelocal(VtCache *c, u32int addr, int type) +{ + VtBlock *b; + + if(addr == 0) + sysfatal("vtcachelocal: asked for nonexistent block 0"); + if(addr > c->nblock) + sysfatal("vtcachelocal: asked for block #%ud; only %d blocks", + addr, c->nblock); + + b = &c->block[addr-1]; + if(b->addr == NilBlock || b->iostate != BioLocal) + sysfatal("vtcachelocal: block is not local"); + + if(b->type != type) + sysfatal("vtcachelocal: block has wrong type %d != %d", b->type, type); + + qlock(&c->lk); + b->ref++; + qunlock(&c->lk); + + qlock(&b->lk); + b->nlock = 1; + b->pc = getcallerpc(&c); + return b; +} + +VtBlock* +vtcacheallocblock(VtCache *c, int type) +{ + VtBlock *b; + + qlock(&c->lk); + b = vtcachebumpblock(c); + b->iostate = BioLocal; + b->type = type; + b->addr = (b - c->block)+1; + vtzeroextend(type, b->data, 0, c->blocksize); + vtlocaltoglobal(b->addr, b->score); + qunlock(&c->lk); + + qlock(&b->lk); + b->nlock = 1; + b->pc = getcallerpc(&c); + return b; +} + +/* + * fetch a global (Venti) block from the memory cache. + * if it's not there, load it, bumping some other block. 
+ */ +VtBlock* +vtcacheglobal(VtCache *c, uchar score[VtScoreSize], int type) +{ + VtBlock *b; + ulong h; + int n; + u32int addr; + + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d from %p\n", score, type, getcallerpc(&c)); + addr = vtglobaltolocal(score); + if(addr != NilBlock){ + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => local\n", score, type); + b = vtcachelocal(c, addr, type); + if(b) + b->pc = getcallerpc(&c); + return b; + } + + h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|(score[3]<<24)) % c->nhash; + + /* + * look for the block in the cache + */ + qlock(&c->lk); + for(b = c->hash[h]; b != nil; b = b->next){ + if(b->addr != NilBlock || memcmp(b->score, score, VtScoreSize) != 0 || b->type != type) + continue; + heapdel(b); + b->ref++; + qunlock(&c->lk); + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => found in cache %p; locking\n", score, type, b); + qlock(&b->lk); + b->nlock = 1; + if(b->iostate == BioVentiError){ + if(chattyventi) + fprint(2, "cached read error for %V\n", score); + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => cache read error\n", score, type); + vtblockput(b); + werrstr("venti i/o error"); + return nil; + } + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => found in cache; returning\n", score, type); + b->pc = getcallerpc(&c); + return b; + } + + /* + * not found + */ + b = vtcachebumpblock(c); + b->addr = NilBlock; + b->type = type; + memmove(b->score, score, VtScoreSize); + /* chain onto correct hash */ + b->next = c->hash[h]; + c->hash[h] = b; + if(b->next != nil) + b->next->prev = &b->next; + b->prev = &c->hash[h]; + + /* + * Lock b before unlocking c, so that others wait while we read. + * + * You might think there is a race between this qlock(b) before qunlock(c) + * and the qlock(c) while holding a qlock(b) in vtblockwrite. However, + * the block here can never be the block in a vtblockwrite, so we're safe. + * We're certainly living on the edge. 
+ */ + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => bumped; locking %p\n", score, type, b); + qlock(&b->lk); + b->nlock = 1; + qunlock(&c->lk); + + vtcachenread++; + n = mmvtread(score, type, b->data, c->blocksize); + if(n < 0){ + if(chattyventi) + fprint(2, "read %V: %r\n", score); + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => bumped; read error\n", score, type); + b->iostate = BioVentiError; /* kept in the cached block: while it stays cached, lookups of this score return nil */ + vtblockput(b); + return nil; + } + vtzeroextend(type, b->data, n, c->blocksize); + b->iostate = BioVenti; + b->nlock = 1; + if(vttracelevel) + fprint(2, "vtcacheglobal %V %d => loaded into cache; returning\n", score, type); + b->pc = getcallerpc(&b); /* NOTE(review): the other getcallerpc calls in this function pass &c, the first argument; b is a local variable here -- confirm */ + return b; +} + +/* + * The thread that has locked b may refer to it by + * multiple names. Nlock counts the number of + * references the locking thread holds. It will call + * vtblockput once per reference. + */ +void +vtblockduplock(VtBlock *b) +{ + assert(b->nlock > 0); + b->nlock++; +} + +/* + * we're done with the block. + * unlock it. can't use it after calling this. + */ +void +vtblockput(VtBlock* b) +{ + VtCache *c; + + if(b == nil) + return; + +if(0)fprint(2, "vtblockput: %d: %x %d %d\n", getpid(), b->addr, c->nheap, b->iostate); /* NOTE(review): c has not been assigned yet at this point; harmless only while the statement stays disabled by if(0) */ + if(vttracelevel) + fprint(2, "vtblockput %p from %p\n", b, getcallerpc(&b)); + + if(--b->nlock > 0) /* the locking thread still holds other references (see vtblockduplock) */ + return; + + /* + * b->nlock should probably stay at zero while + * the vtBlock is unlocked, but diskThread and vtSleep + * conspire to assume that they can just qlock(&b->lk); vtblockput(b), + * so we have to keep b->nlock set to 1 even + * when the vtBlock is unlocked.
+ */ + assert(b->nlock == 0); + b->nlock = 1; + + qunlock(&b->lk); + c = b->c; + qlock(&c->lk); + + if(--b->ref > 0){ + qunlock(&c->lk); + return; + } + + assert(b->ref == 0); + switch(b->iostate){ + case BioVenti: +/*if(b->addr != NilBlock) print("blockput %d\n", b->addr); */ + b->used = c->now++; + /* fall through */ + case BioVentiError: + heapins(b); + break; + case BioLocal: /* local blocks are not put on the heap here */ + break; + } + qunlock(&c->lk); +} + /* write the local block b out through c->write; on success b takes the returned score, becomes a BioVenti block with addr NilBlock and is chained into the hash table; returns 0 on success, -1 if b is not BioLocal or the write fails */ +int +vtblockwrite(VtBlock *b) +{ + uchar score[VtScoreSize]; + VtCache *c; + uint h; + int n; + + if(b->iostate != BioLocal){ + werrstr("vtblockwrite: not a local block"); + return -1; + } + + c = b->c; + n = vtzerotruncate(b->type, b->data, c->blocksize); + vtcachenwrite++; + if(c->write(c->z, score, b->type, b->data, n) < 0) + return -1; + + memmove(b->score, score, VtScoreSize); + + qlock(&c->lk); + b->addr = NilBlock; /* now on venti */ + b->iostate = BioVenti; + h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|(score[3]<<24)) % c->nhash; + b->next = c->hash[h]; + c->hash[h] = b; + if(b->next != nil) + b->next->prev = &b->next; + b->prev = &c->hash[h]; + qunlock(&c->lk); + return 0; +} + /* block size of cache c */ +uint +vtcacheblocksize(VtCache *c) +{ + return c->blocksize; +} + /* copy b into a newly allocated local block of the same type; the reference to b is released with vtblockput whether or not the allocation succeeds */ +VtBlock* +vtblockcopy(VtBlock *b) +{ + VtBlock *bb; + + vtcachencopy++; + bb = vtcacheallocblock(b->c, b->type); + if(bb == nil){ + vtblockput(b); + return nil; + } + memmove(bb->data, b->data, b->c->blocksize); + vtblockput(b); + bb->pc = getcallerpc(&b); + return bb; +} + /* build the score that stands for local block addr: 16 zero bytes followed by addr, most significant byte first */ +void +vtlocaltoglobal(u32int addr, uchar score[VtScoreSize]) +{ + memset(score, 0, 16); + score[16] = addr>>24; + score[17] = addr>>16; + score[18] = addr>>8; + score[19] = addr; +} + + /* inverse of vtlocaltoglobal: NilBlock if the first 16 bytes of score are not all zero, otherwise the address stored in bytes 16 to 19 */ +u32int +vtglobaltolocal(uchar score[VtScoreSize]) +{ + static uchar zero[16]; + if(memcmp(score, zero, 16) != 0) + return NilBlock; + return (score[16]<<24)|(score[17]<<16)|(score[18]<<8)|score[19]; +} + diff -r 9c93e936f28e -r e1f6266f79c1 sys/src/cmd/vtvacfs/vtfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++
b/sys/src/cmd/vtvacfs/vtfile.c Fri Sep 30 15:24:12 2011 -0700 @@ -0,0 +1,1282 @@ +/* + * Manage tree of VtFiles stored in the block cache. + * + * The single point of truth for the info about the VtFiles themselves + * is the block data. Because of this, there is no explicit locking of + * VtFile structures, and indeed there may be more than one VtFile + * structure for a given Venti file. They synchronize through the + * block cache. + * + * This is a bit simpler than fossil because there are no epochs + * or tags or anything else. Just mutable local blocks and immutable + * Venti blocks. + */ + +#include +#include +#include + /* NOTE(review): the header names are missing from the three include directives above; this copy appears to have lost all text between angle brackets */ +#define MaxBlock (1UL<<31) + +static char ENotDir[] = "walk in non-directory"; +static char ETooBig[] = "file too big"; +/* static char EBadAddr[] = "bad address"; */ +static char ELabelMismatch[] = "label mismatch"; + +static int sizetodepth(uvlong s, int psize, int dsize); +static VtBlock *fileload(VtFile *r, VtEntry *e); +static int shrinkdepth(VtFile*, VtBlock*, VtEntry*, int); +static int shrinksize(VtFile*, VtEntry*, uvlong); +static int growdepth(VtFile*, VtBlock*, VtEntry*, int); + /* a VtFile counts as locked while it holds the block containing its entry in r->b */ +#define ISLOCKED(r) ((r)->b != nil) +#define DEPTH(t) ((t)&VtTypeDepthMask) + /* build a VtFile for entry number offset, unpacked from slot offset % epb of directory block b; p is the locked parent, or nil for a root (then offset must be 0 and epb is 1); returns nil if b is not VtDirType, the entry cannot be unpacked or is not active, its depth is too small for its size, or its block sizes exceed the cache block size */ +static VtFile * +vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode) +{ + int epb; + u32int size; + VtEntry e; + VtFile *r; + + assert(p==nil || ISLOCKED(p)); + + if(p == nil){ + assert(offset == 0); + epb = 1; + }else + epb = p->dsize / VtEntrySize; + + if(b->type != VtDirType){ + werrstr("bad block type %#uo", b->type); + return nil; + } + + /* + * a non-active entry is the only thing that + * can legitimately happen here. all the others + * get prints.
+ */ + if(vtentryunpack(&e, b->data, offset % epb) < 0){ + fprint(2, "vtentryunpack failed: %r (%.*H)\n", VtEntrySize, b->data+VtEntrySize*(offset%epb)); + return nil; + } + if(!(e.flags & VtEntryActive)){ + werrstr("entry not active"); + return nil; + } + + if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){ + fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", + DEPTH(e.type), e.size, e.psize, e.dsize); + werrstr("bad depth"); + return nil; + } + + size = vtcacheblocksize(c); + if(e.dsize > size || e.psize > size){ + werrstr("block sizes %ud, %ud bigger than cache block size %ud", + e.psize, e.dsize, size); + return nil; + } + + r = vtmallocz(sizeof(VtFile)); + r->c = c; + r->mode = mode; + r->dsize = e.dsize; + r->psize = e.psize; + r->gen = e.gen; + r->dir = (e.type & VtTypeBaseMask) == VtDirType; + r->ref = 1; + r->parent = p; + if(p){ + qlock(&p->lk); + assert(mode == VtOREAD || p->mode == VtORDWR); + p->ref++; /* the child holds a reference on its parent; vtfileclose drops it */ + qunlock(&p->lk); + }else{ + assert(b->addr != NilBlock); /* the entry of a root must sit in a local block */ + r->local = 1; + } + memmove(r->score, b->score, VtScoreSize); + r->offset = offset; + r->epb = epb; + + return r; +} + /* open the root file described by entry 0 of the local directory block addr */ +VtFile * +vtfileroot(VtCache *c, u32int addr, int mode) +{ + VtFile *r; + VtBlock *b; + + b = vtcachelocal(c, addr, VtDirType); + if(b == nil) + return nil; + r = vtfilealloc(c, b, nil, 0, mode); + vtblockput(b); + return r; +} + /* make a root file from e: pack e into slot 0 of a freshly allocated local directory block and open it VtORDWR */ +VtFile* +vtfileopenroot(VtCache *c, VtEntry *e) +{ + VtBlock *b; + VtFile *f; + + b = vtcacheallocblock(c, VtDirType); + if(b == nil) + return nil; + + vtentrypack(e, b->data, 0); + f = vtfilealloc(c, b, nil, 0, VtORDWR); + vtblockput(b); + return f; +} + /* make a new empty root file: an active entry with the given block sizes and type and the zero score */ +VtFile * +vtfilecreateroot(VtCache *c, int psize, int dsize, int type) +{ + VtEntry e; + + memset(&e, 0, sizeof e); + e.flags = VtEntryActive; + e.psize = psize; + e.dsize = dsize; + e.type = type; + memmove(e.score, vtzeroscore, VtScoreSize); + + return vtfileopenroot(c, &e); +} + /* open entry number offset of the locked directory r; fails with ENotDir if r is not a directory */ +VtFile * +vtfileopen(VtFile *r, u32int offset, int mode) +{ + ulong bn; + VtBlock *b; + +
assert(ISLOCKED(r)); + if(!r->dir){ + werrstr(ENotDir); + return nil; + } + + bn = offset/(r->dsize/VtEntrySize); /* directory block that holds entry number offset */ + + b = vtfileblock(r, bn, mode); + if(b == nil) + return nil; + r = vtfilealloc(r->c, b, r, offset, mode); + vtblockput(b); + return r; +} + /* create a new entry in the locked directory r, starting the search for a free slot at a random block */ +VtFile* +vtfilecreate(VtFile *r, int psize, int dsize, int type) +{ + return _vtfilecreate(r, -1, psize, dsize, type); +} + /* as vtfilecreate, but when o is not -1 the search for a free slot starts at entry o instead of a random block */ +VtFile* +_vtfilecreate(VtFile *r, int o, int psize, int dsize, int type) +{ + int i; + VtBlock *b; + u32int bn, size; + VtEntry e; + int epb; + VtFile *rr; + u32int offset; + + assert(ISLOCKED(r)); + assert(psize <= VtMaxLumpSize); + assert(dsize <= VtMaxLumpSize); + assert(type == VtDirType || type == VtDataType); + + if(!r->dir){ + werrstr(ENotDir); + return nil; + } + + epb = r->dsize/VtEntrySize; + + size = vtfilegetdirsize(r); + /* + * look at a random block to see if we can find an empty entry + */ + if(o == -1){ + offset = lnrand(size+1); + offset -= offset % epb; /* round down to the first entry of its block */ + }else + offset = o; + + /* try the given block and then try the last block */ + for(;;){ + bn = offset/epb; + b = vtfileblock(r, bn, VtORDWR); + if(b == nil) + return nil; + /* NOTE(review): the loop header and the first statement of its body below are damaged in this copy (the text from the less-than sign up to the next greater-than sign is missing); restore them from the original changeset */ for(i=offset%r->epb; idata, i) < 0) + continue; + if((e.flags&VtEntryActive) == 0 && e.gen != ~0) /* a usable slot is inactive and its generation has not reached ~0 */ + goto Found; + } + vtblockput(b); + if(offset == size){ + fprint(2, "vtfilecreate: cannot happen\n"); + werrstr("vtfilecreate: cannot happen"); + return nil; + } + offset = size; + } + +Found: + /* found an entry - gen already set */ + e.psize = psize; + e.dsize = dsize; + e.flags = VtEntryActive; + e.type = type; + e.size = 0; + memmove(e.score, vtzeroscore, VtScoreSize); + vtentrypack(&e, b->data, i); + + offset = bn*epb + i; + if(offset+1 > size){ /* the new entry lies past the current end: grow the directory */ + if(vtfilesetdirsize(r, offset+1) < 0){ + vtblockput(b); + return nil; + } + } + + rr = vtfilealloc(r->c, b, r, offset, VtORDWR); + vtblockput(b); + return rr; +} + /* common code for vtfileremove (doremove nonzero) and vtfiletruncate (doremove zero) */ +static int +vtfilekill(VtFile *r, int doremove) +{ + VtEntry e; + VtBlock *b; + + assert(ISLOCKED(r)); + b = fileload(r, &e); + if(b == nil) +
return -1; + + if(doremove==0 && e.size == 0){ + /* already truncated */ + vtblockput(b); + return 0; + } + + if(doremove){ + if(e.gen != ~0) + e.gen++; + e.dsize = 0; + e.psize = 0; + e.flags = 0; + }else + e.flags &= ~VtEntryLocal; + e.type = 0; /* from here on the entry is reset for both remove and truncate */ + e.size = 0; + memmove(e.score, vtzeroscore, VtScoreSize); + vtentrypack(&e, b->data, r->offset % r->epb); + vtblockput(b); + + if(doremove){ /* removal also unlocks r and releases one reference to it */ + vtfileunlock(r); + vtfileclose(r); + } + + return 0; +} + /* remove the locked file r: its entry is cleared, and r is unlocked and closed */ +int +vtfileremove(VtFile *r) +{ + return vtfilekill(r, 1); +} + /* truncate the locked file r to size 0; r stays locked */ +int +vtfiletruncate(VtFile *r) +{ + return vtfilekill(r, 0); +} + /* size in bytes of the locked file r, taken from its entry; ~(uvlong)0 if the entry cannot be loaded */ +uvlong +vtfilegetsize(VtFile *r) +{ + VtEntry e; + VtBlock *b; + + assert(ISLOCKED(r)); + b = fileload(r, &e); + if(b == nil) + return ~(uvlong)0; + vtblockput(b); + + return e.size; +} + /* clear what lies beyond size in the block tree of e: unneeded pointers are zeroed and, for a non-directory, the tail of the last data block; returns -1 when a block on the way has addr NilBlock or cannot be loaded */ +static int +shrinksize(VtFile *r, VtEntry *e, uvlong size) +{ + int i, depth, type, isdir, ppb; + uvlong ptrsz; + uchar score[VtScoreSize]; + VtBlock *b; + + b = vtcacheglobal(r->c, e->score, e->type); + if(b == nil) + return -1; + + ptrsz = e->dsize; + ppb = e->psize/VtScoreSize; + type = e->type; + depth = DEPTH(type); + /* NOTE(review): the statement below is damaged in this copy (text between a less-than and a greater-than sign is missing, apparently the rest of the loop and the start of the assignment to isdir); restore it from the original changeset */ for(i=0; i+1dir; + while(depth > 0){ /* NOTE(review): depth is not changed inside this loop in the visible text, only type is decremented -- confirm against the original */ + if(b->addr == NilBlock){ + /* not worth copying the block just so we can zero some of it */ + vtblockput(b); + return -1; + } + + /* + * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes + */ + + /* zero the pointers to unnecessary blocks */ + i = (size+ptrsz-1)/ptrsz; + /* NOTE(review): damaged loop header below, same cause as above */ for(; idata+i*VtScoreSize, vtzeroscore, VtScoreSize); + + /* recurse (go around again) on the partially necessary block */ + i = size/ptrsz; + size = size%ptrsz; + if(size == 0){ + vtblockput(b); + return 0; + } + ptrsz /= ppb; + type--; + memmove(score, b->data+i*VtScoreSize, VtScoreSize); + vtblockput(b); + b = vtcacheglobal(r->c, score, type); + if(b == nil) + return -1; + } + + if(b->addr == NilBlock){ + vtblockput(b); + return -1; + } + + /* + * No one ever truncates BtDir blocks.
+ */ + if(depth==0 && !isdir && e->dsize > size) + memset(b->data+size, 0, e->dsize-size); + vtblockput(b); + return 0; +} + /* set the size of the locked file r to size bytes, changing the depth of its block tree when the new size needs a different one; size 0 is handled by vtfiletruncate; returns 0 on success, -1 on error */ +int +vtfilesetsize(VtFile *r, u64int size) +{ + int depth, edepth; + VtEntry e; + VtBlock *b; + + assert(ISLOCKED(r)); + if(size == 0) + return vtfiletruncate(r); + + if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){ + werrstr(ETooBig); + return -1; + } + + b = fileload(r, &e); + if(b == nil) + return -1; + + /* quick out */ + if(e.size == size){ + vtblockput(b); + return 0; + } + + depth = sizetodepth(size, e.psize, e.dsize); + edepth = DEPTH(e.type); + if(depth < edepth){ + if(shrinkdepth(r, b, &e, depth) < 0){ + vtblockput(b); + return -1; + } + }else if(depth > edepth){ + if(growdepth(r, b, &e, depth) < 0){ + vtblockput(b); + return -1; + } + } + + if(size < e.size) + shrinksize(r, &e, size); /* the result is not checked */ + + e.size = size; + vtentrypack(&e, b->data, r->offset % r->epb); + vtblockput(b); + + return 0; +} + /* set the size of the locked directory r to ds entries: whole blocks count dsize bytes each, leftover entries VtEntrySize bytes each */ +int +vtfilesetdirsize(VtFile *r, u32int ds) +{ + uvlong size; + int epb; + + assert(ISLOCKED(r)); + epb = r->dsize/VtEntrySize; + + size = (uvlong)r->dsize*(ds/epb); + size += VtEntrySize*(ds%epb); + return vtfilesetsize(r, size); +} + /* number of entries implied by the byte size of the locked directory r; the reverse of the computation in vtfilesetdirsize */ +u32int +vtfilegetdirsize(VtFile *r) +{ + ulong ds; + uvlong size; + int epb; + + assert(ISLOCKED(r)); + epb = r->dsize/VtEntrySize; + + size = vtfilegetsize(r); /* NOTE(review): the ~(uvlong)0 error value of vtfilegetsize is not checked here */ + ds = epb*(size/r->dsize); + ds += (size%r->dsize)/VtEntrySize; + return ds; +} + /* copy the entry of the locked file r into e; -1 if it cannot be unpacked */ +int +vtfilegetentry(VtFile *r, VtEntry *e) +{ + VtBlock *b; + + assert(ISLOCKED(r)); + b = fileload(r, e); + if(b == nil) + return -1; + vtblockput(b); + + return 0; +} + /* overwrite the entry of the locked file r with e; ee only receives the current entry unpacked by fileload and is not used otherwise */ +int +vtfilesetentry(VtFile *r, VtEntry *e) +{ + VtBlock *b; + VtEntry ee; + + assert(ISLOCKED(r)); + b = fileload(r, &ee); + if(b == nil) + return -1; + vtentrypack(e, b->data, r->offset % r->epb); + vtblockput(b); + return 0; +} + /* step from block p to one of its children: for a directory block the child is the root block named by entry e, otherwise the block named by pointer number index in p; in a writing mode the child is made local and its score is stored back into p */ +static VtBlock * +blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e) +{ + VtBlock *b; + int type; + uchar *score; + VtEntry oe; + +
switch(p->type){ + case VtDataType: + assert(0); /* data blocks have no children */ + case VtDirType: + type = e->type; /* child is the root block of the file described by e */ + score = e->score; + break; + default: + type = p->type - 1; /* pointer block: the child is one level down */ + score = p->data+index*VtScoreSize; + break; + } +/*print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type); */ + + if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){ /* child is not a local block yet: in VtOWRITE mode take a fresh local block instead of reading the old one */ + b = vtcacheallocblock(c, type); + if(b) + goto HaveCopy; + }else + b = vtcacheglobal(c, score, type); + + if(b == nil || mode == VtOREAD) + return b; + + if(vtglobaltolocal(b->score) != NilBlock) /* already a local block: nothing to copy */ + return b; + + oe = *e; /* NOTE(review): oe is assigned but not read again in this function */ + + /* + * Copy on write. + */ + e->flags |= VtEntryLocal; + + b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/); + if(b == nil) + return nil; + +HaveCopy: /* record the score of the local block in the parent */ + if(p->type == VtDirType){ + memmove(e->score, b->score, VtScoreSize); + vtentrypack(e, p->data, index); + }else{ + memmove(p->data+index*VtScoreSize, b->score, VtScoreSize); + } + return b; +} + +/* + * Change the depth of the VtFile r. + * The entry e for r is contained in block p. + */ +static int +growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth) +{ + VtBlock *b, *bb; + VtEntry oe; + + assert(ISLOCKED(r)); + assert(depth <= VtPointerDepth); + + b = vtcacheglobal(r->c, e->score, e->type); + if(b == nil) + return -1; + + oe = *e; /* NOTE(review): oe is assigned but not read again in the visible part of this function */ + + /* + * Keep adding layers until we get to the right depth + * or an error occurs.
+ */ + while(DEPTH(e->type) < depth){ + bb = vtcacheallocblock(r->c, e->type+1); + if(bb == nil) + break; + memmove(bb->data, b->score, VtScoreSize); /* the first pointer of the new top block names the old root */ + memmove(e->score, bb->score, VtScoreSize); + e->type++; + e->flags |= VtEntryLocal; + vtblockput(b); + b = bb; + } + + vtentrypack(e, p->data, r->offset % r->epb); /* store whatever depth was reached, even if the loop stopped early */ + vtblockput(b); + + if(DEPTH(e->type) == depth) + return 0; + return -1; +} + /* reduce the depth of r toward depth by following the first pointer of each level; the entry e lives in block p; returns 0 if no level could be removed or the requested depth was reached, -1 after a partial shrink */ +static int +shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth) +{ + VtBlock *b, *nb, *ob, *rb; + VtEntry oe; + + assert(ISLOCKED(r)); + assert(depth <= VtPointerDepth); + + rb = vtcacheglobal(r->c, e->score, e->type); + if(rb == nil) + return -1; + + /* + * Walk down to the new root block. + * We may stop early, but something is better than nothing. + */ + oe = *e; /* NOTE(review): oe is assigned but not read again in the visible part of this function */ + + ob = nil; + b = rb; + for(; DEPTH(e->type) > depth; e->type--){ + nb = vtcacheglobal(r->c, b->data, e->type-1); /* b->data begins with the score of the first child */ + if(nb == nil) + break; + if(ob!=nil && ob!=rb) + vtblockput(ob); + ob = b; + b = nb; + } + + if(b == rb){ /* no level was removed */ + vtblockput(rb); + return 0; + } + + /* + * Right now, e points at the root block rb, b is the new root block, + * and ob points at b. To update: + * + * (i) change e to point at b + * (ii) zero the pointer ob -> b + * (iii) free the root block + * + * p (the block containing e) must be written before + * anything else.
+ */ + + /* (i) */ + memmove(e->score, b->score, VtScoreSize); + vtentrypack(e, p->data, r->offset % r->epb); + + /* (ii) */ + memmove(ob->data, vtzeroscore, VtScoreSize); + + /* (iii) */ + vtblockput(rb); + if(ob!=nil && ob!=rb) + vtblockput(ob); + vtblockput(b); + + if(DEPTH(e->type) == depth) + return 0; + return -1; +} + /* split block number bn into one pointer index per tree level, lowest level first, with np = psize/VtScoreSize pointers per block; index must have room for VtPointerDepth+1 ints; returns the number of levels used, or -1 if bn needs more than VtPointerDepth levels */ +static int +mkindices(VtEntry *e, u32int bn, int *index) +{ + int i, np; + + memset(index, 0, (VtPointerDepth+1)*sizeof(int)); + + np = e->psize/VtScoreSize; + for(i=0; bn > 0; i++){ + if(i >= VtPointerDepth){ + werrstr("bad address 0x%lux", (ulong)bn); + return -1; + } + index[i] = bn % np; + bn /= np; + } + return i; +} + /* return block number bn of the locked file r, locked; in a writing mode the tree is deepened when bn needs more levels and the blocks on the path are made local by blockwalk; returns nil on error */ +VtBlock * +vtfileblock(VtFile *r, u32int bn, int mode) +{ + VtBlock *b, *bb; + int index[VtPointerDepth+1]; + VtEntry e; + int i; + int m; + + assert(ISLOCKED(r)); + assert(bn != NilBlock); + + b = fileload(r, &e); + if(b == nil) + return nil; + + i = mkindices(&e, bn, index); + if(i < 0) + goto Err; + if(i > DEPTH(e.type)){ /* bn lies beyond what the current depth can address */ + if(mode == VtOREAD){ + werrstr("bad address 0x%lux", (ulong)bn); + goto Err; + } + index[i] = 0; + if(growdepth(r, b, &e, i) < 0) + goto Err; + } + +assert(b->type == VtDirType); + + index[DEPTH(e.type)] = r->offset % r->epb; /* top of the path: the slot of the entry of r in its directory block */ + + /* mode for intermediate block */ + m = mode; + if(m == VtOWRITE) + m = VtORDWR; + + for(i=DEPTH(e.type); i>=0; i--){ + bb = blockwalk(b, index[i], r->c, i==0 ?
mode : m, &e); + if(bb == nil) + goto Err; + vtblockput(b); + b = bb; + } + b->pc = getcallerpc(&r); + return b; +Err: + vtblockput(b); + return nil; +} + /* copy into score the score of block number bn of the locked file r without loading that block; the zero score is returned for a block number that needs more levels than the file has; returns 0 on success, -1 on error */ +int +vtfileblockscore(VtFile *r, u32int bn, uchar score[VtScoreSize]) +{ + VtBlock *b, *bb; + int index[VtPointerDepth+1]; + VtEntry e; + int i; + + assert(ISLOCKED(r)); + assert(bn != NilBlock); + + b = fileload(r, &e); + if(b == nil) + return -1; + + if(DEPTH(e.type) == 0){ /* NOTE(review): at depth 0 the entry score is returned whatever bn is -- confirm callers only pass block 0 in that case */ + memmove(score, e.score, VtScoreSize); + vtblockput(b); + return 0; + } + + i = mkindices(&e, bn, index); + if(i < 0){ + vtblockput(b); + return -1; + } + if(i > DEPTH(e.type)){ + memmove(score, vtzeroscore, VtScoreSize); + vtblockput(b); + return 0; + } + + index[DEPTH(e.type)] = r->offset % r->epb; + + for(i=DEPTH(e.type); i>=1; i--){ /* walk down to the lowest pointer block, not to the data block itself */ + bb = blockwalk(b, index[i], r->c, VtOREAD, &e); + if(bb == nil) + goto Err; + vtblockput(b); + b = bb; + if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0) + break; + } + + memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize); + vtblockput(b); + return 0; + +Err: + vtblockput(b); + return -1; +} + /* add a reference to r */ +void +vtfileincref(VtFile *r) +{ + qlock(&r->lk); + r->ref++; + qunlock(&r->lk); +} + /* drop a reference to r; when the last one goes, close the parent, overwrite the structure with ~0 bytes and free it; nil is accepted and ignored */ +void +vtfileclose(VtFile *r) +{ + if(r == nil) + return; + qlock(&r->lk); + r->ref--; + if(r->ref){ + qunlock(&r->lk); + return; + } + assert(r->ref == 0); + qunlock(&r->lk); + if(r->parent) + vtfileclose(r->parent); + memset(r, ~0, sizeof(*r)); + vtfree(r); +} + +/* + * Retrieve the block containing the entry for r. + * If a snapshot has happened, we might need + * to get a new copy of the block. We avoid this + * in the common case by caching the score for + * the block and the last epoch in which it was valid. + * + * We use r->mode to tell the difference between active + * file system VtFiles (VtORDWR) and VtFiles for the + * snapshot file system (VtOREAD).
+ */ +static VtBlock* +fileloadblock(VtFile *r, int mode) +{ + char e[ERRMAX]; + u32int addr; + VtBlock *b; + + switch(r->mode){ + default: + assert(0); + case VtORDWR: + assert(r->mode == VtORDWR); + if(r->local == 1){ /* the cached score r->score is used directly */ + b = vtcacheglobal(r->c, r->score, VtDirType); + if(b == nil) + return nil; + b->pc = getcallerpc(&r); + return b; + } + assert(r->parent != nil); /* otherwise walk from the parent in write mode and cache the score of the resulting block */ + if(vtfilelock(r->parent, VtORDWR) < 0) + return nil; + b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR); + vtfileunlock(r->parent); + if(b == nil) + return nil; + memmove(r->score, b->score, VtScoreSize); + r->local = 1; + return b; + + case VtOREAD: + if(mode == VtORDWR){ + werrstr("read/write lock of read-only file"); + return nil; + } + addr = vtglobaltolocal(r->score); + if(addr == NilBlock) + return vtcacheglobal(r->c, r->score, VtDirType); + + b = vtcachelocal(r->c, addr, VtDirType); /* NOTE(review): vtcachelocal as it appears earlier in this patch calls sysfatal instead of returning nil, so the fallback below looks unreachable -- confirm */ + if(b) + return b; + + /* + * If it failed because the epochs don't match, the block has been + * archived and reclaimed. Rewalk from the parent and get the + * new pointer. This can't happen in the VtORDWR case + * above because blocks in the current epoch don't get + * reclaimed. The fact that we're VtOREAD means we're + * a snapshot. (Or else the file system is read-only, but then + * the archiver isn't going around deleting blocks.) + */ + rerrstr(e, sizeof e); + if(strcmp(e, ELabelMismatch) == 0){ + if(vtfilelock(r->parent, VtOREAD) < 0) + return nil; + b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD); + vtfileunlock(r->parent); + if(b){ + fprint(2, "vtfilealloc: lost %V found %V\n", + r->score, b->score); + memmove(r->score, b->score, VtScoreSize); + return b; + } + } + return nil; + } +} + /* lock r by loading the block that holds its entry and keeping it in r->b; mode -1 means use r->mode; returns 0, or -1 if the block cannot be loaded */ +int +vtfilelock(VtFile *r, int mode) +{ + VtBlock *b; + + if(mode == -1) + mode = r->mode; + + b = fileloadblock(r, mode); + if(b == nil) + return -1; + /* + * The fact that we are holding b serves as the + * lock entitling us to write to r->b.
+ */ + assert(r->b == nil); + r->b = b; + b->pc = getcallerpc(&r); + return 0; +} + +/* + * Lock two (usually sibling) VtFiles. This needs special care + * because the Entries for both vtFiles might be in the same block. + * We also try to lock blocks in left-to-right order within the tree. + */ +int +vtfilelock2(VtFile *r, VtFile *rr, int mode) +{ + VtBlock *b, *bb; + + if(rr == nil) + return vtfilelock(r, mode); + + if(mode == -1) + mode = r->mode; + + if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){ /* both entries sit in one block: load it once and count two lock references */ + b = fileloadblock(r, mode); + if(b == nil) + return -1; + vtblockduplock(b); + bb = b; + }else if(r->parent==rr->parent || r->offset > rr->offset){ /* NOTE(review): with a shared parent rr is always loaded first, whatever the offsets are -- confirm this matches the left-to-right rule stated above */ + bb = fileloadblock(rr, mode); + b = fileloadblock(r, mode); + }else{ + b = fileloadblock(r, mode); + bb = fileloadblock(rr, mode); + } + if(b == nil || bb == nil){ /* release whichever block was obtained */ + if(b) + vtblockput(b); + if(bb) + vtblockput(bb); + return -1; + } + + /* + * The fact that we are holding b and bb serves + * as the lock entitling us to write to r->b and rr->b.
+ */ + r->b = b; + rr->b = bb; + b->pc = getcallerpc(&r); + bb->pc = getcallerpc(&r); + return 0; +} + /* release the lock on r by putting the block held in r->b; aborts if r is not locked */ +void +vtfileunlock(VtFile *r) +{ + VtBlock *b; + + if(r->b == nil){ + fprint(2, "vtfileunlock: already unlocked\n"); + abort(); + } + b = r->b; + r->b = nil; + vtblockput(b); +} + /* unpack the entry of the locked file r into e and return the block r->b with one more lock reference, to be released with vtblockput; nil if the entry cannot be unpacked */ +static VtBlock* +fileload(VtFile *r, VtEntry *e) +{ + VtBlock *b; + + assert(ISLOCKED(r)); + b = r->b; + if(vtentryunpack(e, b->data, r->offset % r->epb) < 0) + return nil; + vtblockduplock(b); + return b; +} + /* number of pointer block levels needed above the data blocks for a file of s bytes with dsize-byte data blocks and psize-byte pointer blocks */ +static int +sizetodepth(uvlong s, int psize, int dsize) +{ + int np; + int d; + + /* determine pointer depth */ + np = psize/VtScoreSize; + s = (s + dsize - 1)/dsize; + for(d = 0; s > 1; d++) + s = (s + np - 1)/np; + return d; +} + /* read up to count bytes at offset from the locked file f into data; a single call never crosses a data block boundary, so fewer bytes than requested may be returned; returns 0 at or past the end of the file and -1 on error */ +long +vtfileread(VtFile *f, void *data, long count, vlong offset) +{ + int frag; + VtBlock *b; + VtEntry e; + + assert(ISLOCKED(f)); + + vtfilegetentry(f, &e); /* NOTE(review): the result is not checked; e is used below even if this fails */ + if(count == 0) + return 0; + if(count < 0 || offset < 0){ + werrstr("vtfileread: bad offset or count"); + return -1; + } + if(offset >= e.size) + return 0; + + if(offset+count > e.size) + count = e.size - offset; + + frag = offset % e.dsize; + if(frag+count > e.dsize) + count = e.dsize - frag; + + b = vtfileblock(f, offset/e.dsize, VtOREAD); + if(b == nil) + return -1; + + memmove(data, b->data+frag, count); + vtblockput(b); + return count; +} + /* write at most one data block worth of data at offset, extending the recorded file size when the write goes past it; returns the number of bytes written or -1 */ +static long +filewrite1(VtFile *f, void *data, long count, vlong offset) +{ + int frag, m; + VtBlock *b; + VtEntry e; + + vtfilegetentry(f, &e); /* NOTE(review): the result is not checked; e is used below even if this fails */ + if(count < 0 || offset < 0){ + werrstr("vtfilewrite: bad offset or count"); + return -1; + } + + frag = offset % e.dsize; + if(frag+count > e.dsize) + count = e.dsize - frag; + + m = VtORDWR; + if(frag == 0 && count == e.dsize) /* a whole block is being replaced, so its old contents need not be read */ + m = VtOWRITE; + + b = vtfileblock(f, offset/e.dsize, m); + if(b == nil) + return -1; + + memmove(b->data+frag, data, count); + if(m == VtOWRITE && frag+count < e.dsize) + memset(b->data+frag+count, 0, e.dsize-frag-count); + + if(offset+count > e.size){ + vtfilegetentry(f, &e); +
e.size = offset+count; + vtfilesetentry(f, &e); + } + + vtblockput(b); + return count; +} + /* write count bytes at offset to the locked file f, one data block at a time; returns the number of bytes written, or the result of the first filewrite1 call if nothing was written */ +long +vtfilewrite(VtFile *f, void *data, long count, vlong offset) +{ + long tot, m; + + assert(ISLOCKED(f)); + + tot = 0; + m = 0; + while(tot < count){ + m = filewrite1(f, (char*)data+tot, count-tot, offset+tot); + if(m <= 0) + break; + tot += m; + } + if(tot==0) + return m; + return tot; +} + /* write out the local block named by score, after first flushing the local blocks it refers to, and replace score with the score returned by vtblockwrite; a score that does not name a local block is left alone and 0 is returned; bb, if not nil, is the already locked block for score and is not put here */ +static int +flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb, + int type) +{ + u32int addr; + VtBlock *b; + VtEntry e; + int i; + + addr = vtglobaltolocal(score); + if(addr == NilBlock) + return 0; + + if(bb){ + b = bb; + if(memcmp(b->score, score, VtScoreSize) != 0) + abort(); + }else + if((b = vtcachelocal(c, addr, type)) == nil) + return -1; + + switch(type){ + case VtDataType: + break; + + case VtDirType: + /* NOTE(review): the loop header and the first statement of its body below are damaged in this copy (the text from the less-than sign up to the next greater-than sign is missing); restore them from the original changeset */ for(i=0; idata, i) < 0) + goto Err; + if(!(e.flags&VtEntryActive)) + continue; + if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize, + e.type) < 0) + goto Err; + vtentrypack(&e, b->data, i); /* e.score may have been replaced by the recursive call */ + } + break; + + default: /* VtPointerTypeX */ + /* NOTE(review): damaged loop header below, same cause as above */ for(i=0; idata+VtScoreSize*i, ppb, epb, type-1) < 0) + goto Err; + } + break; + } + + if(vtblockwrite(b) < 0) + goto Err; + memmove(score, b->score, VtScoreSize); + if(b != bb) + vtblockput(b); + return 0; + +Err: + if(b != bb) + vtblockput(b); + return -1; +} + /* flush the whole block tree of the locked file f if its entry is marked VtEntryLocal, then store the updated entry; returns 0 on success, -1 on error */ +int +vtfileflush(VtFile *f) +{ + int ret; + VtBlock *b; + VtEntry e; + + assert(ISLOCKED(f)); + b = fileload(f, &e); /* NOTE(review): b is not checked for nil before e is used */ + if(!(e.flags&VtEntryLocal)){ + vtblockput(b); + return 0; + } + + ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize, + e.type); + if(ret < 0){ + vtblockput(b); + return -1; + } + + vtentrypack(&e, b->data, f->offset % f->epb); + vtblockput(b); + return 0; +} + /* write out the blocks of the locked file r that are finished given that everything before byte offset has been written; offset 0 is a no-op */ +int +vtfileflushbefore(VtFile *r, u64int offset) +{ + VtBlock *b, *bb; + VtEntry e; + int i, base, depth, ppb, epb, doflush; + int index[VtPointerDepth+1], j, ret; + VtBlock *bi[VtPointerDepth+2]; + uchar *score; + +
assert(ISLOCKED(r)); + if(offset == 0) + return 0; + + b = fileload(r, &e); + if(b == nil) + return -1; + + /* + * compute path through tree for the last written byte and the next one. + */ + ret = -1; + memset(bi, 0, sizeof bi); + depth = DEPTH(e.type); + bi[depth+1] = b; /* the block holding the entry sits one slot above the root of the tree */ + i = mkindices(&e, (offset-1)/e.dsize, index); + if(i < 0) + goto Err; + if(i > depth) /* NOTE(review): on the two early goto Err paths index[depth] has not yet been set to r->offset % r->epb -- confirm that the slot written at Err is the intended one */ + goto Err; + ppb = e.psize / VtScoreSize; + epb = e.dsize / VtEntrySize; + + /* + * load the blocks along the last written byte + */ + index[depth] = r->offset % r->epb; + for(i=depth; i>=0; i--){ + bb = blockwalk(b, index[i], r->c, VtORDWR, &e); + if(bb == nil) + goto Err; + bi[i] = bb; + b = bb; + } + ret = 0; + + /* + * walk up the path from leaf to root, flushing anything that + * has been finished. + */ + base = e.type&~VtTypeDepthMask; + for(i=0; i<=depth; i++){ + doflush = 0; + if(i == 0){ + /* leaf: data or dir block */ + if(offset%e.dsize == 0) + doflush = 1; + }else{ + /* + * interior node: pointer blocks. + * specifically, b = bi[i] is a block whose index[i-1]'th entry + * points at bi[i-1]. + */ + b = bi[i]; + + /* + * the index entries up to but not including index[i-1] point at + * finished blocks, so flush them for sure. + */ + /* NOTE(review): the loop header and the start of its body below are damaged in this copy (the text from the less-than sign up to the next greater-than sign is missing); restore them from the original changeset */ for(j=0; jc, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0) + goto Err; + + /* + * if index[i-1] is the last entry in the block and is global + * (i.e. the kid is flushed), then we can flush this block. + */ + if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock) + doflush = 1; + } + if(doflush){ + if(i == depth) + score = e.score; + else + score = bi[i+1]->data+index[i]*VtScoreSize; + if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0) + goto Err; + } + } + +Err: + /* top: entry. do this always so that the score is up-to-date */ + vtentrypack(&e, bi[depth+1]->data, index[depth]); + /* NOTE(review): everything from the middle of the loop header below to the middle of a later hunk of a different file is missing from this copy; the end of this function cannot be recovered from the visible text */ for(i=0; i-1"); return -1; + } if (len > m->len) len = m->len; memmove(data, m->data, len); if (m->blocktype != blocktype) fprint(2, "Mismatched blocktype!
now what?\n"); + trace(TraceRpc, "getdata->%d", len); return len; } +int +mmvtread(uchar score[VtScoreSize], uint type, uchar *buf, int n) +{ /* adapter around getdata that takes its arguments in the order score, type, buf, n; NOTE(review): the print below looks like leftover debugging output -- confirm it is wanted */ +print("mmvtread: %d\n", n); + return getdata(score, buf, n, type); +} int putscore(Packet *p, u8int *score, uchar blocktype) @@ -247,6 +257,7 @@ /* leave room for the length. This will make sure we still work for the * mmap'ed version. */ + trace(TraceRpc, "<-putdata([%V],%#p,%d, %d", score, data, len, blocktype); mmventidata += 4; /* could also use ainc here */ memmove(mmventidata, data, len); @@ -255,9 +266,16 @@ //fprint(2, "mmventidata now %p\n", mmventidata); syncentry(&maps[ix]); + trace(TraceRpc, "putdata->%d", maps[ix].len); return maps[ix].len; } +int +mmvtwrite(uchar score[VtScoreSize], uint type, uchar *buf, int n) +{ /* adapter around putdata with the same argument reordering as mmvtread */ + return putdata(score, buf, n, type); +} + void