# HG changeset patch # User Francisco J Ballesteros # Date 1329500322 0 # Node ID f1f2a23bbcce40c24c81c12af7426b40605bf2ea # Parent a8d2211aa14a250db6de4f18eb4cf2afdfe0d92a creepy: 9p in place and more fixes and rewrites. still under development, not ready for use. R=nixiedev CC=nix-dev http://codereview.appspot.com/5677065 diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/9p.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/9p.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,955 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * 9p server for creepy + */ + +typedef struct Fid Fid; +typedef struct Rpc Rpc; +typedef struct Cli Cli; + +enum +{ + Maxmdata = 8*KiB +}; + +/* + * One reference kept because of existence and another per req using it. + */ +struct Fid +{ + Ref; + QLock; + Fid *next; /* in hash or free list */ + void* clino; /* no is local to a client */ + int no; + Memblk *file; /* used by this fid */ + int omode; /* -1 if closed */ + int rclose; + int archived; + char *uid; + + uvlong loff; /* last offset, for dir reads */ + long lidx; /* next dir entry index to read */ +}; + +struct Rpc +{ + Cli *cli; + Rpc *next; /* in client or free list */ + Fid *fid; + Fcall t; + Fcall r; + uchar data[IOHDRSZ+Maxmdata]; +}; + +struct Cli +{ + Ref; + int fd; + int cfd; + char *addr; + int errors; + ulong msize; + + QLock wlk; /* lock for writing replies to the client */ + uchar wdata[IOHDRSZ+Maxmdata]; + + QLock rpclk; + Rpc *rpcs; +}; + +static void rflush(Rpc*), rversion(Rpc*), rauth(Rpc*), + rattach(Rpc*), rwalk(Rpc*), + ropen(Rpc*), rcreate(Rpc*), + rread(Rpc*), rwrite(Rpc*), rclunk(Rpc*), + rremove(Rpc*), rstat(Rpc*), rwstat(Rpc*); + +static void (*fcalls[])(Rpc*) = +{ + [Tversion] rversion, + [Tflush] rflush, + [Tauth] rauth, + [Tattach] rattach, + [Twalk] rwalk, + [Topen] ropen, + [Tcreate] rcreate, + [Tread] rread, + [Twrite] rwrite, 
+ [Tclunk] rclunk, + [Tremove] rremove, + [Tstat] rstat, + [Twstat] rwstat, +}; + +static RWLock fidlk; +static Fid *fidhash[Fidhashsz]; +static Fid *fidfree; +static ulong nfids, nfreefids; + +static QLock rpclk; +static Rpc *rpcfree; +static ulong nrpcs, nfreerpcs; + +static Rpc* +newrpc(void) +{ + Rpc *rpc; + + qlock(&rpclk); + if(rpcfree != nil){ + rpc = rpcfree; + rpcfree = rpc->next; + rpc->next = nil; + nfreerpcs--; + }else{ + rpc = malloc(sizeof *rpc); + nrpcs++; + } + qunlock(&rpclk); + rpc->next = nil; + rpc->fid = nil; + memset(&rpc->t, 0, sizeof rpc->t); + memset(&rpc->r, 0, sizeof rpc->r); + return rpc; +} + +static void +freerpc(Rpc *rpc) +{ + qlock(&rpclk); + rpc->next = rpcfree; + rpcfree = rpc; + nfreerpcs++; + qunlock(&rpclk); +} + +static Fid* +newfid(void* clino, int no) +{ + Fid *fid, **fidp; + + wlock(&fidlk); + if(catcherror()){ + wunlock(&fidlk); + error(nil); + } + for(fidp = &fidhash[no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->next) + if((*fidp)->clino == clino && (*fidp)->no == no) + error("fid in use"); + if(fidfree != nil){ + fid = fidfree; + fidfree = fidfree->next; + nfreefids--; + }else{ + fid = mallocz(sizeof *fid, 1); + nfids++; + } + *fidp = fid; + fid->omode = -1; + fid->no = no; + fid->rclose = 0; + fid->clino = clino; + fid->ref = 2; /* one for the caller; another because it's kept */ + noerror(); + wunlock(&fidlk); + return fid; +} + +static Fid* +getfid(void* clino, int no) +{ + Fid *fid; + + rlock(&fidlk); + if(catcherror()){ + runlock(&fidlk); + error(nil); + } + for(fid = fidhash[no%Fidhashsz]; fid != nil; fid = fid->next) + if(fid->clino == clino && fid->no == no){ + incref(fid); + noerror(); + runlock(&fidlk); + return fid; + } + error("fid not found"); + return fid; +} + +static void +putfid(Fid *fid) +{ + Fid **fidp; + + if(fid == nil || decref(fid) > 0) + return; + mbput(fid->file); + fid->file = nil; + free(fid->uid); + fid->uid = nil; + fid->rclose = fid->archived = 0; + fid->omode = -1; + fid->loff = 0; + 
fid->lidx = 0; + wlock(&fidlk); + if(catcherror()){ + wunlock(&fidlk); + error(nil); + } + for(fidp = &fidhash[fid->no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->next) + if(*fidp == fid){ + *fidp = fid->next; + memset(fid, 0, sizeof *fid); + fid->next = fidfree; + noerror(); + wunlock(&fidlk); + return; + } + fatal("putfid: fid not found"); +} + +static void +putcli(Cli *c) +{ + if(decref(c) == 0){ + close(c->fd); + close(c->cfd); + free(c->addr); + free(c); + } +} + +static Qid +mkqid(Memblk *f) +{ + Qid q; + + q.path = f->mf->id; + q.vers = f->mf->mtime; + q.type = 0; + if(f->mf->mode&DMDIR) + q.type |= QTDIR; + if(f->mf->mode&DMTMP) + q.type |= QTTMP; + if(f->mf->mode&DMAPPEND) + q.type |= QTAPPEND; + return q; +} + +static void +rversion(Rpc *rpc) +{ + rpc->r.msize = rpc->t.msize; + if(rpc->r.msize > sizeof rpc->data) + rpc->r.msize = sizeof rpc->data; + rpc->cli->msize = rpc->r.msize; + if(strncmp(rpc->t.version, "9P2000", 6) != 0) + error("unknown protocol version"); + rpc->r.version = "9P2000"; +} + +static void +rflush(Rpc *) +{ + /* BUG: should reply to this after replying to the flushed request. 
+ * Just look into rpc->c->rpcs + */ +} + +static void +rauth(Rpc*) +{ + /* BUG */ + error("no auth required"); +} + + +static void +rattach(Rpc *rpc) +{ + Fid *fid; + + fid = newfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + fid->file = fs->active; + incref(fid->file); + rwlock(fid->file, Rd); + fid->uid = strdup(rpc->t.uname); + rpc->r.qid = mkqid(fid->file); + rwunlock(fid->file, Rd); +} + +static Fid* +clone(Rpc *rpc) +{ + Fid *nfid; + + nfid = newfid(rpc->cli, rpc->t.newfid); + nfid->file = rpc->fid->file; + incref(nfid->file); + nfid->uid = strdup(rpc->fid->uid); + nfid->archived = rpc->fid->archived; + return nfid; +} + +static void +rwalk(Rpc *rpc) +{ + Fid *fid, *nfid; + Memblk *f; + int i; + + rpc->fid = getfid(rpc->cli, rpc->t.fid); + fid = rpc->fid; + if(rpc->t.fid == rpc->t.newfid && rpc->t.nwname > 1) + error("can't walk like a clone without one"); + nfid = nil; + if(rpc->t.fid != rpc->t.newfid) + nfid = clone(rpc); + if(catcherror()){ + putfid(nfid); + error(nil); + } + rpc->r.nwqid = 0; + for(i=0; i < rpc->t.nwname; i++){ + rwlock(nfid->file, Rd); + if(catcherror()){ + rwunlock(nfid->file, Rd); + if(rpc->r.nwqid == 0) + error(nil); + break; + } + dfaccessok(nfid->file, fid->uid, AEXEC); + f = dfwalk(nfid->file, rpc->t.wname[i], 0); + if(f == fs->archive) + fid->archived++; + else if(f == fs->active) + fid->archived = 0; + rwunlock(nfid->file, Rd); + mbput(nfid->file); + nfid->file = f; + noerror(); + rwlock(f, Rd); + rpc->r.wqid[i] = mkqid(f); + rwunlock(f, Rd); + rpc->r.nwqid++; + USED(rpc->r.nwqid); /* damn error()s */ + } + if(i < rpc->t.nwname){ + putfid(nfid); + putfid(nfid); + }else{ + putfid(fid); + rpc->fid = nfid; + } + noerror(); +} + +static void +ropen(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + int mode, fmode, amode; + uvlong z; + + rpc->fid = getfid(rpc->cli, rpc->t.fid); + fid = rpc->fid; + + if(fid->omode != -1) + error("fid already open"); + mode = rpc->t.mode; + rpc->r.iounit = rpc->cli->msize - IOHDRSZ; + amode = 0; + if((mode&3) != 
OREAD || (mode&OTRUNC) != 0) + amode |= AWRITE; + if((mode&3) != OWRITE) + amode |= AREAD; + if(mode != AREAD) + fid->file = dfmelt(fid->file); + else + rwlock(fid->file, Rd); + f = fid->file; + if(catcherror()){ + rwunlock(f, amode != AREAD); + error(nil); + } + fmode = f->mf->mode; + if(mode != OREAD){ + if((fmode&DMDIR) != 0) + error("wrong open mode for a directory"); + if(fid->archived) + error("can't write in /archive"); /* yes, we can! */ + } + rpc->r.qid = mkqid(f); + dfaccessok(f, fid->uid, amode); + if(mode&ORCLOSE) + dfaccessok(f->mf->parent, fid->uid, AWRITE); + if(mode&ORCLOSE) + fid->rclose++; + if((fmode&DMEXCL) != 0 &&f->mf->open) + error("exclusive use file already open"); + if(mode&OTRUNC){ + z = 0; + dfwattr(fid->file, "length", &z, sizeof z); + } + f->mf->open++; + fid->omode = mode&3; + fid->loff = 0; + fid->lidx = 0; + noerror(); + rwunlock(f, amode != AREAD); +} + +static void +rcreate(Rpc *rpc) +{ + Fid *fid; + Memblk *f, *nf; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + + if(fid->omode != -1) + error("fid already open"); + fid->file = dfmelt(fid->file); + f = fid->file; + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + if((f->mf->mode&DMDIR) == 0) + error("not a directory"); + dfaccessok(f, fid->uid, AWRITE); + if(strcmp(rpc->t.name, ".") == 0 || strcmp(rpc->t.name, "..") == 0) + error("that file name scares me"); + if(utfrune(rpc->t.name, '/') != nil) + error("that file name is too creepy"); + if((rpc->t.perm&DMDIR) != 0 && rpc->t.mode != OREAD) + error("wrong open mode for a directory"); + if(f == fs->root || f == fs->archive) + error("can't create there"); + if(fid->archived) + error("can't create in /archive"); /* yes, we can! 
*/ + if(!catcherror()){ + mbput(dfwalk(f, rpc->t.name, 0)); + error("file already exists"); + } + nf = dfcreate(f, rpc->t.name, fid->uid, rpc->t.perm); + rpc->r.qid = mkqid(nf); + rpc->r.iounit = rpc->cli->msize-IOHDRSZ; + nf->mf->open++; + noerror(); + rwunlock(f, Wr); + mbput(fid->file); + fid->file = nf; + if(rpc->t.mode&ORCLOSE) + fid->rclose++; + fid->omode = rpc->t.mode&3; + fid->loff = 0; + fid->lidx = 0; +} + +static ulong +readmf(Memblk *f, uchar *buf, int nbuf) +{ + Dir d; + + d.name = f->mf->name; + d.qid = mkqid(f); + d.mode = f->mf->mode; + d.length = f->mf->length; + d.uid = f->mf->uid; + d.gid = f->mf->gid; + d.muid = f->mf->muid; + d.atime = f->mf->atime; + d.mtime = f->mf->mtime; + return convD2M(&d, buf, nbuf); +} + +static ulong +readdir(Fid *fid, uchar *data, ulong ndata, uvlong) +{ + Memblk *d, *f; + ulong tot, nr; + + d = fid->file; + for(tot = 0; tot+2 < ndata; tot += nr){ + + f = dfchild(d, fid->lidx); + if(f == nil) + break; + nr = readmf(f, data+tot, ndata-tot); + mbput(f); + if(nr <= 2) + break; + fid->lidx++; + } + return tot; +} + +static void +rread(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + f = fid->file; + + if(fid->omode == -1) + error("fid not open"); + if(fid->omode == OWRITE) + error("fid not open for reading"); + if(rpc->t.offset < 0) + error("negative offset"); + if(rpc->t.count > rpc->cli->msize-IOHDRSZ) + rpc->r.count = rpc->cli->msize-IOHDRSZ; + rpc->r.data = (char*)rpc->data; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + if(f->mf->mode&DMDIR){ + if(fid->loff != rpc->t.offset) + error("non-sequential dir read"); + rpc->r.count = readdir(fid, rpc->data, rpc->t.count, rpc->t.offset); + fid->loff += rpc->r.count; + }else + rpc->r.count = dfpread(f, rpc->data, rpc->t.count, rpc->t.offset); + noerror(); + rwunlock(f, Rd); +} + +static void +rwrite(Rpc *rpc) +{ + Fid *fid; + ulong n; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + + 
if(fid->omode == -1) + error("fid not open"); + if(fid->omode == OREAD) + error("fid not open for writing"); + if(rpc->t.offset < 0) + error("negative offset"); + n = rpc->t.count; + if(n > rpc->cli->msize) + n = rpc->cli->msize; /* hmmm */ + fid->file = dfmelt(fid->file); + if(catcherror()){ + rwunlock(fid->file, Wr); + error(nil); + } + rpc->r.count = dfpwrite(fid->file, rpc->t.data, n, rpc->t.offset); + noerror(); +} + + +static void +rclunk(Rpc *rpc) +{ + Fid *fid; + Memblk *f, *p; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + f = fid->file; + if(fid->omode != -1){ + rwlock(f, Wr); + f->mf->open--; + rwunlock(f, Wr); + fid->omode = -1; + if(fid->rclose){ + f->mf->parent = dfmelt(f->mf->parent); + p = f->mf->parent; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + mbput(f); + }else{ + dfremove(p, f); + noerror(); + } + rwunlock(p, Wr); + } + fid->file = nil; + } + putfid(fid); + putfid(fid); + rpc->fid = nil; +} + + +static void +rremove(Rpc *rpc) +{ + Fid *fid; + Memblk *f, *p; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + f = fid->file; + if(f == fs->root || f == fs->active || f == fs->archive) + error("can't remove that"); + if(fid->archived) + error("can't remove in /archive"); /* yes, we can! 
*/ + + f->mf->parent = dfmelt(f->mf->parent); + p = f->mf->parent; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(p, Wr); + error(nil); + } + dfaccessok(p, fid->uid, AWRITE); + fid->omode = -1; + dfremove(p, f); + noerror(); + rwunlock(p, Wr); + fid->file = nil; + putfid(fid); + putfid(fid); + rpc->fid = nil; +} + + +static void +rstat(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + f = fid->file; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + rpc->r.stat = rpc->data; + rpc->r.nstat = readmf(f, rpc->data, sizeof rpc->data); + if(rpc->r.nstat <= 2) + fatal("rstat: convD2M"); + noerror(); + rwunlock(f, Rd); +} + +static void +wstatint(Memblk *f, char *name, u64int v) +{ + dfwattr(f, name, &v, sizeof v); +} + +static void +wstatstr(Memblk *f, char *name, char *s) +{ + dfwattr(f, name, s, strlen(s)+1); +} + +static void +rwstat(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + Dir d, *sd; + u64int n; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + f = fid->file; + + + if(f == fs->root || f == fs->archive || fid->archived) + error("can't wstat there"); + fid->file = dfmelt(fid->file); + n = convM2D(rpc->t.stat, rpc->t.nstat, &d, nil); + sd = malloc(n); + if(catcherror()){ + rwunlock(fid->file, Wr); + free(sd); + error(nil); + } + f = fid->file; + n = convM2D(rpc->t.stat, rpc->t.nstat, sd, (char*)&sd[1]); + if(n <= BIT16SZ){ + free(sd); + error("wstat: convM2D"); + } + if(sd->length != ~0 && sd->length != f->mf->length){ + if(f->mf->mode&DMDIR) + error("can't resize a directory"); + dfaccessok(f, fid->uid, AWRITE); + }else + sd->length = ~0; + + if(sd->name[0] && strcmp(f->mf->name, sd->name) != 0){ + if(f == fs->active) + error("can't rename /active"); + dfaccessok(f->mf->parent, fid->uid, AWRITE); + if(!catcherror()){ + mbput(dfwalk(f, sd->name, 0)); + error("file already exists"); + } + }else + sd->name[0] = 0; + + if(sd->uid[0] != 0 && strcmp(sd->uid, f->mf->uid) != 
0){ + if(!fs->config && strcmp(fid->uid, f->mf->uid) != 0) + error("only the owner may donate a file"); + }else + sd->uid[0] = 0; + if(sd->gid[0] != 0 && strcmp(sd->gid, f->mf->gid) != 0){ + if(!fs->config && strcmp(fid->uid, f->mf->uid) != 0) + error("only the onwer may change group"); + }else + sd->gid[0] = 0; + if(sd->mode != ~0 && f->mf->mode != sd->mode){ + if(!fs->config && strcmp(fid->uid, f->mf->uid) != 0 && + strcmp(fid->uid, f->mf->gid) != 0) + error("only the onwer may change mode"); + }else + sd->mode = ~0; + + if(sd->length != ~0) + wstatint(f, "length", sd->length); + if(sd->name[0]) + wstatstr(f, "name", sd->name); + if(sd->uid[0]) + wstatstr(f, "name", sd->name); + if(sd->gid[0]) + wstatstr(f, "name", sd->name); + if(sd->mode != ~0) + wstatint(f, "mode", sd->mode); + if(fs->config && sd->atime != ~0) + wstatint(f, "atime", sd->atime); + if(fs->config && sd->mtime != ~0) + wstatint(f, "mtime", sd->mtime); + if(fs->config && sd->muid[0] != 0 && strcmp(sd->muid, f->mf->muid) != 0) + wstatint(f, "mtime", sd->mtime); + + noerror(); + rwunlock(f, Wr); + free(sd); + +} + +static void +replied(Rpc *rpc) +{ + Rpc **rl; + + qlock(&rpc->cli->rpclk); + for(rl = &rpc->cli->rpcs; (*rl != nil); rl = &(*rl)->next) + if(*rl == rpc){ + *rl = rpc->next; + break; + } + qunlock(&rpc->cli->rpclk); + rpc->next = nil; + putfid(rpc->fid); + rpc->fid = nil; + putcli(rpc->cli); + rpc->cli = nil; + freerpc(rpc); +} + +static char* +rpcworker(void *v, void**aux) +{ + Rpc *rpc; + Cli *cli; + char err[128]; + long n; + + rpc = v; + cli = rpc->cli; + threadsetname("cliproc %s rpc", cli->addr); + d9print("cliproc %s rpc starting\n", cli->addr); + + if(*aux == nil){ + errinit(Errstack); + *aux = v; /* make it not nil */ + } + + rpc->r.tag = rpc->t.tag; + rpc->r.type = rpc->t.type + 1; + + if(catcherror()){ + rpc->r.type = Rerror; + rpc->r.ename = err; + rerrstr(err, sizeof err); + goto out; + } + + fcalls[rpc->t.type](rpc); + noerror(); + +out: + d9print("-> %F\n", &rpc->r); + 
qlock(&cli->wlk); + n = convS2M(&rpc->r, cli->wdata, sizeof cli->wdata); + if(n == 0) + fatal("rpcworker: convS2M"); + if(write(cli->fd, cli->wdata, n) != n) + d9print("%s: %r\n", cli->addr); + qunlock(&cli->wlk); + + d9print("cliproc %s rpc exiting\n", cli->addr); + replied(rpc); + return nil; +} + +static char* +cliworker(void *v, void**) +{ + Cli *c; + long n; + Rpc *rpc; + + c = v; + threadsetname("cliproc %s", c->addr); + d9print("cliproc %s started\n", c->addr); + + rpc = nil; + for(;;){ + if(rpc == nil) + rpc = newrpc(); + n = read9pmsg(c->fd, rpc->data, sizeof rpc->data); + if(n < 0){ + d9print("%s: read: %r\n", c->addr); + break; + } + if(n == 0) + continue; + if(convM2S(rpc->data, n, &rpc->t) == 0){ + d9print("%s: convM2S failed\n", c->addr); + continue; + } + if(rpc->t.type >= nelem(fcalls) || fcalls[rpc->t.type] == nil){ + d9print("%s: bad fcall type %d\n", c->addr, rpc->t.type); + continue; + } + if(dbg['0']) + fprint(2, "<-%F\n", &rpc->t); + rpc->cli = c; + incref(c); + + qlock(&c->rpclk); + rpc->next = c->rpcs; + c->rpcs = rpc; + qunlock(&c->rpclk); + + getworker(rpcworker, rpc, nil); + } + d9print("cliproc %s exiting\n", c->addr); + putcli(c); + return nil; +}; + +static char* +getremotesys(char *ndir) +{ + char buf[128], *serv, *sys; + int fd, n; + + snprint(buf, sizeof buf, "%s/remote", ndir); + sys = nil; + fd = open(buf, OREAD); + if(fd >= 0){ + n = read(fd, buf, sizeof(buf)-1); + if(n>0){ + buf[n-1] = 0; + serv = strchr(buf, '!'); + if(serv) + *serv = 0; + sys = strdup(buf); + } + close(fd); + } + if(sys == nil) + sys = strdup("unknown"); + return sys; +} + +static void +postfd(char *name, int pfd) +{ + int fd; + + remove(name); + fd = create(name, OWRITE|ORCLOSE|OCEXEC, 0600); + if(fd < 0) + fatal("postfd: %r\n"); + if(fprint(fd, "%d", pfd) < 0){ + close(fd); + fatal("postfd: %r\n"); + } +} + +void +srv9p(char *srv) +{ + Cli *cli; + int fd[2]; + char *name; + + name = smprint("/srv/%s", srv); + if(pipe(fd) < 0) + fatal("pipe: %r"); + 
postfd(name, fd[1]); + cli = mallocz(sizeof *cli, 1); + cli->fd = fd[0]; + cli->cfd = -1; + cli->addr = name; + cli->ref = 1; + getworker(cliworker, cli, nil); +} + +void +listen9p(char *addr) +{ + Cli *cli; + char ndir[NETPATHLEN], dir[NETPATHLEN]; + int ctl, data, nctl; + + ctl = announce(addr, dir); + if(ctl < 0) + fatal("announce %s: %r", addr); + for(;;){ + nctl = listen(dir, ndir); + if(nctl < 0) + fatal("listen %s: %r", addr); + data = accept(nctl, ndir); + if(data < 0){ + fprint(2, "%s: accept %s: %r\n", argv0, ndir); + continue; + } + cli = mallocz(sizeof *cli, 1); + cli->fd = data; + cli->cfd = nctl; + cli->addr = getremotesys(ndir); + cli->ref = 1; + getworker(cliworker, cli, nil); + } +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/9pix.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/9pix.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,362 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +enum +{ + Nels = 64 +}; + +static char *fsdir; +static int verb; + +/* + * Walks elems starting at f. + * Ok if nelems is 0. 
+ */ +static Memblk* +walkpath(Memblk *f, char *elems[], int nelems) +{ + int i; + Memblk *f0, *nf; + + isfile(f); + f0 = f; + for(i = 0; i < nelems; i++){ + if((f->mf->mode&DMDIR) == 0) + error("not a directory"); + rwlock(f, Rd); + if(catcherror()){ + if(f != f0) + mbput(f); + rwunlock(f, Rd); + error("walk: %r"); + } + nf = dfwalk(f, elems[i], 0); + rwunlock(f, Rd); + if(f != f0) + mbput(f); + f = nf; + USED(&f); /* in case of error() */ + noerror(); + } + if(f == f0) + incref(f); + return f; +} + +static char* +fsname(char *p) +{ + if(p[0] == '/') + return strdup(p); + if(fsdir) + return smprint("%s/%s", fsdir, p); + return strdup(p); +} + +static Memblk* +walkto(char *a, char **lastp) +{ + char *els[Nels], *path; + int nels; + Memblk *f; + + path = fsname(a); + nels = gettokens(path, els, Nels, "/"); + if(nels < 1){ + free(path); + error("invalid path"); + } + if(catcherror()){ + free(path); + error("walkpath: %r"); + } + if(lastp != nil){ + f = walkpath(fs->root, els, nels-1); + *lastp = a + strlen(a) - strlen(els[nels-1]); + }else + f = walkpath(fs->root, els, nels); + free(path); + noerror(); + if(verb) + print("walked to %H\n", f); + return f; +} + +static void +fscd(int, char *argv[]) +{ + free(fsdir); + fsdir = strdup(argv[1]); +} + +static void +fsput(int, char *argv[]) +{ + int fd; + char *fn; + Memblk *m, *f; + Dir *d; + char buf[4096]; + uvlong off; + long nw, nr; + + fd = open(argv[1], OREAD); + if(fd < 0) + error("open: %r\n"); + d = dirfstat(fd); + if(d == nil){ + error("dirfstat: %r\n"); + } + if(catcherror()){ + close(fd); + free(d); + error(nil); + } + m = walkto(argv[2], &fn); + m = dfmelt(m); + if(catcherror()){ + rwunlock(m, Wr); + mbput(m); + error(nil); + } + f = dfcreate(m, fn, d->uid, d->mode&(DMDIR|0777)); + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + mbput(f); + error(nil); + } + if((d->mode&DMDIR) == 0){ + off = 0; + for(;;){ + nr = read(fd, buf, sizeof buf); + if(nr <= 0) + break; + nw = dfpwrite(f, buf, nr, off); + 
dDprint("wrote %ld of %ld bytes\n", nw, nr); + off += nr; + } + } + noerror(); + noerror(); + noerror(); + if(verb) + print("created %H\nat %H\n", f, m); + rwunlock(f, Wr); + rwunlock(m, Wr); + mbput(m); + mbput(f); + close(fd); + free(d); +} + +static void +fscat(int, char *argv[]) +{ + Memblk *f; + Mfile *m; + char buf[4096]; + uvlong off; + long nr; + + f = walkto(argv[2], nil); + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + mbput(f); + error(nil); + } + m = f->mf; + print("cat %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)m->mode, m->length, m->uid, dbgetref(f->addr)); + if((m->mode&DMDIR) == 0){ + off = 0; + for(;;){ + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + write(1, buf, nr); + off += nr; + } + } + noerror(); + rwunlock(f, Rd); + mbput(f); +} + +static void +fsget(int, char *argv[]) +{ + Memblk *f; + Mfile *m; + char buf[4096]; + uvlong off; + long nr; + int fd; + + fd = create(argv[1], OWRITE, 0664); + if(fd < 0) + error("create: %r\n"); + if(catcherror()){ + close(fd); + error(nil); + } + f = walkto(argv[2], nil); + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + mbput(f); + error(nil); + } + m = f->mf; + print("get %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)m->mode, m->length, m->uid, dbgetref(f->addr)); + if((m->mode&DMDIR) == 0){ + off = 0; + for(;;){ + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + if(write(fd, buf, nr) != nr){ + fprint(2, "%s: error: %r\n", argv[0]); + break; + } + off += nr; + } + } + close(fd); + rwunlock(f, Rd); + noerror(); + noerror(); + mbput(f); +} + +static void +fsls(int, char**) +{ + if(verb) + fsdump(1); + else + fslist(); +} + +static void +fssnap(int, char**) +{ + fssync(); +} + +static void +fsrcl(int, char**) +{ + fsreclaim(); +} + +static void +fsdmp(int, char**) +{ + fsdump(0); +} + +static void +fsdmpall(int, char**) +{ + fsdump(1); +} + +static void +fsdbg(int, char *argv[]) +{ + dbg['D'] = atoi(argv[1]); +} + +static void +fsout(int, 
char*[]) +{ + fslowmem(); +} + +static void +fsrm(int, char *argv[]) +{ + Memblk *f, *p; + + f = walkto(argv[1], nil); + if(catcherror()){ + mbput(f); + error(nil); + } + f->mf->parent = dfmelt(f->mf->parent); + p = f->mf->parent; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(p, Wr); + error(nil); + } + dfremove(p, f); + noerror(); + noerror(); + rwunlock(p, Wr); +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-v] [-s file] [-9 addr] disk\n", argv0); + exits("usage"); +} + +void +threadmain(int argc, char *argv[]) +{ + char *addr, *dev, *srv; + + addr = "tcp!localhost!6699"; + srv = "creepy"; + ARGBEGIN{ + case 'v': + verb++; + break; + case 's': + srv = EARGF(usage()); + break; + case '9': + addr = EARGF(usage()); + break; + default: + if(ARGC() >= 'A' && ARGC() <= 'Z'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; + }else + usage(); + }ARGEND; + if(argc != 1) + usage(); + dev = argv[0]; + + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + errinit(Errstack); + if(catcherror()) + fatal("error: %r"); + rfork(RFNAMEG); + fsopen(dev); + if(srv != nil) + srv9p(srv); + if(addr != nil) + listen9p(addr); + noerror(); + exits(nil); +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/attr.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/attr.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,382 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * Attribute handling + */ + +typedef struct Adef Adef; + +struct Adef +{ + char* name; + int sz; + long (*wattr)(Memblk*, void*, long); + long (*rattr)(Memblk*, void*, long); +}; + +long wname(Memblk*, void*, long); +static long rname(Memblk*, void*, long); +static long rid(Memblk*, void*, long); +long watime(Memblk*, void*, long); +static long ratime(Memblk*, void*, long); +long wmtime(Memblk*, void*, long); +static long rmtime(Memblk*, void*, long); +static long 
wlength(Memblk*, void*, long); +static long rlength(Memblk*, void*, long); + +static Adef adef[] = +{ + {"name", 0, wname, rname}, + {"id", BIT64SZ, nil, rid}, + {"atime", BIT64SZ, watime, ratime}, + {"mtime", BIT64SZ, wmtime, rmtime}, + {"length", BIT64SZ, wlength, rlength}, +}; + +/* + * Return size for attributes embedded in file. + * At least Dminattrsz bytes are reserved in the file block, + * at most Embedsz. + * Size is rounded to the size of an address. + */ +ulong +embedattrsz(Memblk *f) +{ + ulong sz; + + sz = f->d.asize; + sz = ROUNDUP(sz, BIT64SZ); + if(sz < Dminattrsz) + sz = Dminattrsz; + else + sz = Embedsz; + return sz; +} + +void +gmeta(Fmeta *meta, void *buf, ulong nbuf) +{ + Dmeta *d; + char *p, *x; + int i; + + if(nbuf < sizeof *d) + error("metadata buffer too small"); + d = buf; + meta->id = d->id; + meta->mode = d->mode; + meta->atime = d->atime; + meta->mtime = d->mtime; + meta->length = d->length; + + if(d->ssz[FMuid] + sizeof *d > nbuf || + d->ssz[FMgid] + sizeof *d > nbuf || + d->ssz[FMmuid] + sizeof *d > nbuf || + d->ssz[FMname] + sizeof *d > nbuf) + error("corrupt meta: wrong string size"); + + p = (char*)(&d[1]); + x = p; + for(i = 0; i < nelem(d->ssz); i++){ + if(x[d->ssz[i]-1] != 0) + error("corrupt meta: unterminated string"); + x += d->ssz[i]; + } + + meta->uid = p; + p += d->ssz[FMuid]; + meta->gid = p; + p += d->ssz[FMgid]; + meta->muid = p; + p += d->ssz[FMmuid]; + meta->name = p; +} + +static ulong +metasize(Fmeta *meta) +{ + ulong n; + + n = sizeof(Dmeta); + n += strlen(meta->uid) + 1; + n += strlen(meta->gid) + 1; + n += strlen(meta->muid) + 1; + n += strlen(meta->name) + 1; + /* + * BUG: meta->attr + */ + return n; +} + +/* + * Pack the metadata into buf. + * pointers in meta are changed to refer to the packed data. + * Return pointer past the packed metadata. + * The caller is responsible for ensuring that metadata fits in buf. 
+ */ +ulong +pmeta(void *buf, ulong nbuf, Fmeta *meta) +{ + Dmeta *d; + char *p, *bufp; + ulong sz; + + sz = metasize(meta); + if(sz > nbuf){ + fatal("bug: allocate and use ablk"); + error("attributes are too long"); + } + d = buf; + bufp = buf; + d->id = meta->id; + d->mode = meta->mode; + d->atime = meta->atime; + d->mtime = meta->mtime; + d->length = meta->length; + + p = (char*)(&d[1]); + d->ssz[FMuid] = strlen(meta->uid) + 1; + strcpy(p, meta->uid); + meta->uid = p; + p += d->ssz[FMuid]; + + d->ssz[FMgid] = strlen(meta->gid) + 1; + strcpy(p, meta->gid); + meta->gid = p; + p += d->ssz[FMgid]; + + d->ssz[FMmuid] = strlen(meta->muid) + 1; + strcpy(p, meta->muid); + meta->muid = p; + p += d->ssz[FMmuid]; + + d->ssz[FMname] = strlen(meta->name) + 1; + strcpy(p, meta->name); + meta->name = p; + p += d->ssz[FMname]; + + assert(p - bufp <= sz); /* can be <, to leave room for growing */ + return sz; +} + +long +wname(Memblk *f, void *buf, long len) +{ + char *p, *old; + ulong maxsz; + + p = buf; + if(len < 1 || p[len-1] != 0) + error("name must end in \\0"); + old = f->mf->name; + f->mf->name = p; + maxsz = embedattrsz(f); + if(metasize(f->mf) > maxsz){ + f->mf->name = old; + fprint(2, "%s: bug: no attribute block implemented\n", argv0); + error("no room to grow metadata"); + } + /* name goes last, we can pack in place */ + pmeta(f->d.embed, maxsz, f->mf); + return len; +} + +static long +rname(Memblk *f, void *buf, long len) +{ + long l; + + l = strlen(f->mf->name) + 1; + if(l > len) + error("buffer too short"); + strcpy(buf, f->mf->name); + return l; +} + +static long +rid(Memblk *f, void *buf, long) +{ + u64int *p; + + p = buf; + *p = f->mf->id; + return BIT64SZ; +} + +long +watime(Memblk *f, void *buf, long) +{ + u64int *p; + Dmeta *d; + + p = buf; + d = (Dmeta*)f->d.embed; + f->mf->atime = *p; + d->atime = *p; + return BIT64SZ; +} + +static long +ratime(Memblk *f, void *buf, long) +{ + u64int *p; + + p = buf; + *p = f->mf->atime; + return BIT64SZ; +} + +long 
+wmtime(Memblk *f, void *buf, long) +{ + u64int *p; + Dmeta *d; + + p = buf; + d = (Dmeta*)f->d.embed; + f->mf->mtime = *p; + d->mtime = *p; + return BIT64SZ; +} + +static long +rmtime(Memblk *f, void *buf, long) +{ + u64int *p; + + p = buf; + *p = f->mf->mtime; + return BIT64SZ; +} + +static uvlong +fresize(Memblk *f, uvlong sz) +{ + ulong boff, bno, bend; + + if(f->mf->mode&DMDIR) + error("can't resize a directory"); + + if(sz > maxfsz) + error("max file size exceeded"); + if(sz >= f->mf->length) + return sz; + bno = dfbno(f, sz, &boff); + if(boff > 0) + bno++; + bend = dfbno(f, sz, &boff); + if(boff > 0) + bend++; + dfdropblks(f, bno, bend); + return sz; +} + +static long +wlength(Memblk *f, void *buf, long) +{ + u64int *p; + Dmeta *d; + + p = buf; + d = (Dmeta*)f->d.embed; + f->mf->length = fresize(f, *p); + d->length = *p; + return BIT64SZ; +} + +static long +rlength(Memblk *f, void *buf, long) +{ + u64int *p; + + p = buf; + *p = f->mf->length; + return BIT64SZ; +} + +long +dfwattr(Memblk *f, char *name, void *val, long nval) +{ + int i; + long tot; + + isfile(f); + ismelted(f); + isrwlocked(f, Wr); + if(fsfull()) + error("file system full"); + + for(i = 0; i < nelem(adef); i++) + if(strcmp(adef[i].name, name) == 0) + break; + if(i == nelem(adef)) + error("user defined attributes not yet implemented"); + if(adef[i].wattr == nil) + error("can't write %s", name); + if(adef[i].sz != 0 && adef[i].sz != nval) + error("wrong length for attribute"); + + tot = adef[i].wattr(f, val, nval); + changed(f); + return tot; +} + +long +dfrattr(Memblk *f, char *name, void *val, long count) +{ + int i; + long tot; + + isfile(f); + isrwlocked(f, Rd); + for(i = 0; i < nelem(adef); i++) + if(strcmp(adef[i].name, name) == 0) + break; + if(i == nelem(adef)) + error("no such attribute"); + if(adef[i].sz != 0 && count < adef[i].sz) + error("buffer too short for attribute"); + + tot = adef[i].rattr(f, val, count); + return tot; +} + +static int +member(char *gid, char *uid) +{ + /* 
BUG: no groups */ + return strcmp(gid, uid) == 0; +} + +void +dfaccessok(Memblk *f, char *uid, int bits) +{ + uint mode; + + if(fs->config) + return; + + bits &= 3; + mode = f->mf->mode &0777; + + if((mode&bits) == bits) + return; + mode >>= 3; + + if(member(f->mf->gid, uid) && (mode&bits) == bits) + return; + mode >>= 3; + if(strcmp(f->mf->uid, uid) == 0 && (mode&bits) == bits) + return; + error("permission denied"); +} diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/conf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/conf.h Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,65 @@ + + +#define TESTING + +enum +{ + KiB = 1024UL, + MiB = KiB * 1024UL, + GiB = MiB * 1024UL, + +#ifdef TESTING + Incr = 2, + Fsysmem = 1*MiB, /* size for in-memory block array */ + Dzerofree = 10, /* out of disk blocks */ + Dminfree = 100000, /* low on disk */ + Dmaxfree = 100000, /* high on disk */ + Mminfree = 1000000ULL, /* low on mem */ + Mmaxfree = 1000000ULL, /* high on mem */ + + /* disk parameters; don't change */ + Dblksz = 512UL, /* disk block size */ + Dblkhdrsz = 2*BIT64SZ, + Ndptr = 2, /* # of direct data pointers */ + Niptr = 2, /* # of indirect data pointers */ +#else + Incr = 16, + Fsysmem = 2*GiB, /* size for in-memory block array */ + Dzerofree = 10, /* out of disk blocks */ + Dminfree = 1000, /* low on disk blocks */ + Dmaxfree = 1000, /* high on disk blocks */ + Mminfree = 50, /* low on mem blocks */ + Mmaxfree = 500, /* high on mem blocks */ + + /* disk parameters; don't change */ + Dblksz = 16*KiB, /* disk block size */ + Dblkhdrsz = 2*BIT64SZ, + Ndptr = 8, /* # of direct data pointers */ + Niptr = 4, /* # of indirect data pointers */ +#endif + + Dminattrsz = Dblksz/2, /* min size for attributes */ + + /* + * The format of the disk is: + * blk 0: unused + * blk 1: super + * Nblkgrpsz blocks (1st is ref, Nblkgrpsz-1 are data) + * ... 
+ * Nblkgrpsz blocks (1st is ref, Nblkgrpsz-1 are data) + * + */ + Nblkgrpsz = (Dblksz - Dblkhdrsz) / BIT64SZ, + Dblk0addr = 2*Dblksz, + + /* + * Caution: Errstack also limits the max tree depth, + * because of recursive routines (in the worst case). + */ + Stack = 32*KiB, /* stack size for threads */ + Errstack = 64, /* max # of nested error labels */ + Fhashsz = 7919, /* size of file hash (plan9 has 35454 files). */ + Fidhashsz = 97, /* size of the fid hash size */ + +}; + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/dbg.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/dbg.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,14 @@ +#include +#include + +char dbg[256]; + +int +dbgclr(uchar flag) +{ + int x; + + x = dbg[flag]; + dbg[flag] = 0; + return x; +} diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/dbg.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/dbg.h Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,12 @@ +/* + * 'd': general debug + * 'D': disk + * 'W': block write + * 'R': block read + * '9': 9p + */ +#define dDprint if(!dbg['D']){}else print +#define dRprint if(!dbg['R']){}else print +#define dWprint if(!dbg['W']){}else print +#define d9print if(!dbg['9']){}else print +extern char dbg[]; diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/dblk.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/dblk.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,455 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * disk blocks, built upon memory blocks provided by mblk.c + * see dk.h + */ + +void +checktag(u64int tag, uint type, u64int addr) +{ + if(tag != TAG(addr,type)){ + fprint(2, "%s: bad tag: %#ullx != %#ux d%#ullx pc = %#p\n", + argv0, tag, type, addr, getcallerpc(&tag)); + error("bad tag"); + } +} + + +void +dbclear(u64int addr, int type) +{ + static Diskblk d; + static QLock lk; + + 
dDprint("dbclear d%#ullx type %s\n", addr, tname(type)); + qlock(&lk); + d.tag = TAG(addr, type); + d.epoch = now(); + if(pwrite(fs->fd, &d, sizeof d, addr) != Dblksz){ + qunlock(&lk); + fprint(2, "%s: dbclear: d%#ullx: %r\n", argv0, addr); + error("dbclear: d%#ullx: %r", addr); + } + qunlock(&lk); +} + +void +meltedref(Memblk *rb) +{ + if(canqlock(&fs->rlk)) + fatal("meltedref rlk"); + if(rb->frozen && rb->dirty) + dbwrite(rb); + rb->frozen = rb->dirty = 0; +} + +/* + * BUG: the free list of blocks using entries in the ref blocks + * shouldn't span all those blocks as it does now. To prevent + * massive loses of free blocks each DBref block should keep its own + * little free list, and all blocks with free entries should be linked + * in the global list. + * This would keep locality and make it less likely that a failure in the + * middle of a sync destroyes the entire list. + */ + +u64int +newblkaddr(void) +{ + u64int addr, naddr; + + qlock(fs); + if(catcherror()){ + qunlock(fs); + error(nil); + } +Again: + if(fs->super == nil) + addr = Dblksz; + else if(fs->super->d.eaddr < fs->limit){ + addr = fs->super->d.eaddr; + fs->super->d.eaddr += Dblksz; + changed(fs->super); + /* + * ref blocks are allocated and initialized on demand, + * and they must be zeroed before used. + * do this holding the lock so others find everything + * initialized. + */ + if(((addr-Dblk0addr)/Dblksz)%Nblkgrpsz == 0){ + dDprint("new ref blk addr = d%#ullx\n", addr); + dbclear(addr, DBref); /* fs initialization */ + addr += Dblksz; + fs->super->d.eaddr += Dblksz; + } + }else if(fs->super->d.free != 0){ + addr = fs->super->d.free; + + /* + * Caution: can't acquire new locks while holding the fs lock, + * but dbgetref may allocate blocks. + */ + qunlock(fs); + if(catcherror()){ + qlock(fs); /* restore the default in this fn. 
*/ + error(nil); + } + naddr = dbgetref(addr); /* acquires locks */ + noerror(); + qlock(fs); + if(addr != fs->super->d.free){ + /* had a race */ + goto Again; + } + fs->super->d.free = naddr; + fs->super->d.nfree -= 1; + changed(fs->super); + }else{ + addr = 0; + /* preserve backward compatibility with fossil */ + sysfatal("disk is full"); + } + + noerror(); + qunlock(fs); + okaddr(addr); + dDprint("newblkaddr = d%#ullx\n", addr); + return addr; +} + +u64int +addrofref(u64int refaddr, int idx) +{ + u64int bno; + + bno = (refaddr - Dblk0addr)/Dblksz; + bno *= Nblkgrpsz; + bno += idx; + + return Dblk0addr + bno*Dblksz; +} + +u64int +refaddr(u64int addr, int *idx) +{ + u64int bno, refaddr; + + addr -= Dblk0addr; + bno = addr/Dblksz; + *idx = bno%Nblkgrpsz; + refaddr = Dblk0addr + bno/Nblkgrpsz * Nblkgrpsz * Dblksz; + dDprint("refaddr d%#ullx = d%#ullx[%d]\n", + Dblk0addr + addr, refaddr, *idx); + return refaddr; +} + +/* + * db*ref() functions update the on-disk reference counters. + * memory blocks use Memblk.Ref instead. Beware. 
+ */ +static u64int +dbaddref(u64int addr, int delta, int set) +{ + Memblk *rb; + u64int raddr, ref; + int i, flg; + + if(addr == 0) + return 0; + if(addr == Noaddr) /* root doesn't count */ + return 0; + + if(set != 0) + dDprint("dbsetref %#ullx = %d\n", addr, set); + else if(delta != 0) + dDprint("dbaddref %#ullx += %d\n", addr, delta); + flg = dbgclr('D'); + raddr = refaddr(addr, &i); + rb = dbget(DBref, raddr); + qlock(&fs->rlk); + if(catcherror()){ + mbput(rb); + qunlock(&fs->rlk); + dbg['D'] = flg; + error(nil); + } + if(delta != 0 || set != 0){ + meltedref(rb); + if(set) + rb->d.ref[i] = set; + else + rb->d.ref[i] += delta; + rb->dirty = 1; + } + ref = rb->d.ref[i]; + noerror(); + qunlock(&fs->rlk); + mbput(rb); + dbg['D'] = flg; + return ref; +} + +u64int +dbgetref(u64int addr) +{ + return dbaddref(addr, 0, 0); +} + +void +dbsetref(u64int addr, int ref) +{ + dbaddref(addr, 0, ref); +} + +u64int +dbincref(u64int addr) +{ + return dbaddref(addr, +1, 0); +} + +u64int +dbdecref(u64int addr) +{ + return dbaddref(addr, -1, 0); +} + +Memblk* +dballoc(uint type) +{ + Memblk *b; + u64int addr; + int root, flg; + + flg = dbgclr('D'); + + root = (type == Noaddr); + addr = Noaddr; + if(root) + type = DBfile; + else + addr = newblkaddr(); + b = mballoc(addr); + b->d.tag = TAG(b->addr,type); + if(catcherror()){ + mbput(b); + dbg['D'] = flg; + error(nil); + } + changed(b); + if(addr != Noaddr && addr >= Dblk0addr) + dbsetref(addr, 1); + if(type == DBfile) + b->mf = mfalloc(); + b = mbhash(b); + noerror(); + dbg['D'] = flg; + dDprint("dballoc %s -> %H\n", tname(type), b); + return b; +} + +/* + * BUG: these should ensure that all integers are converted between + * little endian (disk format) and the machine endianness. + * We know the format of all blocks and the type of all file + * attributes. Those are the integers to convert to fix the bug. 
+ */ +Memblk* +hosttodisk(Memblk *b) +{ + if(!TAGADDROK(b->d.tag, b->addr)) + fatal("hosttodisk: bad tag"); + incref(b); + return b; +} + +void +disktohost(Memblk *b) +{ + static union + { + u64int i; + uchar m[BIT64SZ]; + } u; + + u.i = 0x1122334455667788ULL; + if(u.m[0] != 0x88) + fatal("fix hosttodisk/disktohost for big endian"); + checktag(b->d.tag, TAGTYPE(b->d.tag), b->addr); +} + +long +dbwrite(Memblk *b) +{ + Memblk *nb; + + dWprint("dbwrite %H\n",b); + nb = hosttodisk(b); + nb->d.epoch = now(); + if(pwrite(fs->fd, &nb->d, sizeof nb->d, nb->addr) != Dblksz){ + mbput(nb); + fprint(2, "%s: dbwrite: d%#ullx: %r\n", argv0, b->addr); + error("dbwrite: %r"); + } + mbput(nb); + + return Dblksz; +} + +long +dbread(Memblk *b) +{ + long tot, nr; + uchar *p; + + + p = b->d.ddata; + for(tot = 0; tot < Dblksz; tot += nr){ + nr = pread(fs->fd, p+tot, Dblksz-tot, b->addr + tot); + if(nr == 0) + werrstr("eof on disk file"); + if(nr <= 0){ + fprint(2, "%s: dbread: d%#ullx: %r\n", argv0, b->addr); + error("dbread: %r"); + } + } + assert(tot == sizeof b->d); + + disktohost(b); + if(TAGTYPE(b->d.tag) != DBref) + b->frozen = 1; + dRprint("dbread %H\n", b); + return tot; +} + +Memblk* +dbget(uint type, u64int addr) +{ + Memblk *b; + + dDprint("dbget %s d%#ullx\n", tname(type), addr); + okaddr(addr); + b = mbget(addr, 1); + if(b == nil) + error("i/o error"); + if(TAGTYPE(b->d.tag) != DBnew){ + if(TAGTYPE(b->d.tag) != type) + fatal("dbget: bug"); + return b; + } + + /* the file is new, must read it */ + if(catcherror()){ + b->d.tag = TAG(addr, DBnew); + qunlock(&b->newlk); /* awake those waiting for it */ + mbput(b); /* our ref and the hash ref */ + mbput(b); + error(nil); + } + dbread(b); + checktag(b->d.tag, type, addr); + if(type == DBfile){ + assert(b->mf == nil); + b->mf = mfalloc(); + gmeta(b->mf, b->d.embed, Embedsz); + b->written = 1; + } + noerror(); + qunlock(&b->newlk); + return b; +} + +void +dupdentries(void *p, int n) +{ + int i; + Dentry *d; + + d = p; + for(i = 0; 
i < n; i++) + if(d[i].file != 0){ + dDprint("add ref on melt d%#ullx\n", d[i].file); + dbincref(d[i].file); + } +} +/* + * caller responsible for locking. + * On errors we leak disk blocks because of added references. + */ +Memblk* +dbdup(Memblk *b) +{ + Memblk *nb; + uint type; + int i; + Mfile *nm; + ulong doff; + + type = TAGTYPE(b->d.tag); + nb = dballoc(type); + if(catcherror()){ + mbput(nb); + error(nil); + } + switch(type){ + case DBfree: + case DBref: + case DBsuper: + case DBattr: + fatal("dbdup: %s", tname(type)); + case DBdata: + memmove(nb->d.data, b->d.data, Dblkdatasz); + break; + case DBfile: + if(!b->frozen) + isrwlocked(b, Rd); + nb->d.asize = b->d.asize; + nb->d.aptr = b->d.aptr; + if(nb->d.aptr != 0) + dbincref(b->d.aptr); + for(i = 0; i < nelem(b->d.dptr); i++){ + nb->d.dptr[i] = b->d.dptr[i]; + if(nb->d.dptr[i] != 0) + dbincref(b->d.dptr[i]); + } + for(i = 0; i < nelem(b->d.iptr); i++){ + nb->d.iptr[i] = b->d.iptr[i]; + if(nb->d.iptr[i] != 0) + dbincref(b->d.iptr[i]); + } + memmove(nb->d.embed, b->d.embed, Embedsz); + nm = nb->mf; + gmeta(nm, nb->d.embed, Embedsz); + if((nm->mode&DMDIR) == 0) + break; + doff = embedattrsz(nb); + dupdentries(nb->d.embed+doff, (Embedsz-doff)/sizeof(Dentry)); + if(b->frozen && b->mf->melted == nil){ + incref(nb); + b->mf->melted = nb; /* XXX race *rlocked* */ + } + break; + default: + if(type < DBptr0 || type >= DBptr0 + Niptr) + fatal("dbdup: bad type %d", type); + for(i = 0; i < Dptrperblk; i++){ + nb->d.ptr[i] = b->d.ptr[i]; + if(nb->d.ptr[i] != 0) + dbincref(b->d.ptr[i]); + } + } + changed(nb); + noerror(); + return nb; +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/dk.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/dk.h Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,387 @@ +typedef struct Fmeta Fmeta; +typedef struct Ddatablk Ddatablk; +typedef struct Dptrblk Dptrblk; +typedef struct Drefblk Drefblk; +typedef struct Dattrblk Dattrblk; +typedef struct Dfileblk Dfileblk; 
+typedef struct Dsuperblk Dsuperblk; +typedef union Diskblk Diskblk; +typedef struct Diskblkhdr Diskblkhdr; +typedef struct Memblk Memblk; +typedef struct Fsys Fsys; +typedef struct Dentry Dentry; +typedef struct Dmeta Dmeta; +typedef struct Blksl Blksl; +typedef struct Mfile Mfile; + +/* + * these are used by several functions that have flags to indicate + * mem-only, also on disk; and read-access/write-access. (eg. dfmap). + */ +enum{ + Mem=0, + Disk, + + Rd=0, + Wr, + No, +}; + +#define HOWMANY(x, y) (((x)+((y)-1))/(y)) +#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) + +/* + * Conventions: + * + * References: + * - Ref is used for in-memory RCs. This has nothing to do with on-disk refs. + * - Mem refs include the reference from the hash. That one keeps the file + * loaded in memory while unused. + * - The hash ref also accounts for the lru list and list of DBref blocks. + * - Disk refs count only references within the tree on disk. + * - Children imply new refs to the parents. But not vice-versa. + * + * Assumptions: + * - /active is *never* found on disk, it's memory-only. + * - b->addr is worm. + * - blocks are added to the end of the hash chain. + * - We try not to hold more than one lock, using the + * reference counters when we need to be sure that + * an unlocked resource does not vanish. + * - parents of file blocks in memory are in memory (because of RCs) + * - reference blocks are never removed from memory. + * - disk refs are frozen while waiting to go to disk during a fs freeze. + * in which case db*ref functions write the block in place and melt it. + * - frozen blocks are quiescent. + * - the block epoch number for a on-disk block is the time when it + * was written (thus it's archived "/" has a newer epoch). + * - mb*() functions do not raise errors. + * + * Locking: + * - the caller to functions in [mbf]blk.c acquires the locks before + * calling them, and makes sure the file is melted if needed. + * This prevents races and deadlocks. 
+ * - blocks are locked by the file responsible for them, when not frozen. + * - next fields in blocks are locked by the list they are used for. + * + * Lock order: + * - fs, super,... : while locked can't acquire fs or blocks. + * - parent -> child + * (but a DBfile protects all ptr and data blocks under it). + * - block -> ref block + * + * All the code assumes outofmemoryexits = 1. + */ + +enum +{ + /* block types */ + DBfree = 0, + DBnew, /* never found on disk */ + DBref, + DBattr, + DBfile, + DBsuper, + DBdata, /* direct block */ + DBptr0 = DBdata+1, /* simple-indirect block */ + /* double */ + /* triple */ + /*...*/ +}; + +/* + * ##### On disk structures. ##### + * + * All on-disk integer values are little endian. + * + * blk 0: unused + * blk 1: super + * ref blk + Nblkgrpsz-1 blocks + * ... + * ref blk + Nblkgrpsz-1 blocks + * + * The code assumes these structures are packed. + * Be careful if they are changed to make things easy for the + * compiler and keep them naturally aligned. + */ + +struct Ddatablk +{ + uchar data[1]; /* raw memory */ +}; + +struct Dptrblk +{ + u64int ptr[1]; /* array of block addresses */ +}; + +struct Drefblk +{ + u64int ref[1]; /* disk RC or next block in free list */ +}; + +struct Dattrblk +{ + u64int next; /* next block used for attribute data */ + uchar attr[1]; /* raw attribute data */ +}; + +/* + * directory entry. contents of data blocks in directories. + * Each block stores only an integral number of Dentries, for simplicity. + */ +struct Dentry +{ + u64int file; /* file address or 0 when unused */ +}; + +/* + * The trailing part of the file block is used to store attributes + * and initial file data. + * At least Dminattrsz is reserved for attributes, at most + * all the remaining embedded space. + * Past the attributes, starts the file data. + * If more attribute space is needed, an attribute block is allocated. 
+ * For huge attributes, it is suggested that a file is allocated and + * the attribute value refers to that file. + * The pointer in iptr[n] is an n-indirect data pointer. + * + * Directories are also files, but their data is simply an array of + * Dentries. + */ +struct Dfileblk +{ + u64int asize; /* attribute size */ + u64int aptr; /* attribute block pointer */ + u64int dptr[Ndptr]; /* direct data pointers */ + u64int iptr[Niptr]; /* indirect data pointers */ + uchar embed[1]; /* embedded attrs and data */ +}; + +enum +{ + FMuid = 0, /* strings in mandatory attributes */ + FMgid, + FMmuid, + FMname, + FMnstr, +}; + +struct Dmeta /* mandatory metadata */ +{ + u64int id; /* ctime, actually */ + u64int mode; + u64int atime; + u64int mtime; + u64int length; + u16int ssz[FMnstr]; + /* uid\0gid\0muid\0name\0 */ +}; + +#define MAGIC 0x6699BCB06699BCB0ULL +/* + * Superblock. + * The stored tree is: + * archive/ root of the archived tree + * + * ... + * (/ and /active are only memory and never on disk, parts + * under /active that are on disk are shared with entries in /archive) + */ +struct Dsuperblk +{ + u64int magic; /* MAGIC */ + u64int free; /* first free block on list */ + u64int eaddr; /* end of the assigned disk portion */ + u64int root; /* address of /archive in disk */ + u64int nfree; /* # of blocks in free list */ + u64int dblksz; /* only for checking */ + u64int nblkgrpsz; /* only for checking */ + u64int dminattrsz; /* only for checking */ + u64int ndptr; /* only for checking */ + u64int niptr; /* only for checking */ + u64int dblkdatasz; /* only for checking */ + u64int embedsz; /* only for checking */ + u64int dptrperblk; /* only for checking */ + uchar vac0[24]; /* score for last venti archive + 4pad */ + uchar vac1[24]; /* score for previous venti archive + 4pad */ +}; + +enum +{ + Noaddr = ~0UL /* null address, for / */ +}; + +#define TAG(addr,type) ((addr)<<8|((type)&0x7F)) +#define TAGTYPE(t) ((t)&0x7F) +#define TAGADDROK(t,addr) (((t)&~0xFF) == 
((addr)<<8)) + +/* + * disk blocks + */ + +/* + * header for all disk blocks. + * Those using on-disk references keep them at a DBref block + */ +struct Diskblkhdr +{ + u64int tag; /* block tag */ + u64int epoch; /* block epoch */ +}; + +union Diskblk +{ + struct{ + Diskblkhdr; + union{ + Ddatablk; /* data block */ + Dptrblk; /* pointer block */ + Drefblk; /* reference counters block */ + Dattrblk; /* attribute block */ + Dfileblk; /* file block */ + Dsuperblk; + }; + }; + uchar ddata[Dblksz]; +}; + +/* + * These are derived. + */ +enum +{ + Dblkdatasz = sizeof(Diskblk) - sizeof(Diskblkhdr), + Embedsz = Dblkdatasz - sizeof(Dfileblk), + Dptrperblk = Dblkdatasz / sizeof(u64int), + Drefperblk = Dblkdatasz / sizeof(u64int), +}; + + +/* + * File attributes are name/value pairs. + * By now, only mandatory attributes are implemented, and + * have names implied by their position in the Dmeta structure. + */ + +/* + * ##### On memory structures. ##### + */ + +/* + * File metadata + */ +struct Fmeta +{ + Dmeta; + char *uid; + char *gid; + char *muid; + char *name; +}; + +/* + * On memory file information. + */ +struct Mfile +{ + RWLock; + Fmeta; + union{ + Memblk *parent; /* most recent parent */ + Mfile *next; /* in free Mfile list */ + }; + + Memblk* melted; /* next version for this one, if frozen */ + ulong lastbno; /* help for RA */ + int open; /* for DMEXCL */ +}; + +/* + * memory block + */ +struct Memblk +{ + Ref; + u64int addr; /* block address */ + Memblk *next; /* in hash or free list */ + + union{ + Memblk *rnext; /* in list of DBref blocks */ + Mfile *mf; /* DBfile on memory info. */ + }; + + int dirty; /* must be written */ + int frozen; /* is frozen */ + int written; /* no need to scan this for dirties */ + + Memblk *lnext; /* list from fs->mru -> fs->lru */ + Memblk *lprev; + + QLock newlk; /* only to wait on DBnew blocks */ + Diskblk d; +}; + +/* + * Slice into a block, used to read/write file blocks. 
+ */ +struct Blksl +{ + Memblk *b; + void *data; + long len; +}; + +struct Fsys +{ + QLock; + + struct{ + QLock; + Memblk *b; + } fhash[Fhashsz]; /* hash of blocks by address */ + + Memblk *blk; /* static global array of memory blocks */ + usize nblk; /* # of entries used */ + usize nablk; /* # of entries allocated */ + usize nused; /* blocks in use */ + usize nfree; /* free blocks */ + + Memblk *free; /* free list of unused blocks in blk */ + + QLock llk; + Memblk *lru; + Memblk *mru; + + QLock mlk; + Mfile *mfree; /* unused list */ + + QLock rlk; + Memblk *refs; /* list of DBref blocks (also hashed) */ + + Memblk *super; /* locked by blklk */ + Memblk *root; /* only in memory */ + Memblk *active; /* /active */ + Memblk *archive; /* /archive */ + Memblk *cons; /* /cons */ + Channel *consc; /* of char*; output for /cons */ + + Memblk *fzsuper; /* frozen super */ + + QLock fzlk; /* free or reclaim in progress. */ + + char *dev; /* name for disk */ + int fd; /* of disk */ + usize limit; /* address for end of disk */ + + int config; /* config mode enabled */ +}; + +#pragma varargck type "H" Memblk* + +typedef int(*Blkf)(Memblk*); + + +extern Fsys*fs; +extern uvlong maxfsz; + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/fblk.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/fblk.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,771 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * File block tools. + * Should be used mostly by file.c, where the interface is kept. 
+ * see dk.h + */ + +void +rwlock(Memblk *f, int iswr) +{ + if(iswr == No) + return; + if(iswr) + wlock(f->mf); + else + rlock(f->mf); +} + +void +rwunlock(Memblk *f, int iswr) +{ + if(iswr == No) + return; + if(iswr) + wunlock(f->mf); + else + runlock(f->mf); +} + +void +isfile(Memblk *f) +{ + if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + fatal("isfile: not a file at pc %#p", getcallerpc(&f)); +} + +void +isrwlocked(Memblk *f, int iswr) +{ + if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + fatal("isrwlocked: not a file at pc %#p", getcallerpc(&f)); + if(iswr == No) + return; + if((iswr && canrlock(f->mf)) || (!iswr && canwlock(f->mf))) + fatal("is%clocked at pc %#p", iswr?'w':'r', getcallerpc(&f)); +} + +void +isdir(Memblk *f) +{ + if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + fatal("isdir: not a file at pc %#p", getcallerpc(&f)); + if((f->mf->mode&DMDIR) == 0) + fatal("isdir: not a dir at pc %#p", getcallerpc(&f)); +} + +void +isnotdir(Memblk *f) +{ + if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + fatal("isnotdir: not a file at pc %#p", getcallerpc(&f)); + if((f->mf->mode&DMDIR) != 0) + fatal("isnotdir: dir at pc %#p", getcallerpc(&f)); +} + +/* for dfblk only */ +static Memblk* +getmelted(uint isdir, int isarch, uint type, u64int *addrp) +{ + Memblk *b, *nb; + + if(*addrp == 0){ + b = dballoc(type); + *addrp = b->addr; + incref(b); + return b; + } + + b = dbget(type, *addrp); + nb = nil; + if(isarch) + b->frozen = 0; /* /archive always melted */ + if(!b->frozen) + return b; + if(catcherror()){ + mbput(b); + mbput(nb); + error(nil); + } + nb = dbdup(b); + if(isdir && type == DBdata) + dupdentries(nb->d.data, Dblkdatasz/sizeof(Dentry)); + USED(&nb); /* for error() */ + *addrp = nb->addr; + incref(nb); + dbdecref(b->addr); + noerror(); + return nb; +} + +/* + * Get a file data block, perhaps allocating it on demand + * if mkit. The file must be r/wlocked and melted if mkit. 
+ * + * Adds disk refs for dir entries copied during melts and + * considers that /archive is always melted. + * + * Read-ahead is not considered here. The file only records + * the last accessed block number, to help the caller do RA. + */ +static Memblk* +dfblk(Memblk *f, ulong bno, int mkit) +{ + ulong prev, nblks; + int i, idx, nindir, type, isdir, isarch; + Memblk *b, *pb; + u64int *addrp; + + isrwlocked(f, mkit); + isarch = f == fs->archive; + if(isarch) + f->frozen = 0; + if(mkit) + ismelted(f); + isdir = (f->mf->mode&DMDIR); + + f->mf->lastbno = bno; + /* + * bno: block # relative to the the block we are looking at. + * prev: # of blocks before the current one. + */ + prev = 0; + + /* + * Direct block? + */ + if(bno < nelem(f->d.dptr)) + if(mkit) + return getmelted(isdir, isarch, DBdata, &f->d.dptr[bno]); + else + return dbget(DBdata, f->d.dptr[bno]); + + bno -= nelem(f->d.dptr); + prev += nelem(f->d.dptr); + + /* + * Indirect block + * nblks: # of data blocks addressed by the block we look at. + */ + nblks = Dptrperblk; + for(i = 0; i < nelem(f->d.iptr); i++){ + if(bno < nblks) + break; + bno -= nblks; + prev += nblks; + nblks *= Dptrperblk; + } + if(i == nelem(f->d.iptr)) + error("offset exceeds file capacity"); + + type = DBptr0+i; + dDprint("dfblk: indirect %s nblks %uld (ppb %ud) bno %uld\n", + tname(type), nblks, Dptrperblk, bno); + + addrp = &f->d.iptr[i]; + if(mkit) + b = getmelted(isdir, isarch, type, addrp); + else + b = dbget(type, *addrp); + pb = f; + incref(pb); + if(catcherror()){ + mbput(pb); + mbput(b); + error(nil); + } + + /* at the loop header: + * pb: parent of b + * b: DBptr block we are looking at. + * addrp: ptr to b within fb. 
+	 * nblks: # of data blocks addressed by b
+	 */
+	for(nindir = i+1; nindir >= 0; nindir--){
+		dDprint("indir %s d%#ullx nblks %uld ptrperblk %d bno %uld\n",
+			tname(DBdata+nindir), *addrp, nblks, Dptrperblk, bno);
+		dDprint(" in %H\n", b);
+		idx = 0;
+		if(nindir > 0){
+			nblks /= Dptrperblk;
+			idx = bno/nblks;
+		}
+		if(*addrp == 0 && !mkit){
+			/* hole */
+			b = nil;
+		}else{
+			assert(type >= DBdata);
+			if(mkit)
+				b = getmelted(isdir, isarch, type, addrp);
+			else
+				b = dbget(type, *addrp);
+			addrp = &b->d.ptr[idx];
+			mbput(pb);
+			pb = b;
+		}
+		USED(&b);	/* force to memory in case of error */
+		USED(&pb);	/* force to memory in case of error */
+		bno -= idx * nblks;
+		prev += idx * nblks;
+		type--;
+	}
+	noerror();
+	return b;
+}
+
+/*
+ * Remove [bno:bend) file data blocks.
+ * The file must be r/wlocked and melted.
+ * Blocks that can't be reached (dfblk raised an error) and
+ * holes (dfblk returned nil) are skipped.
+ */
+void
+dfdropblks(Memblk *f, ulong bno, ulong bend)
+{
+	Memblk *b;
+
+	isrwlocked(f, Wr);
+	ismelted(f);
+	isnotdir(f);
+
+	dDprint("dfdropblks: could remove d%#ullx[%uld:%uld]\n",
+		f->addr, bno, bend);
+	/*
+	 * Instead of releasing the references on the data blocks,
+	 * considering that the file might grow again, we keep them.
+	 * Consider recompiling again and again and...
+	 *
+	 * The length has been adjusted and data won't be returned
+	 * before overwritten.
+	 *
+	 * We only have to zero the data, because the file might
+	 * grow using holes and the holes must read as zero, and also
+	 * for safety.
+	 */
+	for(; bno < bend; bno++){
+		if(catcherror())
+			continue;
+		b = dfblk(f, bno, 0);
+		noerror();
+		/*
+		 * dfblk returns nil for a hole when not asked to
+		 * make the block; there is nothing to zero then.
+		 */
+		if(b == nil)
+			continue;
+		memset(b->d.data, 0, Dblkdatasz);
+		changed(b);
+		mbput(b);
+	}
+}
+
+/*
+ * block # for the given offset (first block in file is 0).
+ * embedded data accounts also as block #0.
+ * If boffp is not nil it returns the offset within that block
+ * for the given offset. 
+ */
+ulong
+dfbno(Memblk *f, uvlong off, ulong *boffp)
+{
+	ulong doff, dlen;
+
+	doff = embedattrsz(f);
+	dlen = Embedsz - doff;
+	if(off < dlen){
+		/*
+		 * within the embedded data: the offset reported is
+		 * relative to the start of the embedded area.
+		 * boffp may be nil here too, as in the case below.
+		 */
+		if(boffp != nil)
+			*boffp = doff + off;
+		return 0;
+	}
+	off -= dlen;
+	if(boffp != nil)
+		*boffp = off%Dblkdatasz;
+	return off/Dblkdatasz;
+}
+
+/*
+ * Record nsize as the length of f, both in the in-memory
+ * metadata and in the metadata embedded in the file block.
+ * f must be locked for writing.
+ */
+static void
+updatesize(Memblk *f, uvlong nsize)
+{
+	Dmeta *d;
+
+	isrwlocked(f, Wr);
+	f->mf->length = nsize;
+	d = (Dmeta*)f->d.embed;
+	d->length = nsize;
+}
+
+/*
+ * Return a block slice for data in f.
+ * The slice returned is resized to keep in a single block.
+ * If there's a hole in the file, Blksl.data == nil && Blksl.len > 0.
+ *
+ * If mkit, the data block (and any pointer block crossed)
+ * is allocated/melted if needed, and the file length updated.
+ *
+ * The file must be r/wlocked by the caller, and melted if mkit.
+ * The block is returned referenced but unlocked,
+ * (it's still protected by the file lock.)
+ */
+Blksl
+dfslice(Memblk *f, ulong len, uvlong off, int iswr)
+{
+	Blksl sl;
+	ulong boff, doff, dlen, bno;
+
+	memset(&sl, 0, sizeof sl);
+
+	if(iswr)
+		ismelted(f);
+	else
+		if(off >= f->mf->length)
+			goto done;
+
+	doff = embedattrsz(f);
+	dlen = Embedsz - doff;
+
+	if(off < dlen){
+		sl.b = f;
+		incref(f);
+		sl.data = f->d.embed + doff + off;
+		sl.len = dlen - off;
+	}else{
+		bno = (off-dlen) / Dblkdatasz;
+		boff = (off-dlen) % Dblkdatasz;
+
+		sl.b = dfblk(f, bno, iswr);
+		if(iswr)
+			ismelted(sl.b);
+		if(sl.b != nil)
+			sl.data = sl.b->d.data + boff;
+		sl.len = Dblkdatasz - boff;
+	}
+
+	if(sl.len > len)
+		sl.len = len;
+	if(off + sl.len > f->mf->length)
+		if(iswr)
+			updatesize(f, off + sl.len);
+		else
+			sl.len = f->mf->length - off;
+done:
+	if(sl.b == nil){
+		dDprint("slice m%#p[%#ullx:+%#ulx]%c -> 0[%#ulx]\n",
+			f, off, len, iswr?'w':'r', sl.len);
+		return sl;
+	}
+	if(TAGTYPE(sl.b->d.tag) == DBfile)
+		dDprint("slice m%#p[%#ullx:+%#ulx]%c -> m%#p:e+%#uld[%#ulx]\n",
+			f, off, len, iswr?'w':'r',
+			sl.b, (uchar*)sl.data - sl.b->d.embed, sl.len);
+	else
+		
dDprint("slice m%#p[%#ullx:+%#ulx]%c -> m%#p:%#uld[%#ulx]\n",
+			f, off, len, iswr?'w':'r',
+			sl.b, (uchar*)sl.data - sl.b->d.data, sl.len);
+
+	assert(sl.b->ref > 1);
+	return sl;
+}
+
+/*
+ * Remove the entry de, found at offset off in directory d, by
+ * moving the last entry of d into its place (when de is not the
+ * last one) and shrinking d by one Dentry.
+ * If an error is raised while doing so, it returns silently
+ * without changing the length.
+ */
+static void
+compact(Memblk *d, Dentry *de, u64int off)
+{
+	Blksl sl;
+	uvlong lastoff;
+	Dentry *lastde;
+
+	if(catcherror())
+		return;
+	assert(d->mf->length >= sizeof(Dentry));
+	lastoff = d->mf->length - sizeof(Dentry);
+	if(d->mf->length > sizeof(Dentry) && off < lastoff){
+		sl = dfslice(d, sizeof(Dentry), lastoff, 0);
+		assert(sl.b);
+		lastde = sl.data;
+		de->file = lastde->file;
+		lastde->file = 0;
+		changed(sl.b);
+		mbput(sl.b);
+	}
+	noerror();
+	updatesize(d, lastoff);
+	changed(d);
+}
+
+/*
+ * Find a dir entry for addr (perhaps 0 == avail) and change it to
+ * naddr. If iswr, the entry is allocated if needed and the blocks
+ * melted on demand.
+ * Return the offset for the entry in the file or Noaddr
+ */
+u64int
+dfchdentry(Memblk *d, u64int addr, u64int naddr, int iswr)
+{
+	Blksl sl;
+	Dentry *de;
+	uvlong off;
+	int i;
+
+	dDprint("dfchdentry d%#ullx -> d%#ullx\nin %H\n", addr, naddr, d);
+	isrwlocked(d, iswr);
+	isdir(d);
+
+	off = 0;
+	for(;;){
+		sl = dfslice(d, Dblkdatasz, off, iswr);
+		if(sl.len == 0)
+			break;
+		if(sl.b == 0){
+			/* a hole: all its entries read as 0 (avail) */
+			if(addr == 0 && !iswr)
+				return off;
+			/*
+			 * skip the hole; without advancing off we would
+			 * get the same slice again and loop forever.
+			 */
+			off += sl.len;
+			continue;
+		}
+		de = sl.data;
+		for(i = 0; i < sl.len/sizeof(Dentry); i++){
+			if(de[i].file == addr){
+				if(naddr != addr){
+					if(iswr && naddr == 0)
+						compact(d, &de[i], off+i*sizeof(Dentry));
+					else
+						de[i].file = naddr;
+					changed(sl.b);
+				}
+				mbput(sl.b);
+				return off + i*sizeof(Dentry);
+			}
+		}
+		off += sl.len;
+		mbput(sl.b);
+	}
+	if(iswr)
+		fatal("dfchdentry: bug");
+	return Noaddr;
+}
+
+/*
+ * Return the address kept in the n-th (counting from 0) entry
+ * in use of directory d, or 0 if there is no such entry.
+ */
+static u64int
+dfdirnth(Memblk *d, int n)
+{
+	Blksl sl;
+	Dentry *de;
+	uvlong off;
+	int i, tot;
+
+	isdir(d);
+	off = 0;
+	tot = 0;
+	for(;;){
+		sl = dfslice(d, Dblkdatasz, off, 0);
+		if(sl.len == 0)
+			break;
+		if(sl.b == 0){
+			/* hole: no entries in use; skip it or we loop forever */
+			off += sl.len;
+			continue;
+		}
+		de = sl.data;
+		for(i = 0; i < sl.len/sizeof(Dentry); i++)
+			
if(de[i].file != 0 && tot++ >= n){
+				u64int addr;
+
+				/*
+				 * de points into sl.b: take the address
+				 * before releasing our ref on the block.
+				 */
+				addr = de[i].file;
+				mbput(sl.b);
+				dDprint("dfdirnth d%#ullx[%d] = d%#ullx\n",
+					d->addr, n, addr);
+				return addr;
+			}
+		off += sl.len;
+		mbput(sl.b);
+	}
+	return 0;
+}
+
+/*
+ * Get the n-th child of directory f.
+ * The copy in memory is used if mbget finds one. Otherwise, when
+ * disktoo is set, the file is brought in with dbget and f (with a
+ * new ref) becomes its parent.
+ * Returns nil if there is no n-th entry, or if the child is not
+ * in memory and disktoo is 0.
+ */
+static Memblk*
+xfchild(Memblk *f, int n, int disktoo)
+{
+	u64int addr;
+	Memblk *b;
+
+	addr = dfdirnth(f, n);
+	if(addr == 0)
+		return nil;
+	b = mbget(addr, 0);
+	if(b != nil || disktoo == 0)
+		return b;
+	b = dbget(DBfile, addr);
+	b->mf->parent = f;
+	incref(f);
+
+	return b;
+}
+
+/* n-th child of f, read from disk if it is not in memory */
+Memblk*
+dfchild(Memblk *f, int n)
+{
+	return xfchild(f, n, 1);
+}
+
+/* n-th child of f, only if it is already in memory */
+Memblk*
+mfchild(Memblk *f, int n)
+{
+	return xfchild(f, n, 0);
+}
+
+/*
+ * does not dbincref(f)
+ * caller locks both d and f
+ */
+void
+dflink(Memblk *d, Memblk *f)
+{
+	ismelted(d);
+	isdir(d);
+
+	dfchdentry(d, 0, f->addr, Wr);
+	f->mf->parent = d;
+	incref(d);
+	changed(d);
+}
+
+/*
+ * does not dbdecref(f)
+ * caller locks both d and f
+ */
+void
+dfunlink(Memblk *d, Memblk *f)
+{
+	ismelted(d);
+	isdir(d);
+
+	dfchdentry(d, f->addr, 0, Wr);
+	if(f->mf->parent == d){	/* f may be shared */
+		mbput(f->mf->parent);
+		f->mf->parent = nil;
+	}
+	changed(d);
+}
+
+/*
+ * Walk to a child and return it referenced.
+ * If iswr, d must not be frozen and the child is returned melted.
+ */
+static Memblk*
+xdfwalk(Memblk *d, char *name, int iswr)
+{
+	Memblk *f, *nf;
+	Blksl sl;
+	Dentry *de;
+	uvlong off;
+	int i;
+
+	dDprint("dfwalk '%s' at %H\n", name, d);
+	isdir(d);
+	if(iswr)
+		ismelted(d);
+
+	off = 0;
+	for(;;){
+		sl = dfslice(d, Dblkdatasz, off, 0);
+		if(sl.len == 0)
+			break;
+		if(sl.b == nil){
+			/* hole: nothing to look at; skip it or we loop forever */
+			off += sl.len;
+			continue;
+		}
+		if(catcherror()){
+			mbput(sl.b);
+			error(nil);
+		}
+		for(i = 0; i < sl.len/sizeof(Dentry); i++){
+			de = sl.data;
+			de += i;
+			if(de->file == 0)
+				continue;
+			f = dbget(DBfile, de->file);
+			if(strcmp(f->mf->name, name) != 0){
+				mbput(f);
+				continue;
+			}
+
+			/* found */
+			noerror();
+			mbput(sl.b);
+			if(!iswr || !f->frozen)
+				goto done;
+
+			/* It's for writing, and frozen: melt it and its ref. 
*/ + if(catcherror()){ + mbput(f); + error(nil); + } + nf = dbdup(f); + if(!catcherror()){ + dbdecref(f->addr); + noerror(); + } + mbput(f); + f = nf; + USED(&f); + sl = dfslice(d, sizeof(Dentry), off+i*sizeof(Dentry), 1); + de = sl.data; + assert(sl.b); + de->file = f->addr; + mbput(sl.b); + noerror(); + changed(d); + goto done; + + } + noerror(); + mbput(sl.b); + off += sl.len; + } + error("file not found"); + +done: + return f; +} + +Memblk* +dfwalk(Memblk *d, char *name, int iswr) +{ + Memblk *x; + + isrwlocked(d, iswr); + if(strcmp(name, "..") == 0){ + x = d->mf->parent; + if(x == nil) + x = d; + incref(x); + }else + x = xdfwalk(d, name, iswr); + return x; +} + + +static char ** +dfrevpath(Memblk *f, int *nnamesp) +{ + Memblk *b, *pb; + char **names; + int nnames; + + isrwlocked(f, Rd); + names = nil; + nnames = 0; + for(b = f; b != nil; b = pb){ + if(b == fs->active || b == fs->archive) + break; + if(nnames%Incr == 0) + names = realloc(names, (nnames+Incr)*sizeof(char*)); + rwlock(b, Rd); + names[nnames++] = strdup(b->mf->name); + pb = b->mf->parent; + rwunlock(b, Rd); + } + *nnamesp = nnames; + return names; +} + +static Memblk* +meltedactive(void) +{ + Memblk *b; + + for(;;){ + b = fs->active; + rwlock(b, Wr); + if(!b->frozen) + break; + rwunlock(b, Wr); + } + ismelted(b); + isrwlocked(b, Wr); + return b; +} + +/* + * Want to write on f, make sure it's melted. + * Return the version of f that we must use, locked for writing and melted. + * (our reference to f is traded for the one returned). + * + * This function exploits that freezing a tree walks from the root down + * to the leaves, and requires an wlock for each file frozen, including active. + * Once active is melted and wlocked, no file can't be frozen after we melt it. + */ +Memblk* +dfmelt(Memblk *f) +{ + char **names; + int nnames, i; + Memblk *b, *nb, *f0, *nf; + + /* + * 0. Try to get a melted version for f. + * Preserve f0 so we keep a ref upon errors. 
+ */ + isfile(f); + f0 = f; + incref(f0); + rwlock(f0, Wr); + while(f->mf->melted != nil){ + incref(f->mf->melted); + nf = f->mf->melted; + mbput(f); + f = nf; + } + rwunlock(f0, Wr); + rwlock(f, Wr); + if(!f->frozen){ + mbput(f0); + return f; + } + rwunlock(f, Wr); + if(catcherror()){ + mbput(f); /* both if f == f0 or f != f0 */ + error(nil); + } + + /* + * 1. travel up to a melted block or to the root, recording + * the names we will have to walk down to reach f. + * TODO: If we find a melted file we could stop there. + */ + dDprint("dfmelt %H\n", f); + rwlock(f, Rd); + names = dfrevpath(f, &nnames); + rwunlock(f, Rd); + if(catcherror()){ + for(i = 0; i < nnames; i++) + free(names[i]); + free(names); + error(nil); + } + + /* + * 2. walk down from active to f, ensuring everything is melted. + * be careful to hold wlocks so that things are not frozen + * again while we walk. + */ + b = meltedactive(); + incref(b); + if(catcherror()){ + rwunlock(b, Wr); + mbput(b); + error(nil); + } + for(i = nnames-1; i >= 0; i--){ + nb = xdfwalk(b, names[i], 1); + rwlock(nb, Wr); + rwunlock(b, Wr); + ismelted(nb); + mbput(b); + b = nb; + USED(&b); /* in case of error() */ + } + noerror(); + noerror(); + noerror(); + for(i = 0; i < nnames; i++) + free(names[i]); + free(names); + + mbput(f0); + + isrwlocked(b, Wr); + ismelted(b); + return b; +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/file.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/file.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,410 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * Interface to handle files. 
+ * see dk.h + */ + +static void +dfchanged(Memblk *f) +{ + isfile(f); + changed(f); + wmtime(f, &f->d.epoch, sizeof f->d.epoch); + watime(f, &f->d.epoch, sizeof f->d.epoch); +} + +static void +dfused(Memblk *f) +{ + u64int t; + + isfile(f); + t = now(); + wmtime(f, &t, sizeof t); +} + + +Memblk* +dfcreate(Memblk *parent, char *name, char *uid, ulong mode) +{ + Memblk *b; + Mfile *m; + + if(fsfull()) + error("file system full"); + + if(parent != nil){ + dDprint("dfcreate '%s' %M at\n%H\n", name, mode, parent); + isdir(parent); + isrwlocked(parent, Wr); + ismelted(parent); + b = dballoc(DBfile); + }else{ + dDprint("dfcreate '%s' %M", name, mode); + b = dballoc(Noaddr); /* root */ + } + if(catcherror()){ + mbput(b); + if(parent != nil) + rwunlock(parent, Wr); + error(nil); + } + + m = b->mf; + m->id = b->d.epoch; + m->mode = mode; + m->mtime = b->d.epoch; + m->length = 0; + m->uid = uid; + m->gid = uid; + m->muid = uid; + m->name = name; + b->d.asize = pmeta(b->d.embed, Embedsz, m); + dfchanged(b); + + if(parent != nil){ + m->gid = parent->mf->uid; + dflink(parent, b); + dfchanged(parent); + } + noerror(); + dDprint("dfcreate-> %H\n", b); + incref(b); /* initial ref for tree; this for caller */ + return b; +} + +void +dfremove(Memblk *p, Memblk *f) +{ + /* funny as it seems, we may need extra blocks to melt */ + if(fsfull()) + error("file system full"); + + isrwlocked(f, Wr); + isrwlocked(p, Wr); + ismelted(p); + if((f->mf->mode&DMDIR) != 0 && f->mf->length > 0) + error("directory not empty"); + incref(p); + if(catcherror()){ + mbput(p); + error(nil); + } + dfunlink(p, f); + /* can't fail now. 
it's unlinked */ + noerror(); + rwunlock(f, Wr); + if(!catcherror()){ + dfreclaim(f); + noerror(); + } + mbput(f); + mbput(p); +} + +ulong +dfpread(Memblk *f, void *a, ulong count, uvlong off) +{ + Blksl sl; + ulong tot; + char *p; + + p = a; + isrwlocked(f, Rd); + for(tot = 0; tot < count; tot += sl.len){ + sl = dfslice(f, count-tot, off+tot, Rd); + if(sl.len == 0) + break; + if(sl.data == nil){ + memset(p+tot, 0, sl.len); + continue; + } + memmove(p+tot, sl.data, sl.len); + mbput(sl.b); + } + dfused(f); + return tot; +} + +ulong +dfpwrite(Memblk *f, void *a, ulong count, uvlong off) +{ + Blksl sl; + ulong tot; + char *p; + + if(fsfull()) + error("file system full"); + + isrwlocked(f, Wr); + ismelted(f); + p = a; + for(tot = 0; tot < count; tot += sl.len){ + sl = dfslice(f, count-tot, off+tot, Wr); + if(sl.len == 0 || sl.data == nil) + fatal("dfpwrite: bug"); + memmove(sl.data, p+tot, sl.len); + changed(sl.b); + mbput(sl.b); + } + dfchanged(f); + return tot; +} + +/* + * Called only by dfwattr(), for "length", to + * adjust the file data structure before actually + * updating the file length attribute. + * Should return the size in use. 
+ */
+
+static int
+ptrmap(u64int addr, int nind, Blkf f, int isdisk)
+{
+	int i;
+	Memblk *b;
+	long tot;
+
+	if(addr == 0)
+		return 0;
+	if(isdisk)
+		b = dbget(DBdata+nind, addr);
+	else{
+		b = mbget(addr, 0);
+		if(b == nil)
+			return 0;	/* on disk */
+	}
+	if(catcherror()){
+		mbput(b);
+		error(nil);
+	}
+	tot = 0;
+	if(f(b) == 0){
+		tot++;
+		if(nind > 0)
+			for(i = 0; i < Dptrperblk; i++)
+				tot += ptrmap(b->d.ptr[i], nind-1, f, isdisk);
+	}
+	noerror();
+	mbput(b);
+	return tot;
+}
+
+/*
+ * Map functions over the tree rooted at file f.
+ *
+ * pre is called first on f; if it returns < 0 the file and everything
+ * below it are skipped and 0 is returned.
+ * bf, if not nil, is applied through ptrmap() to each block reached from
+ * the direct pointers (0 levels of indirection) and from the indirect
+ * pointers (iptr[i] has i+1 levels of indirection).
+ * If f is a directory, dfmap is applied to each child. Errors raised
+ * while mapping a child are caught and ignored, so the siblings are
+ * still visited.
+ * post, if not nil, is called on f after its blocks and children.
+ *
+ * isdisk selects dfchild/dbget (blocks are fetched with dbget) instead
+ * of mfchild/mbget (blocks not found by mbget are skipped).
+ * lk is the mode given to rwlock/rwunlock for each file visited.
+ *
+ * Returns the number of files and blocks that were visited.
+ */
+int
+dfmap(Memblk *f, Blkf pre, Blkf post, Blkf bf, int isdisk, int lk)
+{
+	int i;
+	Memblk *b;
+	Memblk *(*child)(Memblk*, int);
+	long tot;
+
+	isfile(f);
+	rwlock(f, lk);
+	if(catcherror()){
+		rwunlock(f, lk);
+		error(nil);
+	}
+	if(pre != nil && pre(f) < 0){
+		noerror();
+		rwunlock(f, lk);
+		return 0;
+	}
+	tot = 1;
+	if(bf != nil){
+		for(i = 0; i < nelem(f->d.dptr); i++)
+			tot += ptrmap(f->d.dptr[i], 0, bf, isdisk);
+		/* indirect blocks hang from iptr, not from dptr */
+		for(i = 0; i < nelem(f->d.iptr); i++)
+			tot += ptrmap(f->d.iptr[i], i+1, bf, isdisk);
+	}
+	if((f->mf->mode&DMDIR) != 0){
+		child = dfchild;
+		if(!isdisk)
+			child = mfchild;
+		for(i = 0; i < f->mf->length/sizeof(Dentry); i++){
+			b = child(f, i);
+			if(b == nil)
+				continue;
+			if(!catcherror()){
+				tot += dfmap(b, pre, post, bf, isdisk, lk);
+				noerror();
+			}
+			mbput(b);
+		}
+	}
+	if(post != nil)
+		post(f);
+	noerror();
+	rwunlock(f, lk);
+	return tot;
+}
+
+static int
+bfreezef(Memblk *b)
+{
+	if(b->frozen)
+		return -1;
+	b->frozen = 1;
+	return 0;
+}
+
+static int
+ffreezef(Memblk *f)
+{
+	/* see fsfreeze() */
+	if(f->frozen && f != fs->active && f != fs->archive)
+		return -1;
+	f->frozen = 1;
+	return 0;
+}
+
+int
+dffreeze(Memblk *f)
+{
+	return dfmap(f, ffreezef, nil, bfreezef, Mem, Wr);
+}
+
+static int
+bsyncf(Memblk *b)
+{
+	if(!b->frozen)
+		fatal("bsyncf: not frozen\n%H\n", b);
+
+	if(b->dirty)
+		dbwrite(b);
+	b->dirty = 0;
+	return 0;
+}
+
+static int
+fsyncf(Memblk *f)
+{
+	if(f->written)
+		return -1;
+	return 0;
+}
+static int
+fsyncedf(Memblk *f)
+{
+
if((f != fs->archive && !f->frozen) || f->written) + fatal("fsyncf: not frozen or written\n%H\n", f); + if(f->dirty) + dbwrite(f); + f->dirty = 0; + f->written = 1; /* but for errors! */ + return 0; +} + +int +dfsync(Memblk *f) +{ + return dfmap(f, fsyncf, fsyncedf, bsyncf, Mem, Rd); +} + +static int +breclaimf(Memblk *b) +{ + if(catcherror()) + return -1; + if(dbdecref(b->addr) != 0){ + noerror(); + return -1; + } + if(b->ref != 1) + fatal("breclaimf: ref is %d", b->ref); + noerror(); + return 0; +} + +static int +freclaimf(Memblk *f) +{ + if(dbdecref(f->addr) != 0) + return -1; + if(f->ref != 1) + print("freclaimf: ref is %d\n", f->ref); + return 0; +} + +/* + * While reclaiming, we drop disk references from the parent + * to the children, but, in memory, + * the parent is never released before releasing the children, + * so clients holding locks within the reclaimed tree should be safe. + */ +int +dfreclaim(Memblk *f) +{ + return dfmap(f, freclaimf, nil, breclaimf, Disk, Wr); +} + +static int +fdumpf(Memblk *f) +{ + extern int mbtab; + + isfile(f); + print("%H\n", f); + mbtab++; + return 0; +} + +static int +fdumpedf(Memblk *) +{ + extern int mbtab; + + mbtab--; + return 0; +} + +int +dfdump(Memblk *f, int disktoo) +{ + int n; + + incref(f); + n = dfmap(f, fdumpf, fdumpedf, nil, disktoo, No); + decref(f); + return n; +} + +/* + * DEBUG: no locks. 
+ */ +void +dflist(Memblk *f, char *ppath) +{ + char *path; + Mfile *m; + int i; + Memblk *cf; + + m = f->mf; + if(ppath == nil){ + print("/"); + path = strdup(m->name); + }else + path = smprint("%s/%s", ppath, m->name); + print("%-30s\t%M\t%5ulld\t%s mr=%d dr=%ulld\n", + path, (ulong)m->mode, m->length, m->uid, f->ref, dbgetref(f->addr)); + if(m->mode&DMDIR) + for(i = 0; (cf = dfchild(f, i)) != nil; i++){ + dflist(cf, path); + mbput(cf); + } + free(path); + if(ppath == nil) + print("\n"); +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/fns.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/fns.h Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,85 @@ +extern u64int addrofref(u64int refaddr, int idx); +extern void changed(Memblk *b); +extern void checktag(u64int tag, uint type, u64int addr); +extern void clean(Memblk *b); +extern void consprint(char *fmt, ...); +extern long consread(char *buf, long count); +extern long conswrite(char *buf, long count); +extern Memblk* dballoc(uint type); +extern void dbclear(u64int addr, int type); +extern u64int dbdecref(u64int addr); +extern Memblk* dbdup(Memblk *b); +extern int dbgclr(uchar flag); +extern Memblk* dbget(uint type, u64int addr); +extern u64int dbgetref(u64int addr); +extern u64int dbincref(u64int addr); +extern long dbread(Memblk *b); +extern void dbsetref(u64int addr, int ref); +extern long dbwrite(Memblk *b); +extern void dfaccessok(Memblk *f, char *uid, int bits); +extern ulong dfbno(Memblk *f, uvlong off, ulong *boffp); +extern u64int dfchdentry(Memblk *d, u64int addr, u64int naddr, int iswr); +extern Memblk* dfchild(Memblk *f, int n); +extern Memblk* dfcreate(Memblk *parent, char *name, char *uid, ulong mode); +extern void dfdropblks(Memblk *f, ulong bno, ulong bend); +extern int dfdump(Memblk *f, int disktoo); +extern int dffreeze(Memblk *f); +extern void dflink(Memblk *d, Memblk *f); +extern void dflist(Memblk *f, char *ppath); +extern int dfmap(Memblk *f, Blkf pre, Blkf post, Blkf 
bf, int isdisk, int lk); +extern Memblk* dfmelt(Memblk *f); +extern ulong dfpread(Memblk *f, void *a, ulong count, uvlong off); +extern ulong dfpwrite(Memblk *f, void *a, ulong count, uvlong off); +extern long dfrattr(Memblk *f, char *name, void *val, long count); +extern int dfreclaim(Memblk *f); +extern void dfremove(Memblk *p, Memblk *f); +extern Blksl dfslice(Memblk *f, ulong len, uvlong off, int iswr); +extern int dfsync(Memblk *f); +extern void dfunlink(Memblk *d, Memblk *f); +extern Memblk* dfwalk(Memblk *d, char *name, int iswr); +extern long dfwattr(Memblk *f, char *name, void *val, long nval); +extern void disktohost(Memblk *b); +extern void dupdentries(void *p, int n); +extern ulong embedattrsz(Memblk *f); +extern void fatal(char *fmt, ...); +extern void fsdump(int disktoo); +extern void fsfmt(char *dev); +extern Memblk* fsfreeze(void); +extern int fsfull(void); +extern void fslist(void); +extern int fslowmem(void); +extern void fsopen(char *dev); +extern void fspolicy(void); +extern int fsreclaim(void); +extern void fssync(void); +extern void gmeta(Fmeta *meta, void *buf, ulong nbuf); +extern Memblk* hosttodisk(Memblk *b); +extern void isdir(Memblk *f); +extern void isfile(Memblk *f); +extern void ismelted(Memblk *b); +extern void isnotdir(Memblk *f); +extern void isrwlocked(Memblk *f, int iswr); +extern void listen9p(char *addr); +extern Memblk* mballoc(u64int addr); +extern Memblk* mbdup(Memblk *b); +extern int mbfmt(Fmt *fmt); +extern Memblk* mbget(u64int addr, int mkit); +extern Memblk* mbhash(Memblk *b); +extern void mbput(Memblk *b); +extern void mbunhash(Memblk *b); +extern void meltedref(Memblk *rb); +extern Mfile* mfalloc(void); +extern Memblk* mfchild(Memblk *f, int n); +extern void mffree(Mfile *mf); +extern u64int newblkaddr(void); +extern uvlong now(void); +extern void okaddr(u64int addr); +extern ulong pmeta(void *buf, ulong nbuf, Fmeta *meta); +extern u64int refaddr(u64int addr, int *idx); +extern void rwlock(Memblk *f, int iswr); +extern 
void rwunlock(Memblk *f, int iswr); +extern void srv9p(char *srv); +extern char* tname(int t); +extern long watime(Memblk *f, void *buf, long); +extern long wmtime(Memblk *f, void *buf, long); +extern long wname(Memblk *f, void *buf, long len); diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/fscmd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/fscmd.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,402 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +enum +{ + Nels = 64 +}; + +static char *fsdir; +static int verb; + +/* + * Walks elems starting at f. + * Ok if nelems is 0. + */ +static Memblk* +walkpath(Memblk *f, char *elems[], int nelems) +{ + int i; + Memblk *f0, *nf; + + isfile(f); + f0 = f; + for(i = 0; i < nelems; i++){ + if((f->mf->mode&DMDIR) == 0) + error("not a directory"); + rwlock(f, Rd); + if(catcherror()){ + if(f != f0) + mbput(f); + rwunlock(f, Rd); + error("walk: %r"); + } + nf = dfwalk(f, elems[i], 0); + rwunlock(f, Rd); + if(f != f0) + mbput(f); + f = nf; + USED(&f); /* in case of error() */ + noerror(); + } + if(f == f0) + incref(f); + return f; +} + +static char* +fsname(char *p) +{ + if(p[0] == '/') + return strdup(p); + if(fsdir) + return smprint("%s/%s", fsdir, p); + return strdup(p); +} + +static Memblk* +walkto(char *a, char **lastp) +{ + char *els[Nels], *path; + int nels; + Memblk *f; + + path = fsname(a); + nels = gettokens(path, els, Nels, "/"); + if(nels < 1){ + free(path); + error("invalid path"); + } + if(catcherror()){ + free(path); + error("walkpath: %r"); + } + if(lastp != nil){ + f = walkpath(fs->root, els, nels-1); + *lastp = a + strlen(a) - strlen(els[nels-1]); + }else + f = walkpath(fs->root, els, nels); + free(path); + noerror(); + if(verb) + print("walked to %H\n", f); + return f; +} + +static void +fscd(int, char *argv[]) +{ + free(fsdir); + fsdir = strdup(argv[1]); +} + +static void +fsput(int, char 
*argv[]) +{ + int fd; + char *fn; + Memblk *m, *f; + Dir *d; + char buf[4096]; + uvlong off; + long nw, nr; + + fd = open(argv[1], OREAD); + if(fd < 0) + error("open: %r\n"); + d = dirfstat(fd); + if(d == nil){ + error("dirfstat: %r\n"); + } + if(catcherror()){ + close(fd); + free(d); + error(nil); + } + m = walkto(argv[2], &fn); + m = dfmelt(m); + if(catcherror()){ + rwunlock(m, Wr); + mbput(m); + error(nil); + } + f = dfcreate(m, fn, d->uid, d->mode&(DMDIR|0777)); + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + mbput(f); + error(nil); + } + if((d->mode&DMDIR) == 0){ + off = 0; + for(;;){ + nr = read(fd, buf, sizeof buf); + if(nr <= 0) + break; + nw = dfpwrite(f, buf, nr, off); + dDprint("wrote %ld of %ld bytes\n", nw, nr); + off += nr; + } + } + noerror(); + noerror(); + noerror(); + if(verb) + print("created %H\nat %H\n", f, m); + rwunlock(f, Wr); + rwunlock(m, Wr); + mbput(m); + mbput(f); + close(fd); + free(d); +} + +static void +fscat(int, char *argv[]) +{ + Memblk *f; + Mfile *m; + char buf[4096]; + uvlong off; + long nr; + + f = walkto(argv[2], nil); + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + mbput(f); + error(nil); + } + m = f->mf; + print("cat %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)m->mode, m->length, m->uid, dbgetref(f->addr)); + if((m->mode&DMDIR) == 0){ + off = 0; + for(;;){ + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + write(1, buf, nr); + off += nr; + } + } + noerror(); + rwunlock(f, Rd); + mbput(f); +} + +static void +fsget(int, char *argv[]) +{ + Memblk *f; + Mfile *m; + char buf[4096]; + uvlong off; + long nr; + int fd; + + fd = create(argv[1], OWRITE, 0664); + if(fd < 0) + error("create: %r\n"); + if(catcherror()){ + close(fd); + error(nil); + } + f = walkto(argv[2], nil); + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + mbput(f); + error(nil); + } + m = f->mf; + print("get %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)m->mode, m->length, m->uid, dbgetref(f->addr)); + 
if((m->mode&DMDIR) == 0){ + off = 0; + for(;;){ + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + if(write(fd, buf, nr) != nr){ + fprint(2, "%s: error: %r\n", argv[0]); + break; + } + off += nr; + } + } + close(fd); + rwunlock(f, Rd); + noerror(); + noerror(); + mbput(f); +} + +static void +fsls(int, char**) +{ + if(verb) + fsdump(1); + else + fslist(); +} + +static void +fssnap(int, char**) +{ + fssync(); +} + +static void +fsrcl(int, char**) +{ + fsreclaim(); +} + +static void +fsdmp(int, char**) +{ + fsdump(0); +} + +static void +fsdmpall(int, char**) +{ + fsdump(1); +} + +static void +fsdbg(int, char *argv[]) +{ + dbg['D'] = atoi(argv[1]); +} + +static void +fsout(int, char*[]) +{ + fslowmem(); +} + +static void +fsrm(int, char *argv[]) +{ + Memblk *f, *p; + + f = walkto(argv[1], nil); + if(catcherror()){ + mbput(f); + error(nil); + } + f->mf->parent = dfmelt(f->mf->parent); + p = f->mf->parent; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(p, Wr); + error(nil); + } + dfremove(p, f); + noerror(); + noerror(); + rwunlock(p, Wr); +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-dv] [-f disk] cmd...\n", argv0); + exits("usage"); +} + +static struct +{ + char *name; + void (*f)(int, char**); + int nargs; + char *usage; +} cmds[] = +{ + {"cd", fscd, 2, "cd!where"}, + {"put", fsput, 3, "put!src!dst"}, + {"get", fsget, 3, "get!dst!src"}, + {"cat", fscat, 3, "cat!what"}, + {"ls", fsls, 1, "ls"}, + {"dump", fsdmp, 1, "dump"}, + {"dumpall", fsdmpall, 1, "dumpall"}, + {"snap", fssnap, 1, "snap"}, + {"rcl", fsrcl, 1, "rcl"}, + {"dbg", fsdbg, 2, "dbg!n"}, + {"out", fsout, 1, "out"}, + {"rm", fsrm, 2, "rm!what"}, +}; + +void +threadmain(int argc, char *argv[]) +{ + char *dev; + char *args[Nels]; + int i, j, nargs; + + dev = "disk"; + ARGBEGIN{ + case 'v': + verb++; + break; + case 'f': + dev = EARGF(usage()); + break; + default: + if(ARGC() >= 'A' && ARGC() <= 'Z'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; + }else + usage(); 
+ }ARGEND; + if(argc == 0) + usage(); + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + errinit(Errstack); + if(catcherror()) + fatal("error: %r"); + fsopen(dev); + for(i = 0; i < argc; i++){ + if(catcherror()) + fatal("cmd %s: %r", argv[i]); + if(verb>1) + fsdump(0); + print("%% %s\n", argv[i]); + nargs = gettokens(argv[i], args, Nels, "!"); + for(j = 0; j < nelem(cmds); j++){ + if(strcmp(cmds[j].name, argv[i]) != 0) + continue; + if(cmds[j].nargs != 0 && cmds[j].nargs != nargs) + print("usage: %s\n", cmds[j].usage); + else{ + cmds[j].f(nargs, args); + } + break; + } + noerror(); + if(j == nelem(cmds)){ + print("no such command\n"); + for(j = 0; j < nelem(cmds); j++) + print("\t%s\n", cmds[j].usage); + break; + } + } + if(verb>1) + fsdump(0); + noerror(); + exits(nil); +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/fsfmt.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/fsfmt.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-dv]\n", argv0); + exits("usage"); +} + +static char xdbg[256]; +static char zdbg[256]; + +void +threadmain(int argc, char *argv[]) +{ + int verb; + char *dev; + + dev = "disk"; + verb = 0; + ARGBEGIN{ + case 'v': + verb++; + break; + default: + if(ARGC() >= 'A' && ARGC() <= 'Z'){ + xdbg['d'] = 1; + xdbg[ARGC()] = 1; + }else + usage(); + }ARGEND; + if(argc == 1) + dev = argv[0]; + else if(argc > 0) + usage(); + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + errinit(Errstack); + if(catcherror()) + fatal("error: %r"); + memmove(dbg, xdbg, sizeof xdbg); + fsfmt(dev); + memmove(dbg, zdbg, sizeof zdbg); + if(verb) + fsdump(0); + else + fslist(); + noerror(); + exits(nil); +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/fsys.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/sys/src/cmd/creepy/fsys.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,682 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * All the code assumes outofmemoryexits = 1. + */ + +Fsys *fs; +int fatalaborts = 1; +uvlong maxfsz; + +void +fatal(char *fmt, ...) +{ + va_list arg; + + va_start(arg, fmt); + vfprint(2, fmt, arg); + va_end(arg); + fprint(2, "\n"); + if(fatalaborts) + abort(); + exits("fatal"); +} + +uvlong +now(void) +{ + return nsec(); +} + +void +okaddr(u64int addr) +{ + if(addr < Dblksz || addr >= fs->limit) + error("okaddr %#ullx", addr); +} + +/* + * NO LOCKS. debug only + */ +void +fsdump(int disktoo) +{ + int i, flg; + Memblk *b; + u64int a; + + flg = dbg['D']; + dbg['D'] = 0; + if(fs != nil){ + print("\n\nfsys '%s' limit %#ulx super m%#p root m%#p:\n", + fs->dev, fs->limit, fs->super, fs->root); + print("nblk %uld nablk %uld used %uld free %uld\n", + fs->nblk, fs->nablk, fs->nused, fs->nfree); + print("%H\n", fs->super); + dfdump(fs->root, disktoo); + for(b = fs->refs; b != nil; b = b->next) + print("ref %H\n", b); + if(1) + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next) + print("h[%d] = d%#ullx\n", i, b->addr); + + } + b = fs->super; + if(b->d.free != 0){ + print("free:"); + for(a = b->d.free; a != 0; a = dbgetref(a)) + print(" d%#ullx", a); + print("\n"); + } + print("mru:"); + for(b = fs->mru; b != nil; b = b->lnext) + print(" d%#ullx", b->addr); + print("\n"); + print("Fsysmem\t= %uld\n", Fsysmem); + print("Dminfree\t= %d\n", Dminfree); + print("Dblksz\t= %uld\n", Dblksz); + print("Dminattrsz\t= %uld\n", Dminattrsz); + print("Nblkgrpsz\t= %uld\n", Nblkgrpsz); + print("Dblkdatasz\t= %d\n", Dblkdatasz); + print("Embedsz\t= %d\n", Embedsz); + print("Dentryperblk\t= %d\n", Dblkdatasz/sizeof(Dentry)); + print("Dptrperblk\t= %d\n\n", Dptrperblk); + dbg['D'] = flg; +} + +void +fslist(void) +{ + int flg; + + flg = 
dbgclr('D'); + print("fsys '%s' blksz %ulld maxfsz %ulld:\n", + fs->dev, fs->super->d.dblksz, maxfsz); + dflist(fs->root, nil); + print("\n"); + dbg['D'] = flg; +} + +static usize +disksize(int fd) +{ + Dir *d; + u64int sz; + + d = dirfstat(fd); + if(d == nil) + return 0; + sz = d->length; + free(d); + return sz; +} + +static void +freezerefs(void) +{ + Memblk *rb; + + qlock(&fs->rlk); + for(rb = fs->refs; rb != nil; rb = rb->next) + rb->frozen = 1; + qunlock(&fs->rlk); +} + +static void +writerefs(void) +{ + Memblk *rb; + + qlock(&fs->rlk); + for(rb = fs->refs; rb != nil; rb = rb->next) + meltedref(rb); + qunlock(&fs->rlk); +} + +static Memblk* +readsuper(void) +{ + Memblk *super; + + if(catcherror()){ + error("not a creepy disk: %r"); + error(nil); + } + fs->super = dbget(DBsuper, Dblksz); + super = fs->super; + if(super->d.magic != MAGIC) + error("bad magic number"); + if(super->d.dblksz != Dblksz) + error("bad Dblksz"); + if(super->d.nblkgrpsz != Nblkgrpsz) + error("bad Nblkgrpsz"); + if(super->d.dminattrsz != Dminattrsz) + error("bad Dminattrsz"); + if(super->d.ndptr != Ndptr) + error("bad ndptr"); + if(super->d.niptr != Niptr) + error("bad niptr"); + if(super->d.dblkdatasz != Dblkdatasz) + error("bad Dblkdatasz"); + if(super->d.embedsz != Embedsz) + error("bad Embedsz"); + if(super->d.dptrperblk != Dptrperblk) + error("bad Dptrperblk"); + noerror(); + return super; +} + +static void +freezesuper(void) +{ + Memblk *b; + + b = mbdup(fs->super); + qlock(fs); + b->d = fs->super->d; + assert(fs->fzsuper == nil); + fs->fzsuper = b; + fs->fzsuper->frozen = 1; + qunlock(fs); +} + +static void +writezsuper(void) +{ + if(canqlock(&fs->fzlk)) + fatal("writezsuper: lock"); + assert(fs->fzsuper != nil); + dbwrite(fs->fzsuper); + dDprint("writezsuper: %H\n", fs->fzsuper); + mbput(fs->fzsuper); + fs->fzsuper = nil; +} + +/* + * Write any dirty frozen state after a freeze. + * Only this function and initialization routines + * may write to the disk. 
+ */ +static void +fswrite(void) +{ + qlock(&fs->fzlk); + if(fs->fzsuper == nil) + fatal("can't fswrite if we didn't fsfreeze"); + if(catcherror()){ + qunlock(&fs->fzlk); + error(nil); + } + writerefs(); + dfsync(fs->archive); + writezsuper(); + noerror(); + qunlock(&fs->fzlk); +} + +/* + * Freeze the file tree, keeping active as a new melted file + * that refers to frozen children now in the archive. + * returns the just frozen tree. + * + * This requires two or three free blocks: + * - one free block to dup the new active + * - one to freeze the super block + * - an extra ref block if the new blocks come from a new block group. + */ +Memblk* +fsfreeze(void) +{ + Memblk *na, *oa, *arch; + char name[50]; + + /* call fslowmem? */ + qlock(&fs->fzlk); + if(catcherror()){ + /* + * There was an error during freeze. + * It's better not to continue to prevent disk corruption. + * The user is expected to restart from the last frozen + * version of the tree. + */ + fatal("freeze: %r"); + } + oa = fs->active; + arch = fs->archive; + rwlock(fs->root, Wr); + rwlock(oa, Wr); + rwlock(arch, Wr); + + /* + * move active into /archive/. + */ + seprint(name, name+sizeof(name), "%ulld", oa->d.epoch); + wname(oa, name, strlen(name)+1); + dflink(arch, oa); + + /* 1. Freeze the entire previously active. + */ + rwunlock(oa, Wr); /* race */ + dffreeze(oa); + rwunlock(arch, Wr); + + /* 2. Freeze the on-disk reference counters + * and the state of the super-block. + */ + freezerefs(); + freezesuper(); + + /* 3. Make a new archive and replace the old one. 
+ */ + na = dbdup(oa); + rwlock(na, Wr); + wname(na, "active", strlen("active")+1); + fs->active = na; + dfchdentry(fs->root, oa->addr, na->addr, 1); + + rwunlock(na, Wr); + rwunlock(fs->root, Wr); + qunlock(&fs->fzlk); + noerror(); + return na; +} + +static void +fsinit(char *dev, int nblk) +{ + uvlong fact; + int i; + + maxfsz = Ndptr*Dblkdatasz; + fact = 1; + for(i = 0; i < Niptr; i++){ + maxfsz += Dptrperblk * fact; + fact *= Dptrperblk; + } + + fs = mallocz(sizeof *fs, 1); + fs->dev = strdup(dev); + fs->fd = open(dev, ORDWR); + if(fs->fd < 0) + fatal("can't open disk: %r"); + + fs->nablk = Fsysmem / sizeof(Memblk); + if(nblk > 0 && nblk < fs->nablk) + fs->nablk = nblk; + fs->limit = disksize(fs->fd); + if(fs->nablk > fs->limit/Dblksz) + fs->nablk = fs->limit/Dblksz; + fs->limit = fs->nablk * Dblksz; + if(fs->limit < 10*Dblksz) + fatal("buy a larger disk"); + fs->blk = malloc(fs->nablk * sizeof fs->blk[0]); + dDprint("fsys '%s' init\n", fs->dev); +} + +/* + * / is only in memory. It's `on-disk' address is Noaddr. + * + * /archive is the root on disk. + * /active is allocated on disk, but not on disk. It will be linked into + * /archive as a child in the future. 
+ */ +void +fsfmt(char *dev) +{ + Memblk *super; + + fsinit(dev, 16); /* enough # of blocks for fmt */ + + if(catcherror()) + fatal("fsfmt: error: %r"); + + fs->super = dballoc(DBsuper); + super = fs->super; + super->d.magic = MAGIC; + super->d.eaddr = fs->super->addr + Dblksz; + super->d.dblksz = Dblksz; + super->d.nblkgrpsz = Nblkgrpsz; + super->d.dminattrsz = Dminattrsz; + super->d.ndptr = Ndptr; + super->d.niptr = Niptr; + super->d.dblkdatasz = Dblkdatasz; + super->d.embedsz = Embedsz; + super->d.dptrperblk = Dptrperblk; + fs->root = dfcreate(nil, "", getuser(), DMDIR|0555); + rwlock(fs->root, Wr); + fs->active = dfcreate(fs->root, "active", getuser(), DMDIR|0775); + fs->archive = dfcreate(fs->root, "archive", getuser(), DMDIR|0555); + rwunlock(fs->root, Wr); + super->d.root = fs->archive->addr; + fsfreeze(); + fswrite(); + + noerror(); +} + +void +fssync(void) +{ + /* + * TODO: If active has not changed and we are just going + * to dump a new archive for no change, do nothing. + */ + fsfreeze(); + fswrite(); +} + +/* + * One process per file system, so consume all the memory + * for the cache. + * To open more file systems, use more processes! 
+ */
+
+/*
+ * Open the file system kept in dev.
+ *
+ * The super block is read and checked, a new root is created in memory,
+ * and the file at super->d.root becomes the archive.
+ * The child of the archive with the largest epoch, if any, is duplicated
+ * and renamed to become the new "active" tree; otherwise an empty
+ * "active" directory is created.
+ * Both active and archive are linked into the root, along with the
+ * "cons" file, which is created exclusive-use with mode 0600.
+ * Any error is fatal.
+ */
+void
+fsopen(char *dev)
+{
+	Memblk *arch;
+	Memblk *last, *c;
+	int i;
+
+	if(catcherror())
+		fatal("fsopen: error: %r");
+
+	fsinit(dev, 0);
+	readsuper();
+
+	qlock(&fs->fzlk);
+	fs->root = dfcreate(nil, "", getuser(), DMDIR|0555);
+	arch = dbget(DBfile, fs->super->d.root);
+	fs->archive = arch;
+	rwlock(fs->root, Wr);
+	rwlock(arch, Wr);
+	/* keep in last, with an extra ref, the child with the largest epoch */
+	last = nil;
+	for(i = 0; (c = dfchild(arch, i)) != nil; i++){
+		if(last == nil || last->d.epoch < c->d.epoch){
+			mbput(last);
+			last = c;
+			incref(c);
+		}
+		mbput(c);
+	}
+	if(last != nil){
+		rwlock(last, Rd);
+		fs->active = dbdup(last);
+		wname(fs->active, "active", strlen("active")+1);
+		rwlock(fs->active, Wr);
+		dflink(fs->root, fs->active);
+		rwunlock(fs->active, Wr);
+		rwunlock(last, Rd);
+		mbput(last);
+	}else
+		fs->active = dfcreate(fs->root, "active", getuser(), DMDIR|0775);
+	dflink(fs->root, arch);
+	rwunlock(arch, Wr);
+	/* permissions are octal: 0600, not decimal 600 */
+	fs->cons = dfcreate(fs->root, "cons", getuser(), DMEXCL|0600);
+	fs->consc = chancreate(sizeof(char*), 256);
+	rwunlock(fs->root, Wr);
+	qunlock(&fs->fzlk);
+	noerror();
+}
+
+static uvlong
+fsmemfree(void)
+{
+	uvlong nfree;
+
+	qlock(fs);
+	nfree = fs->nablk - fs->nblk;
+	nfree += fs->nfree;
+	qunlock(fs);
+	return nfree;
+}
+
+/*
+ * This should be called if fs->nblk == fs->nablk && fs->nfree < some number.
+ */
+int
+fslowmem(void)
+{
+	int type;
+	ulong n, tot;
+	Memblk *b, *bprev;
+
+	if(fsmemfree() > Mminfree)
+		return 0;
+
+	/*
+	 * We are low on memory, try to make a snapshot so that
+	 * dirty blocks are moved to disk and we can release them if we want.
+ */ + dDprint("low on memory: syncing\n"); + fssync(); + + tot = 0; + do{ + if(fsmemfree() > Mmaxfree) + break; + qlock(&fs->fzlk); + if(catcherror()){ + qunlock(&fs->fzlk); + fprint(2, "%s: fslowmem: %r\n", argv0); + break; + } + n = 0; + for(b = fs->lru; b != nil && tot < Mmaxfree; b = bprev){ + bprev = b->lprev; + type = TAGTYPE(b->d.tag); + switch(type){ + case DBsuper: + case DBref: + dDprint("out: ignored: %H\n", b); + continue; + case DBfile: + if(b == fs->root || b == fs->active || b == fs->archive){ + dDprint("out: ignored: %H\n", b); + continue; + } + break; + } + if(b->dirty || b->ref > 1){ + dDprint("out: ignored: %H\n", b); + continue; + } + /* + * Blocks have one ref because of the hash table. + * Those that have exactly 1 ref are not used: + * we have a clean unused block: throw it away. + */ + dDprint("block out: m%#p d%#ullx\n", b, b->addr); + mbput(b); + n++; + tot++; + } + noerror(); + qunlock(&fs->fzlk); + }while(n > 0); + if(tot == 0) + fprint(2, "%s: out: everything in use or dirty.\n", argv0); + else + dDprint("out: %uld blocks\n", tot); + return 1; +} + +static uvlong +fsdiskfree(void) +{ + uvlong nfree; + + qlock(fs); + nfree = fs->super->d.nfree; + nfree += (fs->limit - fs->super->d.eaddr)/Dblksz; + qunlock(fs); + return nfree; +} + +/* + * Freeze requires 3 free blocks, but we declare the fs full + * when less that Dzerofree are avail, to prevent freeze from + * failing should we made a mistake counting 1, 2, 3. + */ +int +fsfull(void) +{ + return fsdiskfree() < Dzerofree; +} + +/* + * This should be called if fs->super->d.nfree < some number. 
+ */
+/*
+ * Reclaim disk space by removing the oldest snapshots from the archive.
+ *
+ * Returns 0, doing nothing, if more than Dminfree blocks are free.
+ * Otherwise, while no more than Dmaxfree blocks are free, the child of
+ * the archive with the smallest epoch is unlinked (editing the archive
+ * in place and writing the entry to disk) and reclaimed, and then the
+ * reference counters and the super block are frozen and written.
+ * It stops when the archive has fewer than two children.
+ * Returns 1 if it tried to reclaim.
+ */
+int
+fsreclaim(void)
+{
+	Memblk *arch, *c, *victim;
+	int i;
+	u64int addr;
+	Blksl sl;
+	Dentry *de;
+	ulong n, tot;
+
+	if(fsdiskfree() > Dminfree)
+		return 0;
+
+	qlock(&fs->fzlk);
+	arch = fs->archive;
+	rwlock(arch, Wr);
+	if(catcherror()){
+		rwunlock(arch, Wr);
+		qunlock(&fs->fzlk);
+		error(nil);
+	}
+	tot = 0;
+	for(;;){
+		if(fsdiskfree() > Dmaxfree){
+			dDprint("fsreclaim: got >= %d free\n", Dmaxfree);
+			break;
+		}
+		dDprint("fsreclaim: reclaiming\n");
+		/* the victim is the child with the smallest epoch */
+		victim = nil;
+		for(i = 0; (c = dfchild(arch, i)) != nil; i++){
+			if(victim == nil)
+				victim = c;
+			else if(victim->d.epoch > c->d.epoch){
+				mbput(victim);
+				victim = c;
+			}else
+				mbput(c);
+
+		}
+		if(i < 2){
+			mbput(victim);
+			dDprint("nothing to reclaim\n");
+			break;
+		}
+		fprint(2, "%s: reclaiming /archive/%s\n", argv0, victim->mf->name);
+		dDprint("victim is %H\n", victim);
+
+		/*
+		 * Don't make a new archive. Edit in-place the one we have to
+		 * clear the reference to the victim.
+		 */
+		addr = dfchdentry(arch, victim->addr, 0, 0);
+		assert(addr != Noaddr);
+		sl = dfslice(arch, sizeof(Dentry), addr, 0);
+		assert(sl.b);
+		if(catcherror()){
+			mbput(sl.b);
+			error(nil);
+		}
+		de = sl.data;
+		de->file = 0;
+		dbwrite(sl.b);
+		noerror();
+		mbput(sl.b);
+
+		n = dbgetref(victim->addr);
+		if(n != 1)
+			fatal("reclaim: victim disk ref is %uld != 1", n);
+
+		fs->super->d.root = fs->archive->addr;
+
+		n = dfreclaim(victim);
+		mbput(victim);
+		dDprint("%uld block%s reclaimed\n", n, n == 1 ? "" : "s");
+		tot += n;
+
+		freezerefs();
+		writerefs();
+		freezesuper();
+		writezsuper();
+	}
+	if(tot > 0)
+		fprint(2, "%s: %uld block%s reclaimed\n", argv0, tot, tot == 1 ? "" : "s");
+	rwunlock(arch, Wr);
+	qunlock(&fs->fzlk);
+	noerror();
+	return 1;
+}
+
+void
+fspolicy(void)
+{
+	/*
+	 * If low on memory, move some blocks out.
+	 * Otherwise, reclaim old snapshots if low on disk.
+	 */
+	if(!fslowmem())
+		fsreclaim();
+}
+
+void
+consprint(char *fmt, ...)
+{ + va_list arg; + char *s, *x; + + /* + * Format a message and queue it on fs->consc, where consread + * picks it up. + */ + va_start(arg, fmt); + s = vsmprint(fmt, arg); + va_end(arg); + /* + * vsmprint returns nil on failure: drop the message instead of + * queueing a nil pointer that consread would hand to strlen. + */ + if(s == nil) + return; + /* consume some message if the channel is full */ + while(nbsendp(fs->consc, s) == 0) + if((x = nbrecvp(fs->consc)) != nil) + free(x); +} + +/* + * Wait for the next console message and copy at most count bytes of + * it into buf, without a terminating NUL. The message is then freed, + * so whatever did not fit is lost. Returns the number of bytes copied. + */ +long +consread(char *buf, long count) +{ + char *s; + + s = recvp(fs->consc); + if(count > strlen(s)) + count = strlen(s); + memmove(buf, s, count); + free(s); + return count; +} + +/* + * XXX: conswrite should take a look at the command and process it, + * the reply must be issued by calling consprint(), + * the writer should be also reading the file. + * For now every write longer than one byte overwrites the last byte + * of buf with a NUL and is answered with "??". + */ +long +conswrite(char *buf, long count) +{ + if(count <= 1) + return 0; + buf[count-1] = 0; + consprint("??\n"); + return count; +} diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/mblk.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/mblk.c Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,464 @@ +#include +#include +#include +#include +#include +#include + +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "fns.h" + +/* + * memory blocks. + * see dk.h + */ + +/* + * For simplicity, functions in mblk.c do not raise errors. + * (debug dump functions may be an exception). + */ + +/* + * Name of block type t for debug output, or "BADTYPE" when t is + * outside the table. + */ +char* +tname(int t) +{ + static char*nms[] = { + [DBfree] "DBfree", + [DBnew] "DBnew", + [DBsuper] "DBsuper", + [DBref] "DBref", + [DBdata] "DBdata", + [DBattr] "DBattr", + [DBfile] "DBfile", + [DBptr0] "DBptr0", + [DBptr0+1] "DBptr1", + [DBptr0+2] "DBptr2", + [DBptr0+3] "DBptr3", + [DBptr0+4] "DBptr4", + [DBptr0+5] "DBptr5", + [DBptr0+6] "DBptr6", + }; + + if(t < 0 || t >= nelem(nms)) + return "BADTYPE"; + return nms[t]; +} + +/* low 32 bits of e */ +#define EP(e) ((e)&0xFFFFFFFFUL) +/* + * NO LOCKS. 
debug only + */ +/* + * Indent by t levels: a tab for the first level and the string printed + * in the loop below for every further one. + */ static void +fmttab(Fmt *fmt, int t) +{ + if(t-- > 0) + fmtprint(fmt, "\t"); + while(t-- > 0) + fmtprint(fmt, " "); +} +int mbtab; /* current dump indentation, passed to fmttab */ +/* + * Dump the block at disk address addr if it is found in memory; + * otherwise print one line with tag, n and the address. + * Address 0 prints nothing. + */ static void +fmtptr(Fmt *fmt, u64int addr, char *tag, int n) +{ + Memblk *b; + + if(addr == 0) + return; + b = mbget(addr, 0); + if(b == nil){ + fmttab(fmt, mbtab); + fmtprint(fmt, " %s[%d] = d%#ullx \n", tag, n, addr); + }else{ + decref(b); /* drop the reference taken by mbget before printing; mbput is not used here */ + fmtprint(fmt, "%H", b); + } +} +/* + * Print up to 5 64-bit words of the embedded area that follows the + * attributes (offset embedattrsz(b)). Prints nothing when mf->length + * is 0 or when that offset is not below Embedsz. + */ static void +dumpsomedata(Fmt *fmt, Memblk *b) +{ + long doff; + u64int *p; + int i; + + if(b->mf->length == 0) + return; + doff = embedattrsz(b); + if(doff < Embedsz){ + fmttab(fmt, mbtab); + p = (u64int*)(b->d.embed+doff); + for(i = 0; i < 5 && (uchar*)p < b->d.embed+Embedsz - BIT64SZ; i++) + fmtprint(fmt, "%s%#ullx", i?" ":" data: ", *p++); + fmtprint(fmt, "\n"); + } +} + +/* + * Format routine for a Memblk taken from fmt->args. + * NOTE(review): presumably installed for the %H verb used in this file; + * the registration is not visible here. + * Prints a header with addresses, state flags, type, reference count, + * tag and epoch, then type-specific detail. Blocks reachable through + * pointers are dumped via fmtptr, indented by mbtab. + */ int +mbfmt(Fmt *fmt) +{ + Memblk *b; + int type, i, n, xdbg; + + b = va_arg(fmt->args, Memblk*); + if(b == nil) + return fmtprint(fmt, "\n"); + type = TAGTYPE(b->d.tag); + fmttab(fmt, mbtab); + xdbg = dbg['D']; /* the D debug flag is cleared during the dump and restored before the final return */ + dbg['D'] = 0; + fmtprint(fmt, "m%#p d%#ullx", b, b->addr); + if(b->frozen) + fmtprint(fmt, " FZ"); + if(b->dirty) + fmtprint(fmt, " DT"); + if(b->written) + fmtprint(fmt, " WR"); + fmtprint(fmt, " %s r%d", tname(type), b->ref); + fmtprint(fmt, " tag %#ullx epoch %#ullx", EP(b->d.tag), EP(b->d.epoch)); + switch(type){ + case DBfree: + fmtprint(fmt, "\n"); + break; + case DBdata: + case DBattr: + fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); + break; + case DBref: + fmtprint(fmt, " rnext m%#p", b->rnext); + for(i = n = 0; i < Drefperblk; i++) + if(b->d.ref[i]){ + if(n++%4 == 0){ + fmtprint(fmt, "\n"); + fmttab(fmt, mbtab); + } + fmtprint(fmt, " "); + fmtprint(fmt, "[%d]d%#ullx=%#ullx", + i, addrofref(b->addr, i), b->d.ref[i]); + } + if(n == 0 || --n%4 != 0) + fmtprint(fmt, "\n"); + break; + case DBfile: + fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); + if(b->mf == nil){ + fmtprint(fmt, " no mfile\n"); + break; + } + fmttab(fmt, mbtab); + fmtprint(fmt, " '%s' asz 
%#ullx aptr %#ullx melted m%#p\n", + b->mf->name, b->d.asize,b->d.aptr, b->mf->melted); + fmttab(fmt, mbtab); + fmtprint(fmt, " id %#ullx mode %M mt %#ullx sz %#ullx '%s'\n", + EP(b->mf->id), (ulong)b->mf->mode, EP(b->mf->mtime), + b->mf->length, b->mf->uid); + fmttab(fmt, mbtab); + fmtprint(fmt, " parent m%#p nr%d nw%d\n", + b->mf->parent, b->mf->readers, b->mf->writer); + dumpsomedata(fmt, b); + mbtab++; + for(i = 0; i < nelem(b->d.dptr); i++) + fmtptr(fmt, b->d.dptr[i], "d", i); + for(i = 0; i < nelem(b->d.iptr); i++) + fmtptr(fmt, b->d.iptr[i], "i", i); + mbtab--; + break; + case DBsuper: + fmtprint(fmt, "\n"); + fmttab(fmt, mbtab); + fmtprint(fmt, " free d%#ullx eaddr d%#ullx root d%#ullx\n", + b->d.free, b->d.eaddr, b->d.root); + break; + default: + /* anything left must be a pointer block; report the type that is not */ + if(type < DBptr0 || type >= DBptr0+Niptr) + fatal("mbfmt: bad type %d", type); + fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); + mbtab++; + for(i = 0; i < Dptrperblk; i++) + fmtptr(fmt, b->d.ptr[i], "p", i); + mbtab--; + break; + } + dbg['D'] = xdbg; + return 0; +} + +/* + * Mark b as not dirty. + */ +void +clean(Memblk *b) +{ + b->dirty = 0; +} + +/* + * Panic if b is frozen, unless b is the archive. + * The message carries the pc of the caller. + */ +void +ismelted(Memblk *b) +{ + if(b != fs->archive && b->frozen) + fatal("frozen at pc %#p", getcallerpc(&b)); +} + +/* + * Record a modification of b: stamp d.epoch with now(), set dirty and + * clear written. Every block but the super block must be melted. + */ +void +changed(Memblk *b) +{ + if(TAGTYPE(b->d.tag) != DBsuper) + ismelted(b); + b->d.epoch = now(); + b->dirty = 1; + b->written = 0; +} + +/* + * Remove b from the list that runs from fs->mru to fs->lru through + * lnext/lprev. The callers in this file hold fs->llk. + */ +static void +lruunlink(Memblk *b) +{ + if(b->lprev != nil) + b->lprev->lnext = b->lnext; + else + fs->mru = b->lnext; + if(b->lnext != nil) + b->lnext->lprev = b->lprev; + else + fs->lru = b->lprev; + b->lnext = nil; + b->lprev = nil; +} + + +/* + * Insert b at the fs->mru end of the list. + * The callers in this file hold fs->llk. + */ +static void +lrulink(Memblk *b) +{ + b->lnext = fs->mru; + b->lprev = nil; + if(fs->mru) + fs->mru->lprev = b; + else + fs->lru = b; + fs->mru = b; +} + +/* + * Move b to the fs->mru end of the list. + */ +static void +mbused(Memblk *b) +{ + qlock(&fs->llk); + lruunlink(b); + lrulink(b); + qunlock(&fs->llk); +} + +/* + * Put a block on the lru list; DBref blocks are also chained + * on fs->refs through rnext. + */ +static void +linkblock(Memblk *b) +{ + if(TAGTYPE(b->d.tag) == DBref){ + qlock(fs); + b->rnext = fs->refs; + fs->refs = b; + qunlock(fs); + } + qlock(&fs->llk); + 
lrulink(b); + qunlock(&fs->llk); +} + +/* + * Append b to the hash bucket for its address, taking one reference + * for the table, and link it (see linkblock). Returns b. + * It is fatal if a block with the same address is already hashed + * or if b->next is set. + */ +Memblk* +mbhash(Memblk *b) +{ + Memblk **h; + uint hv; + + hv = b->addr%nelem(fs->fhash); + qlock(&fs->fhash[hv]); + fs->nused++; + for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) + if((*h)->addr == b->addr) + fatal("mbhash: dup"); + *h = b; + if(b->next != nil) + fatal("mbhash: next"); + incref(b); + linkblock(b); + + qunlock(&fs->fhash[hv]); + return b; +} + +/* + * Remove b from its hash bucket and from the lru list, clearing + * b->next and decrementing fs->nused. DBref blocks may not be + * unhashed; it is fatal if b is not the block hashed for its address. + */ +void +mbunhash(Memblk *b) +{ + Memblk **h; + uint hv; + + if(TAGTYPE(b->d.tag) == DBref) + fatal("mbunhash: DBref"); + + hv = b->addr%nelem(fs->fhash); + qlock(&fs->fhash[hv]); + for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) + if((*h)->addr == b->addr){ + if(*h != b) + fatal("mbunhash: dup"); + *h = b->next; + b->next = nil; + fs->nused--; + qlock(&fs->llk); + lruunlink(b); + qunlock(&fs->llk); + qunlock(&fs->fhash[hv]); + return; + } + fatal("mbunhash: not found"); +} + +/* + * Return b, which must have no references left, to the free list: + * unhash it (the super block is not hashed), release its Mfile if it + * is a file, reset it and push it on fs->free. nil is ignored. + */ +static void +mbfree(Memblk *b) +{ + Mfile *mf; + + if(b == nil) + return; + dDprint("mbfree %H\n", b); + if(b->ref > 0) + fatal("mbfree: has %d refs", b->ref); + + if(TAGTYPE(b->d.tag) != DBsuper) + mbunhash(b); + /* + * Check next only now: while b is hashed, next is its hash chain + * link, which mbunhash clears. + */ + if(b->next != nil) + fatal("mbfree: has next"); + /* + * this could panic, but errors reading a block might cause it + * NOTE(review): mbunhash above already panics for DBref blocks, + * so this warning looks unreachable; confirm the intent. + */ + if(TAGTYPE(b->d.tag) == DBref) + fprint(2, "%s: free of DBref. 
i/o errors?\n", argv0); + + if(TAGTYPE(b->d.tag) == DBfile && b->mf != nil){ + mf = b->mf; + b->mf = nil; + mbput(mf->melted); + mf->melted = nil; + mbput(mf->parent); + mf->parent = nil; + mf->next = nil; + assert(mf->writer == 0 && mf->readers == 0); + mffree(mf); + } + b->d.tag = DBfree; + b->frozen = b->written = b->dirty = 0; + b->addr = 0; + + qlock(fs); + fs->nfree++; + b->next = fs->free; + fs->free = b; + qunlock(fs); +} + +Memblk* +mballoc(u64int addr) +{ + Memblk *b; + + b = nil; + qlock(fs); + if(fs->nblk < fs->nablk) + b = &fs->blk[fs->nblk++]; + else if(fs->free != nil){ + b = fs->free; + fs->free = b->next; + fs->nfree--; + }else{ + qunlock(fs); + fatal("mballoc: evict block not implemented"); + } + qunlock(fs); + memset(b, 0, sizeof *b); + b->addr = addr; + b->ref = 1; + dDprint("mballoc %#ullx -> %H", addr, b); + return b; +} + +Memblk* +mbget(u64int addr, int mkit) +{ + Memblk *b; + uint hv; + + hv = addr%nelem(fs->fhash); + qlock(&fs->fhash[hv]); + for(b = fs->fhash[hv].b; b != nil; b = b->next) + if(b->addr == addr){ + incref(b); + break; + } + if(mkit) + if(b == nil){ + b = mballoc(addr); + b->d.tag = TAG(addr, DBnew); + b->next = fs->fhash[hv].b; + fs->fhash[hv].b = b; + incref(b); + linkblock(b); + qlock(&b->newlk); /* make others wait for it */ + }else if(TAGTYPE(b->d.tag) == DBnew){ + qunlock(&fs->fhash[hv]); + qlock(&b->newlk); /* wait for it */ + qunlock(&b->newlk); + if(TAGTYPE(b->d.tag) == DBnew){ + mbput(b); + dDprint("mbget %#ullx -> i/o error\n", addr); + return nil; /* i/o error reading it */ + } + dDprint("mbget %#ullx -> waited for m%#p\n", addr, b); + return b; + } + qunlock(&fs->fhash[hv]); + if(b != nil) + mbused(b); + dDprint("mbget %#ullx -> m%#p\n", addr, b); + return b; +} + +void +mbput(Memblk *b) +{ + if(b == nil) + return; + dDprint("mbput m%#p pc=%#p\n", b, getcallerpc(&b)); + if(decref(b) == 0) + mbfree(b); +} + +Memblk* +mbdup(Memblk *b) +{ + Memblk *nb; + + nb = mballoc(b->addr); + memmove(&nb->d, &b->d, sizeof 
b->d); + return nb; +} + +/* + * Get an Mfile: reuse the head of the fs->mfree list when there is one, + * else allocate a zeroed one with mallocz. A reused Mfile only has its + * next field cleared here, and the mallocz result is not checked. + */ Mfile* +mfalloc(void) +{ + Mfile *mf; + + qlock(&fs->mlk); + mf = fs->mfree; + if(mf != nil){ + fs->mfree = mf->next; + mf->next = nil; + } + qunlock(&fs->mlk); + if(mf == nil) + mf = mallocz(sizeof *mf, 1); + return mf; +} + +/* + * Put mf on the fs->mfree list for reuse by mfalloc; nil is ignored. + */ void +mffree(Mfile *mf) +{ + if(mf == nil) + return; + qlock(&fs->mlk); + mf->next = fs->mfree; + fs->mfree = mf; + qunlock(&fs->mlk); +} + diff -r a8d2211aa14a -r f1f2a23bbcce sys/src/cmd/creepy/mkfile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/cmd/creepy/mkfile Fri Feb 17 17:38:42 2012 +0000 @@ -0,0 +1,32 @@ +fns.h