Reference: /n/patches.lsub.org/patch/creep Date: Fri Apr 13 13:00:23 CES 2012 Signed-off-by: nemo@lsub.org --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,774 @@ +#include "all.h" + +/* + * 9p server for creepy + */ + +static void rflush(Rpc*), rversion(Rpc*), rauth(Rpc*), + rattach(Rpc*), rwalk(Rpc*), + ropen(Rpc*), rcreate(Rpc*), + rread(Rpc*), rwrite(Rpc*), rclunk(Rpc*), + rremove(Rpc*), rstat(Rpc*), rwstat(Rpc*); +/* Handler table indexed by 9P T-message type; cliworker9p rejects types whose entry is nil. */ +static void (*fcalls[])(Rpc*) = +{ + [Tversion] rversion, + [Tflush] rflush, + [Tauth] rauth, + [Tattach] rattach, + [Twalk] rwalk, + [Topen] ropen, + [Tcreate] rcreate, + [Tread] rread, + [Twrite] rwrite, + [Tclunk] rclunk, + [Tremove] rremove, + [Tstat] rstat, + [Twstat] rwstat, +}; +/* Append allocator statistics for fids, rpcs and clients to the buffer [s, e) and return the updated write pointer. With verb non-zero, also list every request type that has a handler and a non-zero call count: its call count and calltime/ncalls/1000, labelled as microseconds per call. With clr set those two counters are reset after being printed. */ +char* +ninestats(char *s, char *e, int clr, int verb) +{ + int i; + + s = seprint(s, e, "fids:\t%4uld alloc %4uld free (%4uld bytes)\n", + fidalloc.nalloc, fidalloc.nfree, fidalloc.elsz); + s = seprint(s, e, "rpcs:\t%4uld alloc %4uld free (%4uld bytes)\n", + rpcalloc.nalloc, rpcalloc.nfree, rpcalloc.elsz); + s = seprint(s, e, "clis:\t%4uld alloc %4uld free (%4uld bytes)\n", + clialloc.nalloc, clialloc.nfree, clialloc.elsz); + if(verb == 0) + return s; + for(i = 0; i < nelem(fcalls); i++) + if(fcalls[i] != nil && ncalls[i] > 0){ + s = seprint(s, e, "%-8s\t%5uld calls\t%11ulld µs per call\n", + callname[i], ncalls[i], + (calltime[i]/ncalls[i])/1000); + if(clr){ + ncalls[i] = 0; + calltime[i] = 0; + } + } + return s; +} + +/* + * Ok if f is nil, for auth files. 
+ */ +static Qid +mkqid(Memblk *f) +{ + Qid q; + static uvlong authgen; + /* nil f: return a QTAUTH qid whose path is a static counter bumped on every call */ + if(f == nil){ + authgen++; + q.type = QTAUTH; + q.path = authgen; + q.vers = 0; + return q; + } + /* otherwise path is the file id, vers is taken from mtime, and the type bits mirror the DM* mode bits; QTCACHE is added unless QTEXCL is set */ + q.path = f->d.id; + q.vers = f->d.mtime; + q.type = 0; + if(f->d.mode&DMDIR) + q.type |= QTDIR; + if(f->d.mode&DMTMP) + q.type |= QTTMP; + if(f->d.mode&DMAPPEND) + q.type |= QTAPPEND; + if(f->d.mode&DMEXCL) + q.type |= QTEXCL; + if((q.type&QTEXCL) == 0) + q.type |= QTCACHE; + return q; +} +/* Tversion: clamp the message size to Maxmdata, record it in the client and accept only versions that start with 9P2000. The client msize is stored before the version check can raise its error. */ +static void +rversion(Rpc *rpc) +{ + rpc->r.msize = rpc->t.msize; + if(rpc->r.msize > Maxmdata) + rpc->r.msize = Maxmdata; + rpc->cli->msize = rpc->r.msize; + if(strncmp(rpc->t.version, "9P2000", 6) != 0) + error("unknown protocol version"); + rpc->r.version = "9P2000"; +} + +/* + * Served in the main client process. + * Tflush: look up the pending request whose tag equals oldtag in the + * client rpc list and, if found, mark it as flushed. Both the reply lock + * (wlk) and the rpc list lock (rpclk) are held while doing so. + */ +static void +rflush(Rpc *rpc) +{ + Cli *cli; + Rpc *r; + + cli = rpc->cli; + xqlock(&cli->wlk); /* nobody replies now */ + xqlock(&rpc->cli->rpclk); + for(r = rpc->cli->rpcs; r != nil; r = r->next) + if(r->t.tag == rpc->t.oldtag) + break; + if(r != nil){ + r->flushed = 1; + if(r->t.type == Tread && r->fid->consopen) + consprint(""); /* in case it's waiting... 
*/ + } + xqunlock(&rpc->cli->rpclk); + xqunlock(&cli->wlk); +} +/* Tauth: refused when noauth is set. Otherwise create the afid, set its uid from uname, open a factotum rpc on /mnt/factotum/rpc and start the p9any server-role conversation. The reply qid is the auth qid produced by mkqid(nil). */ +static void +rauth(Rpc *rpc) +{ + Fid *fid; + static char spec[] = "proto=p9any role=server"; + + if(noauth) + error("no auth required"); + + fid = newfid(rpc->cli, rpc->t.afid); + rpc->fid = fid; + + setfiduid(fid, rpc->t.uname); + + fid->omode = ORDWR; + fid->afd = open("/mnt/factotum/rpc", ORDWR); + if(fid->afd < 0) + error("factotum: %r"); + fid->rpc = auth_allocrpc(fid->afd); + if(fid->rpc == nil){ + close(fid->afd); + error("auth rpc: %r"); + } + if(auth_rpc(fid->rpc, "start", spec, strlen(spec)) != ARok){ + auth_freerpc(fid->rpc); + close(fid->afd); + error("auth_rpc start failed"); + } + rpc->r.qid = mkqid(nil); + d9print("factotum rpc started\n"); +} +/* Perform one read step of the factotum conversation on an auth fid. ARdone: fetch and free the AuthInfo, mark the fid authok and return 0. ARok: copy the narg bytes produced by factotum into data (count must be at least narg) and return narg. Any other result raises a phase error. */ +static long +xauthread(Fid *fid, long count, void *data) +{ + AuthInfo *ai; + + switch(auth_rpc(fid->rpc, "read", nil, 0)){ + case ARdone: + ai = auth_getinfo(fid->rpc); + if(ai == nil) + error("authread: info: %r"); + auth_freeAI(ai); + d9print("auth: %s: ok\n", usrname(fid->uid)); + fid->authok = 1; + return 0; + case ARok: + if(count < fid->rpc->narg) + error("authread: count too small"); + count = fid->rpc->narg; + memmove(data, fid->rpc->arg, count); + return count; + } + error("authread: phase error"); + return -1; +} +/* Tattach: create the fid and attach it to aname as uname. Unless noauth is set, afid must be an auth fid (a zero-count auth read is issued if it is not yet authok) and both fids must end up with the same uid. Replies with the qid of the last file in the fid path and stores the uid in the client if the client uid is still -1. */ +static void +rattach(Rpc *rpc) +{ + Fid *fid, *afid; + Path *p; + Memblk *f; + char buf[ERRMAX]; + + fid = newfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + afid = nil; + if(!noauth){ + afid = getfid(rpc->cli, rpc->t.afid); + if(catcherror()){ + putfid(afid); + error(nil); + } + if(afid->rpc == nil) + error("afid is not an auth fid"); + if(afid->authok == 0) + xauthread(afid, 0, buf); + } + fidattach(fid, rpc->t.aname, rpc->t.uname); + if(!noauth){ + if(fid->uid != afid->uid) + error("auth uid mismatch"); + noerror(); + putfid(afid); + } + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Rd); + rpc->r.qid = mkqid(f); + rwunlock(f, Rd); + + if(rpc->cli->uid == -1) + rpc->cli->uid = rpc->fid->uid; +} + +static void +rwalk(Rpc 
*rpc) +{ + Fid *fid, *nfid; + Path *p; + Memblk *nf; + int i; + /* Twalk: walk a clone of fid (newfid) one name at a time, collecting one qid per successful step. A failure on the first name is reported as an error; a later failure ends the walk with a short reply and the clone is released. On a complete walk the clone replaces fid as the rpc fid. NOTE(review): when fid == newfid, nfid stays nil and is still handed to fidwalk and putfid - confirm they accept nil or whether nfid should alias fid. */ + rpc->fid = getfid(rpc->cli, rpc->t.fid); + fid = rpc->fid; + if(rpc->t.fid == rpc->t.newfid && rpc->t.nwname > 1) + error("can't walk like a clone without one"); + nfid = nil; + if(rpc->t.fid != rpc->t.newfid) + nfid = fidclone(rpc->cli, rpc->fid, rpc->t.newfid); + if(catcherror()){ + putfid(nfid); + putfid(nfid); /* clunk */ + error(nil); + } + rpc->r.nwqid = 0; + for(i=0; i < rpc->t.nwname; i++){ + if(catcherror()){ + if(rpc->r.nwqid == 0) + error(nil); + break; + } + fidwalk(nfid, rpc->t.wname[i]); + noerror(); + p = nfid->p; + nf = p->f[p->nf-1]; + rwlock(nf, Rd); + rpc->r.wqid[i] = mkqid(nf); + rwunlock(nf, Rd); + rpc->r.nwqid++; + USED(rpc->r.nwqid); /* damn error()s */ + } + if(i < rpc->t.nwname){ + putfid(nfid); + putfid(nfid); /* clunk */ + }else{ + putfid(fid); + rpc->fid = nfid; + } + noerror(); +} +/* Topen: auth fids are treated as already open and return at once. Otherwise open the fid with the requested mode and reply with the qid of the last file in its path and an iounit of msize-IOHDRSZ. */ +static void +ropen(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + + rpc->fid = getfid(rpc->cli, rpc->t.fid); + fid = rpc->fid; + + if(fid->rpc != nil) /* auth fids are always open */ + return; + + rpc->r.iounit = rpc->cli->msize - IOHDRSZ; + fidopen(rpc->fid, rpc->t.mode); + f = fid->p->f[fid->p->nf-1]; + rwlock(f, Rd); + rpc->r.qid = mkqid(f); + rwunlock(f, Rd); +} +/* Tcreate: not allowed on auth fids. Creates name through fidcreate and replies with the qid of the last file in the fid path (read under the file read lock) and the iounit. */ +static void +rcreate(Rpc *rpc) +{ + Fid *fid; + Path *p; + Memblk *f; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil) + error("create on auth fid"); + + fidcreate(fid, rpc->t.name, rpc->t.mode, rpc->t.perm); + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Rd); + rpc->r.qid = mkqid(f); + rwunlock(f, Rd); + rpc->r.iounit = rpc->cli->msize-IOHDRSZ; +} +/* Build a Dir from the in-memory file f and marshal it into buf with convD2M, returning the convD2M result. Directories report length 0. NOTE(review): mtime is divided by NSPERSEC while atime is copied unchanged - confirm the units of d.atime. */ +static ulong +pack9dir(Memblk *f, uchar *buf, int nbuf) +{ + Dir d; + + nulldir(&d); + d.name = f->mf->name; + d.qid = mkqid(f); + d.mode = f->d.mode; + d.length = f->d.length; + if(d.mode&DMDIR) + d.length = 0; + d.uid = f->mf->uid; + d.gid = f->mf->gid; + d.muid = f->mf->muid; + d.atime = f->d.atime; + d.mtime = f->d.mtime / NSPERSEC; + return convD2M(&d, 
buf, nbuf); +} + +static void +authread(Rpc *rpc) +{ + Fid *fid; + + fid = rpc->fid; + if(fid->rpc == nil) + error("authread: not an auth fid"); + rpc->r.data = (char*)rpc->data; + rpc->r.count = xauthread(fid, rpc->t.count, rpc->r.data); + putfid(fid); + rpc->fid = nil; +} + +static void +rread(Rpc *rpc) +{ + Fid *fid; + vlong off; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil){ + authread(rpc); + return; + } + if(rpc->t.count > rpc->cli->msize-IOHDRSZ) + rpc->r.count = rpc->cli->msize-IOHDRSZ; + rpc->r.data = (char*)rpc->data; + off = rpc->t.offset; + rpc->r.count = fidread(fid, rpc->r.data, rpc->t.count, off, pack9dir); + +} + +static void +authwrite(Rpc *rpc) +{ + Fid *fid; + + fid = rpc->fid; + if(fid->rpc == nil) + error("authwrite: not an auth fid"); + if(auth_rpc(fid->rpc, "write", rpc->t.data, rpc->t.count) != ARok) + error("authwrite: %r"); + rpc->r.count = rpc->t.count; + putfid(fid); + rpc->fid = nil; +} + +static void +rwrite(Rpc *rpc) +{ + Fid *fid; + uvlong off; + + if(rpc->t.offset < 0) + error("negative offset"); + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil){ + authwrite(rpc); + return; + } + off = rpc->t.offset; + rpc->r.count = fidwrite(fid, rpc->t.data, rpc->t.count, &off); +} + +static void +rclunk(Rpc *rpc) +{ + Fid *fid; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil){ + fid->omode = -1; + if(fid->rpc != nil) + auth_freerpc(fid->rpc); + fid->rpc = nil; + close(fid->afd); + fid->afd = -1; + }else if(fid->omode != -1) + fidclose(fid); + putfid(fid); + putfid(fid); + rpc->fid = nil; +} + +static void +rremove(Rpc *rpc) +{ + Fid *fid; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil) + error("remove on auth fid"); + if(catcherror()){ + dEprint("clunking %X:\n\t%r\n", fid); + putfid(fid); + putfid(fid); + rpc->fid = nil; + error(nil); + } + + fidremove(fid); + noerror(); + dEprint("clunking %X\n\n", fid); + putfid(fid); + 
putfid(fid); + rpc->fid = nil; +} +/* Tstat: not allowed on auth fids. The last file of the fid path is resolved under the fid lock and read-locked; its Dir is then packed into rpc->data. A packed size of 2 or less is treated as fatal. */ +static void +rstat(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + Path *p; + + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil) + error("stat on auth fid"); + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = lastpath(&fid->p, fid->p->nf); + f = p->f[p->nf-1]; + rwlock(f, Rd); + noerror(); + xqunlock(fid); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + rpc->r.stat = rpc->data; + rpc->r.nstat = pack9dir(f, rpc->data, rpc->cli->msize-IOHDRSZ); + if(rpc->r.nstat <= 2) + fatal("rstat: convD2M"); + noerror(); + rwunlock(f, Rd); +} +/* Twstat: every requested field is validated first; a field that needs no change is reset to its null value (~0 or an empty string) so that the final update step skips it. Nothing is written until all checks have passed. */ +static void +rwstat(Rpc *rpc) +{ + Fid *fid; + Memblk *f; + Path *p; + Dir sd; + u64int n; + /* the decoded strings are stored back into the request stat buffer itself */ + n = convM2D(rpc->t.stat, rpc->t.nstat, &sd, (char*)rpc->t.stat); + if(n != rpc->t.nstat) + error("convM2D: bad stat"); + fid = getfid(rpc->cli, rpc->t.fid); + rpc->fid = fid; + if(fid->rpc != nil) + error("wstat on auth fid"); + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fs->worm) + error("read only file system"); + if(writedenied(fid->uid)) + error("user can't write"); + p = fid->p; + f = p->f[p->nf-1]; + if(fid->archived || isro(f)) + error("can't wstat archived or built-in files"); + p = meltedpath(&fid->p, fid->p->nf, 1); + f = p->f[p->nf-1]; + noerror(); + xqunlock(fid); + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + /* from here on the error handler releases a write lock on f; presumably meltedpath returned it write-locked - confirm */ + if(f->d.mode&DMUSERS) + error("can't wstat the users file"); + if(sd.length != ~0 && sd.length != f->d.length){ + if(f->d.mode&DMDIR) + error("can't resize a directory"); + if(sd.length != 0) + error("can't truncate to non-zero length"); + dfaccessok(f, fid->uid, AWRITE); + }else + sd.length = ~0; + /* NOTE(review): in the rename check below the already-exists error is raised while the catcherror frame installed by that same if is still the innermost one, so it looks like it is caught there instead of reaching the client - confirm against the catcherror/error semantics. */ + if(sd.name[0] && strcmp(f->mf->name, sd.name) != 0){ + if(isro(f) || f == fs->active) + error("can't rename built-in files"); + dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); + if(!catcherror()){ + mbput(dfwalk(p->f[p->nf-2], sd.name)); + error("file already exists"); + } + }else + sd.name[0] = 0; + 
+ if(sd.uid[0] != 0 && strcmp(sd.uid, f->mf->uid) != 0){ + if(!allowed(f->d.uid)){ + if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) + error("not the owner or group leader"); + if(!member(usrid(sd.uid), fid->uid) != 0) + error("you are not a member"); + } + }else + sd.uid[0] = 0; + /* NOTE(review): the allowed() checks in this function are given the file owner (f->d.uid), whereas dfaccessok passes the requesting uid - confirm which one is intended. The form !member(...) != 0 used here is equivalent to !member(...). */ + if(sd.gid[0] != 0 && strcmp(sd.gid, f->mf->gid) != 0){ + /* + * Not std. in 9: leader must be member of the new gid, not + * leader of the new gid. + */ + if(!allowed(f->d.uid)){ + if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) + error("not the owner or group leader"); + if(!member(usrid(sd.gid), fid->uid) != 0) + error("you are not a member"); + } + }else + sd.gid[0] = 0; + + /* + * Not std. in 9: muid can be updated if uid is allowed, it's + * ignored otherwise. + */ + if(sd.muid[0] != 0 && strcmp(sd.muid, f->mf->muid) != 0){ + if(!allowed(f->d.uid)) + sd.muid[0] = 0; + }else + sd.muid[0] = 0; + /* mode: only bits within DMBITS may be set and the DMDIR bit cannot be flipped */ + if(sd.mode != ~0 && f->d.mode != sd.mode){ + if((sd.mode&DMBITS) != sd.mode) + error("unknown bit set in mode"); + if(!allowed(f->d.uid)) + if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) + error("not the owner or group leader"); + if((sd.mode&DMDIR) ^ (f->d.mode&DMDIR)) + error("attempt to change DMDIR"); + }else + sd.mode = ~0; + + /* + * Not std. in 9: allowed users can also set atime. + */ + if(sd.atime != ~0 && f->d.atime != sd.atime){ + if(!allowed(f->d.uid)) + sd.atime = ~0; /* ignore it */ + }else + sd.atime = ~0; + /* mtime: requires the owner or the group leader unless allowed() holds */ + if(sd.mtime != ~0 && f->d.mtime != sd.mtime){ + if(!allowed(f->d.uid)) + if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) + error("not the owner or group leader"); + }else + sd.mtime = ~0; + + /* + * Not std. in 9: other non-null fields, if any, are ignored. 
+ */ + if(sd.length != ~0) + wstatint(f, "length", sd.length); + if(sd.name[0]) + dfwattr(f, "name", sd.name); + if(sd.uid[0]) + dfwattr(f, "uid", sd.uid); + if(sd.gid[0]) + dfwattr(f, "gid", sd.gid); + if(sd.muid[0]) + dfwattr(f, "muid", sd.muid); + if(sd.mode != ~0) + wstatint(f, "mode", sd.mode); + if(sd.atime != ~0) + wstatint(f, "atime", sd.atime); + if(sd.mtime != ~0) + wstatint(f, "mtime", sd.mtime); + + noerror(); + rwunlock(f, Wr); +} +/* Run one 9P request to completion: call its handler from fcalls, turn a raised error into an Rerror reply carrying the error string, write the reply to the client unless the request was flushed, update the per-type call statistics and release the rpc. For Rread replies read-ahead is requested on the file after the reply lock is dropped. */ +static char* +rpcworker9p(void *v, void**aux) +{ + Rpc *rpc; + Cli *cli; + char err[128]; + long n; + int nerr; + Memblk *fahead; + + rpc = v; + cli = rpc->cli; + threadsetname("rpcworker9p %s %R", cli->addr, rpc); + dPprint("%s starting\n", threadgetname()); + /* *aux is used as a flag so that errinit runs only the first time this worker is used */ + if(*aux == nil){ + errinit(Errstack); + *aux = v; /* make it not nil */ + } + nerr = errstacksize(); + + + fspolicy(Pre); + /* the reply type is the request type plus one */ + rpc->r.tag = rpc->t.tag; + rpc->r.type = rpc->t.type + 1; + + quiescent(No); + if(catcherror()){ + quiescent(Yes); + rpc->r.type = Rerror; + rpc->r.ename = err; + rerrstr(err, sizeof err); + }else{ + fcalls[rpc->t.type](rpc); + quiescent(Yes); + noerror(); + } + /* keep a reference to the last file of the fid path for Rread and Rwalk replies, because the fid itself is released before replying */ + xqlock(&cli->wlk); + fahead = nil; + if(rpc->fid != nil && rpc->fid->p != nil) + if(rpc->r.type == Rread || rpc->r.type == Rwalk){ + fahead = rpc->fid->p->f[rpc->fid->p->nf - 1]; + incref(fahead); + } + if(catcherror()){ + mbput(fahead); + error(nil); + } + + putfid(rpc->fid); /* release rpc fid before replying */ + rpc->fid = nil; /* or we might get "fid in use" errors */ + + if(rpc->flushed == 0){ + d9print("-> %F\n", &rpc->r); + n = convS2M(&rpc->r, cli->wdata, sizeof cli->wdata); + if(n == 0) + fatal("rpcworker: convS2M"); + if(write(cli->fd, cli->wdata, n) != n) + d9print("%s: %r\n", cli->addr); + }else + dprint("flushed: %F\n", &rpc->r); + if(fs->profile) + calltime[rpc->t.type] += nsec() - rpc->t0; + ncalls[rpc->t.type]++; + xqunlock(&cli->wlk); + + if(fahead != nil){ + if(rpc->r.type == Rread) + rahead(fahead, rpc->t.offset + rpc->r.count); + mbput(fahead); + } + 
noerror(); + + replied(rpc); + freerpc(rpc); + + fspolicy(Post); + + dPprint("%s exiting\n", threadgetname()); + /* the error stack must be as deep as it was on entry */ + if(errstacksize() != nerr) + fatal("%s: unbalanced error stack", threadgetname()); + threadsetname("rpcworker9p"); + return nil; +} +/* Per-client receive loop: read 9P messages from cli->fd, decode them, link each request at the head of cli->rpcs and run it through rpcworker9p - inline for Tflush or when Rpcspercli is non-zero and the client already has that many requests, otherwise in a worker obtained with getworker. Undecodable messages and unknown types are logged and skipped. The loop ends on a read error. NOTE(review): a zero return from read9pmsg only retries the read - confirm this cannot spin at end of file. */ +char* +cliworker9p(void *v, void**aux) +{ + Cli *cli; + long n; + Rpc *rpc; + + cli = v; + threadsetname("cliworker9p %s", cli->addr); + dPprint("%s started\n", threadgetname()); + if(*aux == nil){ + errinit(Errstack); + *aux = v; /* make it not nil */ + } + + if(catcherror()) + fatal("worker: uncatched: %r"); + /* rpc is reused across iterations until a message has actually been dispatched */ + rpc = nil; + for(;;){ + if(dbg['E']) + dumpfids(); + if(rpc == nil) + rpc = newrpc(); + n = read9pmsg(cli->fd, rpc->data, Maxmdata+IOHDRSZ); + if(n < 0){ + d9print("%s: read: %r\n", cli->addr); + break; + } + if(n == 0) + continue; + if(fs->profile) + rpc->t0 = nsec(); + if(convM2S(rpc->data, n, &rpc->t) == 0){ + d9print("%s: convM2S failed\n", cli->addr); + continue; + } + if(rpc->t.type >= nelem(fcalls) || fcalls[rpc->t.type] == nil){ + d9print("%s: bad fcall type %d\n", cli->addr, rpc->t.type); + continue; + } + d9print("<-%F\n", &rpc->t); + rpc->cli = cli; + incref(cli); + + xqlock(&cli->rpclk); + rpc->next = cli->rpcs; + cli->rpcs = rpc; + cli->nrpcs++; + xqunlock(&cli->rpclk); + + if(rpc->t.type == Tflush || + (Rpcspercli != 0 && cli->nrpcs >= Rpcspercli)) + rpcworker9p(rpc, aux); + else + getworker(rpcworker9p, rpc, nil); + if(fs->halt){ + warn("halted"); + threadexitsall(nil); + } + rpc = nil; + } + putcli(cli); + noerror(); + dPprint("%s exiting\n", threadgetname()); + return nil; +}; --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,73 @@ +#include "all.h" +/* + * Creepy file server 9P and IX service. 
+ */ + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-a] [-A addr] [-S srv] disk\n", argv0); + exits("usage"); +} + +int mainstacksize = Stack; +/* Entry point of the file server: parse the options (-A listen address, -S srv name, -a to disable authentication, upper-case letters and 9 as debug flags), install the print formats, read the users, open the file system on the given disk with fsopen(dev, Normal, Wr), start the srv and network services, then the console and the time and sync processes. */ +void +threadmain(int argc, char *argv[]) +{ + char *addr, *dev, *srv; + + addr = "tcp!*!9fs"; + srv = "9pix"; + ARGBEGIN{ + case 'A': + addr = EARGF(usage()); + break; + case 'S': + srv = EARGF(usage()); + break; + case 'a': + noauth = 1; + break; + default: + if(ARGC() >= 'A' && ARGC() <= 'Z' || ARGC() == '9'){ + dbg[ARGC()] = 1; + fatalaborts = 1; + }else + usage(); + }ARGEND; + if(argc != 1) + usage(); + dev = argv[0]; + if(dbg['d']) + dbg['Z'] = 1; + /* NOTE(review): the option loop above only sets dbg entries for upper-case letters and 9, so the lower-case d entry tested just above cannot have been set by it - confirm the intended flag. */ + outofmemoryexits(1); + workerthreadcreate = proccreate; + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + fmtinstall('F', fcallfmt); + fmtinstall('G', ixcallfmt); + fmtinstall('X', fidfmt); + fmtinstall('R', rpcfmt); + fmtinstall('A', usrfmt); + fmtinstall('P', pathfmt); + /* any error that reaches this outermost frame is fatal */ + errinit(Errstack); + if(catcherror()) + fatal("uncatched error: %r"); + rfork(RFNAMEG|RFNOTEG); + rwusers(nil); + fsopen(dev, Normal, Wr); + if(srv != nil) + srv9pix(srv, cliworker9p); + if(addr != nil) + listen9pix(addr, cliworker9p); + + consinit(); + proccreate(timeproc, nil, Stack); + proccreate(fssyncproc, nil, Stack); + noerror(); + threadexits(nil); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,15 @@ +#include +#include +#include +#include +#include +#include +#include +#include +/* NOTE(review): the header names of the bare #include lines above are missing in this copy of the patch. */ +#include "conf.h" +#include "dbg.h" +#include "dk.h" +#include "ix.h" +#include "net.h" +#include "fns.h" --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,503 @@ +#include +#include +#include +#include +#include +#include + +/* + * Archer: archive creepy file trees into a creepy worm. + */ + +#define dprint(...) 
if(!debug){}else fprint(2, __VA_ARGS__) + +enum +{ + Errstack = 128, + Stack = 128*1024, +}; + +typedef struct Path Path; +typedef struct Idx Idx; + +typedef u64int daddrt; + +struct Idx +{ + uchar sha[SHA1dlen]; + daddrt addr; +}; + +struct Path +{ + char *name; + Path *up; + int fd; + Dir *d, *nd; + Idx; +}; + +#pragma varargck type "P" Path* +#pragma varargck type "H" uchar* +#pragma varargck type "I" uchar* + +static int debug; +int mainstacksize = Stack; +static int consfd; +static int dontprune; +static char *idxdir; + +static int +pathfmt(Fmt *fmt) +{ + Path *p; + + p = va_arg(fmt->args, Path*); + if(p == nil) + return fmtprint(fmt, "none"); + if(p->up == nil) + return fmtprint(fmt, "%s", p->name); + return fmtprint(fmt, "%P/%s", p->up, p->name); +} + +static int +shafmt(Fmt *fmt) +{ + uchar *p; + int i; + + p = va_arg(fmt->args, uchar*); + if(p == nil) + return fmtprint(fmt, "[]"); + fmtprint(fmt, "["); + for(i = 0; i < SHA1dlen; i++) + fmtprint(fmt, "%02x", p[i]); + return fmtprint(fmt, "]"); +} + +static int +idxfmt(Fmt *fmt) +{ + uchar *p; + int i; + + p = va_arg(fmt->args, uchar*); + for(i = 0; i < SHA1dlen - 1; i++) + fmtprint(fmt, "%02x/", p[i]); + return 0; +} + +static Path* +newpath(char *dir) +{ + Path *p; + + p = malloc(sizeof *p); + p->name = dir; + p->up = nil; + p->fd = open(p->name, OREAD); + if(p->fd < 0){ + free(p); + error("open: %r"); + } + p->d = dirfstat(p->fd); + p->nd = p->d; + if(p->d == nil){ + free(p); + close(p->fd); + error("stat: %r"); + } + return p; +} + +static Path* +walkpath(Path *up, char *name, Dir *d) +{ + Path *p; + + p = mallocz(sizeof *p, 1); + p->name = name; + p->nd = nil; + p->d = d; + p->up = up; + p->fd = -1; + return p; +} + +static void +pathsha(Path *p) +{ + uchar buf[8*1024]; + long nr; + DigestState *s; + + s = sha1((uchar*)"creepy", 6, nil, nil); + for(;;){ + nr = read(p->fd, buf, sizeof buf); + if(nr <= 0){ + sha1((uchar*)"creepy", 6, p->sha, s); + if(nr < 0) + error("%P: read: %r", p); + break; + } + 
sha1(buf, nr, nil, s); + } + seek(p->fd, 0, 0); + dprint("sha: %P: %H\n", p, p->sha); + +} + +static void +openpath(Path *p, int perm, int mode) +{ + char *s; + + dprint("openpath %P %s\n", p, mode == OREAD?"rd":"wr"); + assert(mode == OREAD || mode == OWRITE); + s = smprint("%P", p); + if(p->d == nil){ + if(mode == OWRITE) + p->fd = create(s, perm, mode); + }else{ + if(mode == OWRITE) + mode |= OTRUNC; + p->fd = open(s, mode); + } + free(s); + if(p->fd < 0) + error("%P: %r", p); + if(mode == OREAD && p->d != nil) + pathsha(p); +} + +static void +copy(Path *dp, Path *sp) +{ + char buf[8*1024]; + long nr; + Dir *d; + + dprint("copy %P\n", sp); + for(;;){ + nr = read(sp->fd, buf, sizeof buf); + if(nr < 0) + error("%P: read: %r", sp); + if(nr == 0) + break; + if(write(dp->fd, buf, nr) != nr) + error("%P: write: %r", dp); + } + d = dirfstat(dp->fd); + if(d == nil) + error("%P: stat: %r", dp); + dp->addr = d->qid.path; + free(d); +} + +static void +closepath(Path *p) +{ + dprint("closepath %P\n", p); + if(p->fd >= 0) + close(p->fd); + free(p->nd); + free(p); +} + +static void +wlink(Path *old, Path *new) +{ + char *s; + int r; + + dprint("link %#ullx %P\n", old->addr, new); + s = smprint("link %#ullx %P\n", old->addr, new); + r = write(consfd, s, strlen(s)); + free(s); + if(r < 0) + error("link %P: %r", new); +} + +static void +wunlink(Path *p) +{ + char *s; + int r; + + dprint("unlink %P\n", p); + s = smprint("unlink %P\n", p); + r = write(consfd, s, strlen(s)); + free(s); + if(r < 0) + error("unlink %P\n", p); +} + +static int +getidx(Path *p) +{ + char *ifn; + int fd; + Idx x; + + ifn = smprint("%s/%I", idxdir, p->sha); + dprint("getidx %s...", ifn); + fd = open(ifn, ORDWR); + free(ifn); + if(fd < 0){ + dprint("no\n"); + return -1; + } + seek(fd, p->sha[sizeof p->sha - 1]*sizeof x, 0); + if(read(fd, &x, sizeof x) != sizeof x){ + close(fd); + dprint("no\n"); + return -1; + } + close(fd); + p->Idx = x; + dprint(" d%#010ullx\n", p->addr); + return 0; +} + +static void 
+setidx(Path *p) +{ + char *ifn, *s, *e; + int i, fd; + ulong off; + + ifn = smprint("%s/%I", idxdir, p->sha); + dprint("setidx %s d%#010ullx...\n", ifn, p->addr); + if(catcherror()){ + free(ifn); + error(nil); + } + s = ifn + strlen(idxdir); + fd = -1; + for(i = 0; i < SHA1dlen-1; i++){ + e = s + i*3; + assert(*e == '/'); + *e = 0; + if(i < SHA1dlen-2){ + if(access(ifn, AEXIST) < 0){ + dprint("\tcreate d: %s\n", ifn); + fd = create(ifn, OREAD, 0770|DMDIR); + if(fd < 0) + error("%s: %r", ifn); + close(fd); + } + }else{ + fd = open(ifn, OWRITE); + if(fd < 0){ + dprint("\tcreate f: %s\n", ifn); + fd = create(ifn, OWRITE, 0660); + } + if(fd < 0) + error("%s: %r", ifn); + } + *e = '/'; + } + off = sizeof(Idx) * (uint)p->sha[SHA1dlen-1]; + seek(fd, off, 0); + if(write(fd, &p->Idx, sizeof p->Idx) != sizeof p->Idx){ + close(fd); + error("%s: write: %r", ifn); + } + close(fd); + free(ifn); + noerror(); +} + +static int +archived(Path *p1, Path *p2) +{ + if(p1->d == nil || p2->d == nil) + return 0; + return p1->d->mtime == p2->d->mtime && p1->d->length == p2->d->length; +} + +static int +metachanges(Path *p1, Path *p2) +{ + if((p1->d == nil && p2->d != nil) || (p1->d != nil && p2->d == nil)) + return 1; + if(!archived(p1, p2)) + return 1; + if(strcmp(p1->d->uid, p2->d->uid) != 0) + return 1; + if(strcmp(p1->d->gid, p2->d->gid) != 0) + return 1; + if(strcmp(p1->d->muid, p2->d->muid) != 0) + return 1; + return p1->d->mode != p2->d->mode; +} + +static Dir* +match(Dir *d, Dir *ds, int nds) +{ + int i; + + for(i = 0; i < nds; i++) + if(strcmp(d->name, ds[i].name) == 0) + return &ds[i]; + return nil; +} + +static void archer(Path*, Path*); + +static void +archdir(Path *cp, Path *wp) +{ + Dir *cds, *wds, *d; + int ncds, nwds, i; + Path *ccp, *cwp; + + if(dontprune == 0 && archived(cp, wp)){ + dprint("archdir: prune %P -> %P\n", cp, wp); + return; + } + ncds = dirreadall(cp->fd, &cds); + if(ncds < 0) + error("read %P: %r", cp); + if(catcherror()){ + free(cds); + error(nil); + } + 
nwds = dirreadall(wp->fd, &wds); + if(nwds < 0) + error("read %P: %r", wp); + if(catcherror()){ + free(wds); + error(nil); + } + + for(i = 0; i < ncds; i++){ + ccp = walkpath(cp, cds[i].name, &cds[i]); + d = match(&cds[i], wds, nwds); + cwp = walkpath(wp, cds[i].name, d); + if(!catcherror()){ + archer(ccp, cwp); + noerror(); + }else + fprint(2, "%P: %r", cwp); + closepath(ccp); + closepath(cwp); + if(d != nil) + d->name = nil; /* visited */ + } + for(i = 0; i < nwds; i++) + if(wds[i].name != nil){ + d = match(&wds[i], cds, ncds); + ccp = walkpath(cp, wds[i].name, d); + cwp = walkpath(wp, wds[i].name, &wds[i]); + if(!catcherror()){ + archer(ccp, cwp); + noerror(); + }else + fprint(2, "%P: %r", cwp); + closepath(ccp); + closepath(cwp); + } + noerror(); + noerror(); + free(cds); + free(wds); +} + +static void +archfile(Path *cp, Path *wp) +{ + if(wp->d != nil && archived(cp, wp)){ + dprint("archfile: prune %P -> %P\n", cp, wp); + return; + } + + openpath(cp, cp->d->mode, OREAD); + if(getidx(cp) == 0){ + wlink(cp, wp); + return; + }else{ + openpath(wp, cp->d->mode, OWRITE); + copy(wp, cp); + setidx(wp); + } +} + +static void +archer(Path *cp, Path *wp) +{ + dprint("archer %P -> %P\n", cp, wp); + assert(cp->d != nil || wp->d != nil); + if(wp->d != nil){ + if(cp->d == nil){ + wunlink(wp); + return; + } + if((cp->d->mode&DMDIR) ^ (wp->d->mode&DMDIR)) + wunlink(wp); + } + if(cp->d->mode&DMDIR) + archdir(cp, wp); + else + archfile(cp, wp); + if(metachanges(cp, wp) && dirfwstat(wp->fd, cp->d) < 0){ + dprint("stat %P\n", wp); + fprint(2, "%P: wstat: %r\n", wp); + } +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-d] [-t] cdir wdir\n", argv0); + exits("usage"); +} + +void +threadmain(int argc, char *argv[]) +{ + char *cdir, *wdir, *wact, *wcons; + Path *cp, *wp; + + dontprune = 0; + ARGBEGIN{ + case 'd': + debug = 1; + break; + case 't': + dontprune = 1; + break; + default: + usage(); + }ARGEND; + if(argc != 2) + usage(); + cdir = argv[0]; + wdir = argv[1]; + + 
outofmemoryexits(1); + errinit(Errstack); + if(catcherror()) + sysfatal("uncatched error: %r"); + + fmtinstall('H', shafmt); + fmtinstall('I', idxfmt); + fmtinstall('P', pathfmt); + wact = smprint("%s/active", wdir); + idxdir = smprint("%s/idx", wact); + wcons = smprint("%s/cons", wdir); + consfd = open(wcons, OWRITE); + if(consfd < 0) + sysfatal("cons: %r"); + cp = newpath(cdir); + wp = newpath(wact); + if(catcherror()) + sysfatal("%P: %r", wp); + archer(cp, wp); + noerror(); + closepath(cp); + closepath(wp); + noerror(); + threadexits(nil); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,467 @@ +#include "all.h" + +/* + * Attribute handling + * + * BUG: we only support the predefined attributes. + * + */ + +typedef struct Adef Adef; + +struct Adef +{ + char* name; + int sz; + long (*wattr)(Memblk*, char*); + long (*rattr)(Memblk*, char*, long); +}; + +long wname(Memblk*, char*); +static long ratime(Memblk*, char*, long); +static long rgid(Memblk*, char*, long); +static long rid(Memblk*, char*, long); +static long rlength(Memblk*, char*, long); +static long rmode(Memblk*, char*, long); +static long rmtime(Memblk*, char*, long); +static long rmuid(Memblk*, char*, long); +static long rname(Memblk*, char*, long); +static long rstar(Memblk*, char*, long); +static long ruid(Memblk*, char*, long); +static long watime(Memblk*, char*); +static long wgid(Memblk*, char*); +static long wid(Memblk*, char*); +static long wlength(Memblk*, char*); +static long wmode(Memblk*, char*); +static long wmtime(Memblk*, char*); +static long wmuid(Memblk*, char*); +static long wuid(Memblk*, char*); + +static Adef adef[] = +{ + {"name", 0, wname, rname}, + {"id", BIT64SZ, nil, rid}, + {"atime", BIT64SZ, watime, ratime}, + {"mtime", BIT64SZ, wmtime, rmtime}, + {"length", BIT64SZ, wlength, rlength}, + {"uid", 0, wuid, ruid}, + {"gid", 0, wgid, rgid}, + {"muid", 0, wuid, ruid}, + {"mode", BIT64SZ, wmode, rmode}, + {"*", 0, nil, 
rstar}, +}; + +/* + * Return size for attributes embedded in file. + * At least Dminattrsz bytes are reserved in the file block, + * at most Embedsz. + * Size is rounded to the size of an address. + * NOTE(review): as written the result is either Dminattrsz or Embedsz, + * never a value in between - confirm this is intended. + */ +ulong +embedattrsz(Memblk *f) +{ + ulong sz; + + sz = f->d.asize; + sz = ROUNDUP(sz, BIT64SZ); + if(sz < Dminattrsz) + sz = Dminattrsz; + else + sz = Embedsz; + return sz; +} +/* Set up the in-memory metadata of f: the uid, gid and muid strings come from usrname() and the name points at buf, which must contain a NUL within its first nbuf bytes or a corrupt meta error is raised. */ +void +gmeta(Memblk *f, void *buf, ulong nbuf) +{ + char *p; + int i; + + f->mf->uid = usrname(f->d.uid); + f->mf->gid = usrname(f->d.gid); + f->mf->muid = usrname(f->d.muid); + p = buf; + for(i = 0; i < nbuf; i++) + if(p[i] == 0) + break; + if(i == nbuf) + error("corrupt meta"); + f->mf->name = buf; + +} +/* Bytes needed to pack the metadata of f: the name and its NUL. */ +static ulong +metasize(Memblk *f) +{ + return strlen(f->mf->name) + 1; +} + +/* + * Pack the metadata into buf. + * pointers in meta are changed to refer to the packed data. + * Returns the size of the packed metadata (the name and its NUL). + * An error is raised if that size is larger than nbuf. + */ +ulong +pmeta(void *buf, ulong nbuf, Memblk *f) +{ + char *p, *e; + ulong sz; + + sz = metasize(f); + if(sz > nbuf) + error("attributes are too long"); + p = buf; + if(f->mf->name != p) + e = strecpy(p, p+nbuf, f->mf->name); + else + e = p + strlen(p); + e++; + assert(e-p <= sz); /* can be <, to leave room for growing */ + f->mf->name = p; + return sz; +} +/* Writer for the name attribute: val (with its NUL) must fit in embedattrsz(f) bytes, otherwise the old name pointer is restored and warnerror is called. The name is then packed into f->d.embed. Returns strlen(val)+1. */ +long +wname(Memblk *f, char *val) +{ + char *old; + ulong maxsz; + + old = f->mf->name; + f->mf->name = val; + maxsz = embedattrsz(f); + if(metasize(f) > maxsz){ + f->mf->name = old; + warnerror("no room to grow metadata"); + } + pmeta(f->d.embed, maxsz, f); + return strlen(val)+1; +} +/* Format v with %#018ullx into buf (n bytes available). Returns the length including the NUL, or raises buffer too short. */ +static long +ru64int(uvlong v, char *buf, long n) +{ + char s[30], *p; + + p = seprint(s, s+sizeof s, "%#018ullx", v); + if((p-s)+1 > n) + error("buffer too short"); + strecpy(buf, buf+n, s); + return (p-s)+1; +} +/* Copy string s, NUL included, into buf (len bytes available). Returns the number of bytes copied, or raises buffer too short. */ +static long +rstr(char *s, char *buf, long len) +{ + long l; + + l = strlen(s) + 1; + if(l > len) + error("buffer too short"); + strcpy(buf, s); + 
return l; +} +/* Attribute accessors used by the adef table. Readers (r*) format the value into buf and return the bytes used, NUL included. Writers (w*) parse buf, store the value in f->d and return strlen(buf)+1. */ +static long +rname(Memblk *f, char *buf, long len) +{ + return rstr(f->mf->name, buf, len); +} + +static long +ruid(Memblk *f, char *buf, long len) +{ + return rstr(f->mf->uid, buf, len); +} +/* Translate a user name into its numeric id; unknown names (negative usrid) raise an error. */ +static u64int +chkusr(char *buf) +{ + int id; + + id = usrid(buf); + if(id < 0) + error("unknown user '%s'", buf); + return id; +} + +static long +wuid(Memblk *f, char *buf) +{ + f->d.uid = chkusr(buf); + return strlen(buf)+1; +} + +static long +rgid(Memblk *f, char *buf, long len) +{ + return rstr(f->mf->gid, buf, len); +} + +static long +wgid(Memblk *f, char *buf) +{ + f->d.gid = chkusr(buf); + return strlen(buf)+1; +} + +static long +rmuid(Memblk *f, char *buf, long len) +{ + return rstr(f->mf->muid, buf, len); +} + +static long +wmuid(Memblk *f, char *buf) +{ + f->d.muid = chkusr(buf); + return strlen(buf)+1; +} +/* Parse buf as an unsigned 64-bit number with strtoull in base 0 (so the prefix selects the base); the whole string must be consumed or an error is raised. */ +static uvlong +chku64int(char *buf) +{ + u64int v; + char *r; + + v = strtoull(buf, &r, 0); + if(r == nil || r == buf || *r != 0) + error("not a number"); + return v; +} + +static long +wid(Memblk *f, char *buf) +{ + f->d.id = chku64int(buf); + return strlen(buf)+1; +} + +static long +rid(Memblk *f, char *buf, long n) +{ + return ru64int(f->d.id, buf, n); +} + +static long +watime(Memblk *f, char *buf) +{ + f->d.atime = chku64int(buf); + return strlen(buf)+1; +} + +static long +ratime(Memblk *f, char *buf, long n) +{ + return ru64int(f->d.atime, buf, n); +} +/* The DMUSERS bit cannot be changed through the mode attribute: it is preserved on write and masked out on read. */ +static long +wmode(Memblk *f, char *buf) +{ + f->d.mode = chku64int(buf) | (f->d.mode&DMUSERS); + return strlen(buf)+1; +} + +static long +rmode(Memblk *f, char *buf, long n) +{ + return ru64int(f->d.mode&~DMUSERS, buf, n); +} + +static long +wmtime(Memblk *f, char *buf) +{ + f->d.mtime = chku64int(buf); + return strlen(buf)+1; +} + +static long +rmtime(Memblk *f, char *buf, long n) +{ + return ru64int(f->d.mtime, buf, n); +} +/* Check a new length sz for f (f must not be a directory and sz must not exceed maxfsz) and, when sz is below the current length, clear the embedded area past the attributes and drop blocks. Returns sz. */ +static uvlong +resized(Memblk *f, uvlong sz) +{ + ulong boff, bno, bend, doff; + + if(f->d.mode&DMDIR) + error("can't resize a directory"); + + if(sz > maxfsz) + 
error("max file size exceeded"); + if(sz >= f->d.length) + return sz; + bno = dfbno(f, sz, &boff); + if(boff > 0) + bno++; + bend = dfbno(f, sz, &boff); + if(boff > 0) + bend++; + doff = embedattrsz(f); + if(doff < Embedsz) + memset(f->d.embed+doff, 0, Embedsz-doff); + dfdropblks(f, bno, bend); + return sz; +} +/* Writer for the length attribute. NOTE(review): the new length is stored before resized() runs, so resized() always sees sz >= f->d.length and returns early; in addition bno and bend in resized() are both computed from sz - confirm that truncation really releases blocks. */ +static long +wlength(Memblk *f, char *buf) +{ + f->d.length = chku64int(buf); + resized(f, f->d.length); + return strlen(buf)+1; +} + +static long +rlength(Memblk *f, char *buf, long n) +{ + return ru64int(f->d.length, buf, n); +} +/* Reader for the star attribute: the names of all other attributes in adef order, separated by single spaces. Returns the length of the list without the NUL. */ +static long +rstar(Memblk *, char *buf, long len) +{ + char *s, *e; + int i; + + s = buf; + e = s + len; + for(i = 0; i < nelem(adef); i++) + if(*adef[i].name != '*') + s = seprint(s, e, "%s ", adef[i].name); + if(s > buf) + *--s = 0; + return s - (char*)buf; +} +/* Write attribute name of f from the string val through the adef table. f goes through the isfile, ismelted and isrwlocked(Wr) checks first. Raises an error when the disk is below Dzerofree, when the attribute is unknown or when it has no writer. On success f is marked as changed and the value returned by the writer is returned. */ +long +dfwattr(Memblk *f, char *name, char *val) +{ + int i; + long tot; + + isfile(f); + ismelted(f); + isrwlocked(f, Wr); + if(fsdiskfree() < Dzerofree) + error("disk full"); + + for(i = 0; i < nelem(adef); i++) + if(strcmp(adef[i].name, name) == 0) + break; + if(i == nelem(adef)) + error("bug: user defined attributes not yet implemented"); + if(adef[i].wattr == nil) + error("can't write %s", name); + tot = adef[i].wattr(f, val); + changed(f); + return tot; +} +/* Read attribute name of f into val (count bytes available) through the adef table. f goes through the isfile and isrwlocked(Rd) checks first. Raises an error for unknown attributes and when count is below the fixed size declared for the attribute. Returns what the reader returns. */ +long +dfrattr(Memblk *f, char *name, char *val, long count) +{ + int i; + long tot; + + isfile(f); + isrwlocked(f, Rd); + for(i = 0; i < nelem(adef); i++) + if(strcmp(adef[i].name, name) == 0) + break; + if(i == nelem(adef)) + error("no such attribute"); + if(adef[i].sz != 0 && count < adef[i].sz) + error("buffer too short for attribute"); + + tot = adef[i].rattr(f, val, count); + return tot; +} +/* Compare v1 with v2 using strcmp and raise the error false unless the relation selected by op (CEQ, CGE, CGT, CLE, CLT, CNE) holds. */ +static void +cstring(Memblk*, int op, char *v1, char *v2) +{ + int v; + + v = strcmp(v1, v2); + switch(op){ + case CEQ: + if(v != 0) + error("false"); + break; + case CGE: + if(v < 0) + error("false"); + break; + case CGT: + if(v <= 0) + error("false"); + break; + case CLE: + if(v > 0) + error("false"); + break; 
+ case CLT: + if(v >= 0) + error("false"); + case CNE: + if(v == 0) + error("false"); + break; + } +} + +/* + * cond on attribute value + */ +void +dfcattr(Memblk *f, int op, char *name, char *val) +{ + int i; + char buf[128]; + + isfile(f); + isrwlocked(f, Rd); + + dfrattr(f, name, buf, sizeof buf); + + for(i = 0; i < nelem(adef); i++) + if(strcmp(adef[i].name, name) == 0) + break; + if(i == nelem(adef)) + error("no such attribute"); + cstring(f, op, buf, val); +} + +/* + * Does not check if the user can't write because of the "write" + * user. + * Does check if the user is allowed in config mode. + */ +void +dfaccessok(Memblk *f, int uid, int bits) +{ + uint mode; + + if(allowed(uid)) + return; + + bits &= 3; + + mode = f->d.mode &0777; + + if((mode&bits) == bits) + return; + mode >>= 3; + + if(member(f->d.gid, uid) && (mode&bits) == bits) + return; + mode >>= 3; + if(f->d.uid == uid && (mode&bits) == bits) + return; + error("permission denied"); +} --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,877 @@ +#include "all.h" + +/* + * Locking is coarse, only functions used from outside + * care to lock the user information. + * + * Access checks are like those described in Plan 9's stat(5), but for: + * + * - to change gid, the group leader is not required to be a leader + * of the new group; it suffices if he's a member. + * - attempts to change muid are honored if the user is "allowed", + * and ignored otherwise. + * - "allowed" users can also set atime. + * - attributes other than length, name, uid, gid, muid, mode, atime, and mtime + * are ignored (no error raised if they are not void) + * + * + * The user file has the format: + * uid:name:leader:members + * uid is a number. + * + * This program insists on preserving uids already seen. + * That is, after editing /active/adm/users, the server program will notice + * and re-read the file, then clean it up, and upate its contents. 
+ * + * Cleaning ensures that uids for known users are kept as they were, and + * that users not yet seen get unique uids. Numeric uids are only an internal + * concept, the protocol uses names. + */ + +/* + * The uid numbers are irrelevant, they are rewritten. + */ +static char *defaultusers = + "1:none::\n" + "2:adm:adm:sys, elf \n" + "3:sys::glenda,elf\n" + "4:glenda:glenda:\n" + "5:elf:elf:sys\n"; + +static RWLock ulk; +static Usr *uids[Uhashsz]; +static Usr *unames[Uhashsz]; +static Usr *uwrite; +static int uidgen; + +static uint +usrhash(char* s) +{ + uchar *p; + uint hash; + + hash = 0; + for(p = (uchar*)s; *p != '\0'; p++) + hash = hash*7 + *p; + + return hash % Uhashsz; +} + +static int +findmember(Usr *u, int member) +{ + Member *m; + + for(m = u->members; m != nil; m = m->next) + if(member == m->u->id) + return 1; + return 0; +} + +static Usr* +finduid(int uid) +{ + Usr *u; + + for(u = uids[uid%Uhashsz]; u != nil; u = u->inext) + if(u->id == uid) + return u; + return nil; +} + +static Usr* +finduname(char *name, int mkit) +{ + Usr *u; + uint h; + + h = usrhash(name); + for(u = unames[h]; u != nil; u = u->nnext) + if(strcmp(u->name, name) == 0) + return u; + if(mkit){ + /* might be leaked. see freeusr() */ + u = mallocz(sizeof *u, 1); + strecpy(u->name, u->name+sizeof u->name, name); + u->nnext = unames[h]; + unames[h] = u; + } + return u; +} + +char* +usrname(int uid) +{ + Usr *u; + + xrwlock(&ulk, Rd); + u = finduid(uid); + if(u == nil){ + xrwunlock(&ulk, Rd); /* zero patatero: */ + return "ZP"; /* disgusting, isn't it? 
*/ + } + xrwunlock(&ulk, Rd); + return u->name; +} + +int +usrid(char *n) +{ + Usr *u; + + xrwlock(&ulk, Rd); + u = finduname(n, Dontmk); + if(u == nil || !u->enabled){ + xrwunlock(&ulk, Rd); + return -1; + } + xrwunlock(&ulk, Rd); + return u->id; +} + +int +member(int uid, int member) +{ + Usr *u; + int r; + + if(uid == member) + return 1; + xrwlock(&ulk, Rd); + u = finduid(uid); + r = u != nil && u->lead != nil && u->lead->id == member; + r |= u != nil && findmember(u, member); + xrwunlock(&ulk, Rd); + return r; +} + +int +leader(int gid, int lead) +{ + Usr *u; + int r; + + xrwlock(&ulk, Rd); + u = finduid(gid); + r = 0; + if(u != nil) + if(u->lead != nil) + r = u->lead->id == lead; + else + r = findmember(u, lead); + xrwunlock(&ulk, Rd); + return r; +} + +static void +clearmembers(Usr *u) +{ + Member *m; + + while(u->members != nil){ + m = u->members; + u->members = m->next; + free(m); + } +} + +static void +addmember(Usr *u, char *n) +{ + Member *m, **ml; + + for(ml = &u->members; (m = *ml) != nil; ml = &m->next) + if(strcmp(m->u->name, n) == 0){ + xrwunlock(&ulk, Wr); + warn("'%s' is already a member of '%s'", n, u->name); + return; + } + m = mallocz(sizeof *m, 1); + m->u = finduname(n, Mkit); + *ml = m; +} + +static void +checkmembers(Usr *u) +{ + Member *m, **ml; + + for(ml = &u->members; (m = *ml) != nil; ) + if(m->u->id == 0){ + warn("no user '%s' (member of '%s')", m->u->name, u->name); + *ml = m->next; + free(m); + }else + ml = &m->next; +} + +int +usrfmt(Fmt *fmt) +{ + Usr *usr; + Member *m; + + usr = va_arg(fmt->args, Usr*); + + if(usr == nil) + return fmtprint(fmt, "#no user"); + fmtprint(fmt, "%s%d:%s:", usr->enabled?"":"!", + usr->id, usr->name); + fmtprint(fmt, "%s:", usr->lead?usr->lead->name:""); + for(m = usr->members; m != nil; m = m->next){ + fmtprint(fmt, "%s", m->u->name); + if(m->next != nil) + fmtprint(fmt, ","); + } + return 0; +} + +static void +dumpusers(void) +{ + int i; + Usr *usr; + + for(i = 0; i < nelem(uids); i++) + for(usr = 
uids[i]; usr != nil; usr = usr->inext){ + fprint(2, "%A\n", usr); + } +} + +/* + * Add a user. + * A partial user entry might already exists, as a placeholder + * for the user name (if seen before in the file). + * If the user was known, it's uid is preserved. + * If not, a new unique uid is assigned. + */ +static Usr* +mkusr(char *name) +{ + Usr *u; + uint h; + + u = finduname(name, Mkit); + if(u->id == 0){ + /* first seen! */ + u->id = ++uidgen; + h = u->id%Uhashsz; + u->inext = uids[h]; + uids[h] = u; + } + if(strcmp(name, "write") == 0) + uwrite = u; + return u; +} + +static void +addusr(char *p) +{ + char *c, *nc, *s, *args[5]; + int nargs, on; + Usr *usr; + + on = 1; + if(*p == '!'){ + on = 0; + p++; + } + nargs = getfields(p, args, nelem(args), 0, ":"); + if(nargs != 4) + error("wrong number of fields %s", args[0]); + if(*args[1] == 0) + error("null name"); + usr = mkusr(args[1]); + usr->enabled = on; + usr->lead = finduname(args[2], Mkit); + clearmembers(usr); + for(c = args[3]; c != nil; c = nc){ + while(*c == ' ' || *c == '\t') + c++; + if(*c == 0) + break; + nc = utfrune(c, ','); + if(nc != nil) + *nc++ = 0; + s = utfrune(c, ' '); + if(s != nil) + *s = 0; + s = utfrune(c, '\t'); + if(s != nil) + *s = 0; + if(*c != 0) + addmember(usr, c); + } +} + +/* + * Absorb the new user information as read from u. + * Old users are not removed, but renamed to be disabled. 
+ */ +static void +rwdefaultusers(void) +{ + char *u, *c, *p, *np; + static int once; + + if(once++ > 0) + return; + + u = strdup(defaultusers); + if(catcherror()){ + free(u); + error(nil); + } + p = u; + do{ + np = utfrune(p, '\n'); + if(np != nil) + *np++ = 0; + c = utfrune(p, '#'); + if(c != nil) + *c = 0; + if(*p == 0) + continue; + if(catcherror()){ + warn("users: %r\n"); + consprint("users: %r\n"); + continue; + } + addusr(p); + noerror(); + }while((p = np) != nil); + + if(dbg['d']){ + dprint("users:\n"); + dumpusers(); + dprint("\n"); + } + noerror(); + free(u); + +} + +/* + * This should be called at start time and whenever + * the user updates /active/adm/users, to rewrite it according to our + * in memory data base. + */ +void +rwusers(Memblk *uf) +{ + static char ubuf[512]; + char *p, *nl, *c; + uvlong off; + long tot, nr, nw; + int i; + Usr *usr; + + xrwlock(&ulk, Wr); + if(catcherror()){ + warn("users: %r"); + goto update; + } + if(uf == nil){ + rwdefaultusers(); + xrwunlock(&ulk, Wr); + return; + } + tot = 0; + p = nil; + for(off = 0; off < uf->d.length; off += nr){ + nr = dfpread(uf, ubuf + tot, sizeof ubuf - tot - 1, off); + tot += nr; + ubuf[tot] = 0; + for(p = ubuf; p != nil && *p != 0 && p - ubuf < tot; p = nl){ + nl = utfrune(p, '\n'); + if(nl == nil){ + tot = strlen(p); + memmove(ubuf, p, tot+1); + break; + } + *nl++ = 0; + c = utfrune(p, '#'); + if(c != nil) + *c = 0; + if(*p != 0) + addusr(p); + } + } + if(p != nil && *p != 0){ + warn("last line in users is not a full line"); + warn("[%s]", p); + } + noerror(); + if(uf->frozen){ /* loaded at boot time */ + xrwunlock(&ulk, Wr); + return; + } + +update: + if(catcherror()){ + xrwunlock(&ulk, Wr); + warn("users: %r\n"); + return; /* what could we do? 
*/ + } + ismelted(uf); + isrwlocked(uf, Wr); + wstatint(uf, "length", 0); + off = 0; + dprint("users updated:\n"); + for(i = 0; i < uidgen; i++) + if((usr=finduid(i)) != nil){ + dprint("%A\n", usr); + p = seprint(ubuf, ubuf+sizeof ubuf, "%A\n", usr); + nw = dfpwrite(uf, ubuf, p - ubuf, &off); + off += nw; + } + noerror(); + xrwunlock(&ulk, Wr); +} + +int +writedenied(int uid) +{ + int r; + + if(uwrite == nil) + return 0; + xrwlock(&ulk, Rd); + r = findmember(uwrite, uid) == 0; + xrwunlock(&ulk, Rd); + return r; +} + +int +allowed(int uid) +{ + Usr *u; + int r; + + xrwlock(&ulk, Rd); + u = finduid(uid); + r = 0; + if(u) + r = u->allow; + xrwunlock(&ulk, Rd); + return r; +} + +/* + * TODO: register multiple fids for the cons file by keeping a list + * of console channels. + * consread will have to read from its per-fid channel. + * conprint will have to bcast to all channels. + * + * With that, multiple users can share the same console. + * Although perhaps it would be easier to use C in that case. + */ + +void +consprint(char *fmt, ...) +{ + va_list arg; + char *s, *x; + + va_start(arg, fmt); + s = vsmprint(fmt, arg); + va_end(arg); + /* consume some message if the channel is full */ + while(nbsendp(fs->consc, s) == 0) + if((x = nbrecvp(fs->consc)) != nil) + free(x); +} + +long +consread(char *buf, long count) +{ + char *s; + int tot, nr; + + if(count <= 0) /* shouldn't happen */ + return 0; + quiescent(Yes); + s = recvp(fs->consc); + quiescent(No); + tot = 0; + do{ + nr = strlen(s); + if(tot + nr > count) + nr = count - tot; + memmove(buf+tot, s, nr); + tot += nr; + free(s); + }while((s = nbrecvp(fs->consc)) != nil && tot + 80 < count); + /* + * +80 to try to guarantee that we have enough room in the user + * buffer for the next received string, or we'd drop part of it. + * Most of the times each string is a rune typed by the user. + * Other times, it's the result of a consprint() call. 
+ */ + return tot; +} + +static void +cdump(int argc, char *argv[]) +{ + switch(argc){ + case 1: + fsdump(0, strcmp(argv[0], "dumpall") == 0); + break; + case 2: + if(strcmp(argv[1], "-l") == 0){ + fsdump(1, strcmp(argv[0], "dumpall") == 0); + break; + } + /*fall*/ + default: + error("usage: %s [-l]\n", argv[0]); + } +} + +static void +csync(int, char**) +{ + fssync(); + consprint("synced\n"); +} + +static void +chalt(int, char**) +{ + fssync(); + fs->halt = 1; +} + +static void +cusers(int, char *[]) +{ + int i; + Usr *usr; + + xrwlock(&ulk, Rd); + if(catcherror()){ + xrwunlock(&ulk, Rd); + error(nil); + } + for(i = 0; i < uidgen; i++) + if((usr=finduid(i)) != nil) + consprint("%A\n", usr); + noerror(); + xrwunlock(&ulk, Rd); +} + +static void +cstats(int argc, char *argv[]) +{ + int clr, verb; + char *s; + + clr = verb = 0; + if(argc == 2 && *argv[1] == '-'){ + for(s = argv[1]+1; *s; s++) + switch(*s){ + case -'c': + clr = 1; + break; + case 'v': + verb = 1; + break; + default: + usage: + error("usage: %s [-cv]\n", argv[0]); + return; + } + argc--; + } + if(argc != 1) + goto usage; + consprint("%s\n", updatestats(clr, verb)); +} + +static void +cdebug(int, char *argv[]) +{ + char *f; + char flags[50]; + int i; + + f = argv[1]; + if(strcmp(f, "on") == 0){ + dbg['D'] = 1; + return; + } + if(strcmp(f, "off") == 0){ + memset(dbg, 0, sizeof dbg); + return; + } + if(*f != '+' && *f != '-') + memset(dbg, 0, sizeof dbg); + else + f++; + for(; *f != 0; f++){ + dbg[*f] = 1; + if(*argv[1] == '-') + dbg[*f] = 0; + } + f = flags; + for(i = 0; i < nelem(dbg) && f < flags+nelem(flags)-1; i++) + if(dbg[i]) + *f++ = i; + *f = 0; + consprint("debug = '%s'\n", flags); + +} + +static void +clocks(int, char *argv[]) +{ + if(strcmp(argv[1], "on") == 0) + lockstats(1); + else if(strcmp(argv[1], "off") == 0) + lockstats(0); + else if(strcmp(argv[1], "dump") == 0) + dumplockstats(); + else + error("usage: %s [on|off|dump]\n", argv[0]); +} + +static void +cfids(int, char**) +{ + 
dumpfids(); +} + +static void +crwerr(int, char *argv[]) +{ + if(*argv[0] == 'r'){ + swreaderr = atoi(argv[1]); + warn("sw read err count = %d", swreaderr); + }else{ + swwriteerr = atoi(argv[1]); + warn("sw write err count = %d", swwriteerr); + } +} + +static void +ccheck(int argc, char *argv[]) +{ + switch(argc){ + case 1: + fscheck(); + break; + case 2: + if(strcmp(argv[1], "-v") == 0){ + if(fscheck() > 0) + fsdump(1, 0); + }else + error("usage: %s [-v]\n", argv[0]); + break; + default: + error("usage: %s [-v]\n", argv[0]); + } +} + +static void +clru(int, char**) +{ + fslru(); +} + + +static void +creclaim(int, char**) +{ + fsreclaim(); +} + +static void +callow(int argc, char *argv[]) +{ + Usr *u, *usr; + int i; + + usr = nil; + switch(argc){ + case 1: + if(*argv[0] == 'd') + for(i = 0; i < nelem(uids); i++) + for(u = uids[i]; u != nil; u = u->inext) + u->allow = 0; + break; + case 2: + xrwlock(&ulk, Wr); + usr = finduname(argv[1], Dontmk); + if(usr == nil){ + xrwunlock(&ulk, Wr); + consprint("user not found\n"); + return; + } + usr->allow = (*argv[0] == 'a'); + xrwunlock(&ulk, Wr); + break; + default: + consprint("usage: %s [uid]\n", argv[0]); + return; + } + xrwlock(&ulk, Rd); + for(i = 0; i < nelem(uids); i++) + for(u = uids[i]; u != nil; u = u->inext) + if(u->allow) + consprint("user '%s' is allowed\n", u->name); + else if(u == usr) + consprint("user '%s' is not allowed\n", u->name); + xrwunlock(&ulk, Rd); +} + +static void +clink(int, char *argv[]) +{ + Path *dp, *sp; + Memblk *df, *sf, *nf; + daddrt naddr; + + if(fs->worm == 0) + error("%s allowed only in worm mode\n", argv[0]); + sp = walkto(argv[1], nil); + if(catcherror()){ + putpath(sp); + error(nil); + } + dp = walkto(argv[2], nil); + if(catcherror()){ + putpath(sp); + error(nil); + } + meltedpath(&dp, dp->nf, 1); + sf = sp->f[sp->nf-1]; + df = dp->f[dp->nf-1]; + naddr = 0; + if(!catcherror()){ + nf = dfwalk(df, sf->mf->name); + naddr = nf->addr; + mbput(nf); + noerror(); + } + dfchdentry(df, naddr, 
sf->addr); + noerror(); + noerror(); + putpath(sp); + putpath(dp); +} + +static void +cwho(int, char**) +{ + consprintclients(); +} + +static void chelp(int, char**); + +static Cmd cmds[] = +{ + {"dump", cdump, 0, "dump [-l]"}, + {"dumpall", cdump, 0, "dumpall [-l]"}, + {"stats", cstats, 0, "stats [-c]"}, + {"sync", csync, 1, "sync"}, + {"halt", chalt, 1, "halt"}, + {"users", cusers, 1, "users"}, + {"debug", cdebug, 2, "cdebug [+-]FLAGS | on | off"}, + {"locks", clocks, 2, "locks [on|off|dump]"}, + {"fids", cfids, 1, "fids"}, + {"rerr", crwerr, 2, "rerr n"}, + {"werr", crwerr, 2, "werr n"}, + {"check", ccheck, 0, "check"}, + {"lru", clru, 1, "lru"}, + {"reclaim", creclaim, 1, "reclaim"}, + {"allow", callow, 0, "allow [uid]"}, + {"disallow", callow, 0, "disallow [uid]"}, + {"who", cwho, 1, "who"}, + {"link", clink, 3, "link old new"}, + {"?", chelp, 1, "?"}, +}; + +static void +chelp(int, char**) +{ + int i; + + consprint("commands:\n"); + for(i = 0; i < nelem(cmds); i++) + if(strcmp(cmds[i].name, "?") != 0) + consprint("> %s\n", cmds[i].usage); +} + +void +consinit(void) +{ + consprint("creepy> "); +} + +long +conswrite(char *ubuf, long count) +{ + char *c, *p, *np, *args[5]; + int nargs, i, nr; + Rune r; + static char buf[80]; + static char *s, *e; + + if(count <= 0) + return 0; + if(s == nil){ + s = buf; + e = buf + sizeof buf; + } + for(i = 0; i < count && s < e-UTFmax-1; i += nr){ + nr = chartorune(&r, ubuf+i); + memmove(s, ubuf+i, nr); + s += nr; + consprint("%C", r); + } + *s = 0; + if(s == e-1){ + s = buf; + *s = 0; + error("command is too large"); + } + if(utfrune(buf, '\n') == 0) + return count; + p = buf; + do{ + np = utfrune(p, '\n'); + if(np != nil) + *np++ = 0; + c = utfrune(p, '#'); + if(c != nil) + *c = 0; + nargs = tokenize(p, args, nelem(args)); + if(nargs < 1) + continue; + for(i = 0; i < nelem(cmds); i++){ + if(strcmp(args[0], cmds[i].name) != 0) + continue; + quiescent(Yes); + if(catcherror()){ + quiescent(No); + consprint("%r\n"); + break; + } 
+ if(cmds[i].nargs != 0 && cmds[i].nargs != nargs) + consprint("usage: %s\n", cmds[i].usage); + else + cmds[i].f(nargs, args); + noerror(); + quiescent(No); + break; + } + if(i == nelem(cmds)) + consprint("'%s'?\n", args[0]); + }while((p = np) != nil); + s = buf; + *s = 0; + consprint("creepy> "); + return count; +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,473 @@ +#include "all.h" + +/* + * fs checks + */ +void +checktag(u64int tag, uint type, daddrt addr) +{ + + if((tag|DFdir) != TAG(type, DFdir, addr)){ + if(type == DBref) + if((tag|DFdir) == TAG(type, DFdir, addr+Dblksz)) + return; /* odd refs */ + warn("bad tag: %#ullx %#ullx\n", + tag, TAG(type, 0, addr)); + error("bad tag"); + } +} + +static int +validaddr(daddrt addr) +{ + if(addr&Fakeaddr) + return 0; + if(addr == 0) + return 1; + return addr >= Dblk0addr && addr < fs->super->d.eaddr; +} + +void +checkblk(Memblk *b) +{ + int i; + daddrt eaddr, *de; + long doff, sz; + + checktag(b->d.tag, b->type, b->addr); + switch(b->type){ + case DBfree: + warnerror("free block on disk"); + break; + case DBref: + eaddr = fs->super->d.eaddr; + for(i = 0; i < Drefperblk; i++) + if(b->d.ref[i] >= eaddr) + warnerror("ref out of range"); + break; + case DBsuper: + if(b->d.magic != MAGIC) + warnerror("super: magic"); + if(b->d.eaddr >= fs->limit || b->d.eaddr < Dblk0addr) + warnerror("super: eaddr out of range"); + if(b->d.free >= b->d.eaddr || (b->d.free && b->d.free < Dblk0addr)) + warnerror("super: free out of range"); + if(b->d.root >= b->d.eaddr || b->d.root < Dblk0addr) + warnerror("super: root out of range"); + break; + case DBattr: + if(!validaddr(b->d.next)) + warnerror("attr: next out of range"); + break; + case DBdata: + if(DBDIR(b) == 0) + break; + for(i = 0; i < Dblkdatasz/Daddrsz; i++) + if(!validaddr(b->d.ptr[i])) + warnerror("dentry out of range"); + break; + case DBfile: + if(!validaddr(b->d.aptr)) + warnerror("file: attr out of range"); + 
for(i = 0; i < nelem(b->d.dptr); i++) + if(!validaddr(b->d.dptr[i])) + warnerror("file: dptr out of range"); + for(i = 0; i < nelem(b->d.iptr); i++) + if(!validaddr(b->d.iptr[i])) + warnerror("file: iptr out of range"); + if(DBDIR(b) != 0){ + doff = embedattrsz(b); + if(doff > Embedsz) + warnerror("file: wrong attr size"); + sz = Embedsz-doff; + de = (daddrt*)(b->d.embed+doff); + for(i = 0; i < sz/Daddrsz; i++) + if(!validaddr(de[i])) + warnerror("file: dentry out of range"); + } + break; + default: + if(b->type < DBptr0 || b->type >= DBptr0 + Niptr) + warnerror("unknown block type"); + for(i = 0; i < Dptrperblk; i++) + if(!validaddr(b->d.ptr[i])) + warnerror("ptr: address out of range"); + } +} + +static uvlong +clearrefs(int disktoo) +{ + Memblk *b; + daddrt addr, eaddr; + uvlong nhash; + int i; + + nhash = 0; + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next){ + nhash++; + b->d.cnt = 0; + } + if(disktoo == 0) + return nhash; + + eaddr = fs->super->d.eaddr; + for(addr = Dblk0addr+2*Dblksz; addr < eaddr; addr += Dblksz*Nblkgrpsz){ + if(catcherror()){ + warn("clearrefs: %r"); + return 1; + } + b = dbget(DBref, addr); + memset(b->d.data, 0, Dblkdatasz); + mbput(b); + noerror(); + } + return nhash; +} + +static int +mbcounted(Memblk *b) +{ + if(b == nil) + return 0; + if(b < fs->blk || b >= fs->blk + fs->nablk) + fatal("mbcountref: m%#p not in global array", b); + if(b->ref != b->d.cnt){ + warn("check: m%#p: found %ulld != ref %ud\n%H", + b, b->d.cnt, b->ref, b); + return 1; + } + return 0; +} + +daddrt +dbcounted(daddrt addr) +{ + Memblk *rb; + daddrt n, raddr; + int i; + + raddr = refaddr(addr, &i) + 2*Dblksz; + if(catcherror()){ + warn("dbcounted: %r"); + return 1; + } + rb = dbget(DBref, raddr); + noerror(); + n = rb->d.ref[i]; + mbput(rb); + return n; +} + +daddrt +mbcountref(Memblk *b) +{ + daddrt old; + + if(b == nil) + return 0; + if(b < fs->blk || b >= fs->blk + fs->nablk) + fatal("mbcountref: m%#p not in global array", 
b); + old = b->d.cnt++; + if(old == 0 && b->type == DBfile) + mbcountref(b->mf->melted); + return old; +} + +u64int +dbcountref(daddrt addr) +{ + Memblk *rb; + daddrt n, raddr; + int i; + + raddr = refaddr(addr, &i) + 2*Dblksz; + if(catcherror()){ + warn("dbcountref: %r"); + return 1; + } + rb = dbget(DBref, raddr); + noerror(); + n = rb->d.ref[i]++; + mbput(rb); + return n; +} + +static int +bcountrefs(Memblk *b, void*) +{ + if(dbcountref(b->addr) != 0) /* already counted; prune */ + return -1; + return 0; +} + +static int +dbcountfree(daddrt addr, int oktohash) +{ + Memblk *rb; + daddrt n, raddr; + int i; + + if(!oktohash && mbhashed(addr)){ + warn("check: d%#010ullx: free block in use", addr); + return 1; + } + raddr = refaddr(addr, &i) + 2*Dblksz; + if(catcherror()){ + warn("dbcountref: %r"); + return 1; + } + rb = dbget(DBref, raddr); + noerror(); + n = rb->d.ref[i]; + if(n != 0){ + warn("check: d%#010ullx: double free", addr); + mbput(rb); + return 1; + } + rb->d.ref[i] = ~0; + mbput(rb); + return 0; +} + +static long dfcountrefs(Memblk*); + +static int +fcountref(Memblk *, daddrt *de, void *a) +{ + Memblk *b; + long *nfails; + + nfails = a; + if(*de == 0) + return 0; + + if(catcherror()){ + warn("check: d%#010ullx %r", *de); + (*nfails)++; + }else{ + b = dbget(DBfile, *de); + (*nfails) += dfcountrefs(b); + noerror(); + mbput(b); + } + return 0; +} + +static long +dfcountrefs(Memblk *f) +{ + int i; + long nfails; + + nfails = 0; + isfile(f); + if((f->addr&Fakeaddr) == 0 && f->addr >= fs->limit){ + warn("check: '%s' d%#010ullx: out of range", f->mf->name, f->addr); + return 1; + } + if((f->addr&Fakeaddr) == 0) + if(dbcountref(f->addr) != 0) /* already visited */ + return 0; /* skip children */ + rwlock(f, Rd); + if(catcherror()){ + warn("check: '%s' d%#010ullx: data: %r", f->mf->name, f->addr); + rwunlock(f, Rd); + return 1; + } + for(i = 0; i < nelem(f->d.dptr); i++) + ptrmap(f->d.dptr[i], 0, bcountrefs, nil, Disk); + for(i = 0; i < nelem(f->d.iptr); i++) + 
ptrmap(f->d.iptr[i], i+1, bcountrefs, nil, Disk); + if(DBDIR(f)) + dfdirmap(f, fcountref, &nfails, Rd); + noerror(); + rwunlock(f, Rd); + return nfails; +} + +static long +fscheckrefs(void) +{ + long nfails; + int i; + Memblk *b; + + dprint("mblk refs...\n"); + clearrefs(Mem); + mbcountref(fs->super); + mbcountref(fs->root); + mbcountref(fs->active); + mbcountref(fs->archive); + mbcountref(fs->cons); + mbcountref(fs->stats); + mbcountref(fs->fzsuper); + countfidrefs(); + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next) + mbcountref(b); + + nfails = 0; + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next) + nfails += mbcounted(b); + nfails += mbcounted(fs->super); + nfails += mbcounted(fs->root); + nfails += mbcounted(fs->active); + nfails += mbcounted(fs->archive); + nfails += mbcounted(fs->cons); + nfails += mbcounted(fs->stats); + nfails += mbcounted(fs->fzsuper); + + if(nfails > 0 && dbg['D']){ + dprint("fscheckrefs: %ld fails. 
sleeping\n", nfails); + fsdump(1, 0); + while(1)sleep(5000); + } + return nfails; +} + +static void +dfcountfree(void) +{ + daddrt addr; + + dprint("list...\n"); + addr = fs->super->d.free; + while(addr != 0){ + if(addr < Dblksz){ + warn("check: d%#010ullx in free list", addr); + break; + } + if(addr >fs->limit){ + warn("check: d%#010ullx: free overflow", addr); + break; + } + dbcountfree(addr, 0); + addr = dbgetref(addr); + } + /* DBref blocks */ + dprint("refs...\n"); + for(addr = Dblk0addr; addr < fs->super->d.eaddr; addr += Dblksz*Nblkgrpsz){ + dbcountfree(addr, 1); /* even DBref */ + dbcountfree(addr+Dblksz, 1); /* odd DBref */ + dbcountfree(addr+2*Dblksz, 1); /* check DBref */ + } +} + +static uvlong +mleaks(void) +{ + uvlong nblk, nfails, n; + Memblk *p; + + dprint("mblk leaks...\n"); + nfails = 0; + if(fs->nblk != fs->nmused + fs->nmfree){ + warn("block leaks: %ulld blks != %ulld used + %ulld free", + fs->nblk, fs->nmused, fs->nmfree); + nfails++; + } + nblk = fs->clean.n + fs->dirty.n + fs->refs.n; + nblk++; /* super */ + nblk++; /* cons */ + nblk++; /* root */ + nblk++; /* stats */ + if(nblk != fs->nmused){ + warn("check: %ulld blocks linked != %ulld blocks used", + nblk, fs->nmused); + fs->super->unlinkpc = 0; + fs->root->unlinkpc = 0; + fs->cons->unlinkpc = 0; + fs->stats->unlinkpc = 0; + for(p = fs->free; p != nil; p = p->next) + p->unlinkpc = 0; + for(p = fs->dirty.hd; p != nil; p = p->lnext) + p->unlinkpc = 0; + for(p = fs->clean.hd; p != nil; p = p->lnext) + p->unlinkpc = 0; + for(p = fs->refs.hd; p != nil; p = p->lnext) + p->unlinkpc = 0; + for(n = 0; n < fs->nblk; n++) + if(fs->blk[n].unlinkpc != 0){ + warn("check: block unlinked at %#p:\n%H", + fs->blk[n].unlinkpc, &fs->blk[n]); + nfails++; + } + } + return nfails; +} + +static uvlong +dleaks(void) +{ + daddrt n, addr, c; + long nfails; + + dprint("dblk leaks...\n"); + clearrefs(Disk); + nfails = dfcountrefs(fs->root); + dfcountfree(); + + for(addr = Dblk0addr; addr < fs->super->d.eaddr; addr += 
Dblksz){ + c = dbcounted(addr); + if(c == 0){ + warn("check: d%#010ullx: leak", addr); + nfails++; + continue; + } + if(addr < Dblk0addr || c == ~0) + continue; + n = dbgetref(addr); + if(n != c){ + warn("check: d%#010ullx: found %ulld != ref %ulld", + addr, c, n); + nfails++; + } + } + return nfails; +} +/* + * Failed checks are reported but not fixed (but for leaked blocks). + * The user is expected to format the partition and restore contents from venti. + * We might easily remove the dir entries for corrupt files, and restore + */ +int +fscheck(void) +{ + long nfails; + + xqlock(&fs->fzlk); + xrwlock(&fs->quiescence, Wr); + nfails = 0; + if(catcherror()){ + xrwunlock(&fs->quiescence, Wr); + xqunlock(&fs->fzlk); + warn("check: %r"); + nfails++; + return nfails; + } + + warn("check..."); + + nfails += mleaks(); + nfails += fscheckrefs(); + if(nfails == 0) + nfails += dleaks(); + + xrwunlock(&fs->quiescence, Wr); + xqunlock(&fs->fzlk); + noerror(); + if(nfails > 0 && dbg['D']){ + dprint("fscheck: %ld fails. 
sleeping\n", nfails); + fsdump(0, 1); + while(1)sleep(5000); + } + if(nfails) + warn("check fails"); + else + warn("check passes"); + return nfails; +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,61 @@ + + +enum +{ + KiB = 1024UL, + MiB = KiB * 1024UL, + GiB = MiB * 1024UL, + +#ifdef TESTING + Incr = 1, + Fsysmem = 800*KiB, /* size for in-memory block array */ + + /* disk parameters; don't change */ + Dblksz = 512UL, /* disk block size */ + Ndptr = 2, /* # of direct data pointers */ + Niptr = 2, /* # of indirect data pointers */ +#else + Incr = 16, + Fsysmem = 1*GiB+GiB/2UL, /* size for in-memory block array */ + + /* disk parameters; don't change */ + Dblksz = 8*KiB, /* disk block size */ + Ndptr = 8, /* # of direct data pointers */ + Niptr = 4, /* # of indirect data pointers */ + +#endif + + Syncival = 5*60, /* desired sync intervals (s) */ + Mmaxdirtypcent = 50, /* Max % of blocks dirty in mem */ + Mminfree = 200, /* # blocks when low on mem blocks */ + Dminfree = 2000, /* # blocks when low on disk blocks */ + Dminattrsz = Dblksz/2, /* min size for attributes */ + + Nahead = 10 * Dblksz, /* # of bytes to read ahead */ + + /* + * Caution: Stack and Errstack also limit the max tree depth, + * because of recursive routines (in the worst case). + */ + Stack = 128*KiB, /* stack size for threads */ + Errstack = 128, /* max # of nested error labels */ + Npathels = 64, /* max depth (only used by walkto) */ + + Fhashsz = 7919, /* size of file hash (plan9 has 35454 files). 
*/ + Fidhashsz = 97, /* size of the fid hash size */ + Uhashsz = 97, + + Rpcspercli = 20, /* != 0 places a limit */ + + Nlstats = 1009, /* # of lock profiling entries */ + + Mmaxfree = 10*Mminfree, /* high on mem blocks */ + Dmaxfree = 2*Dminfree, /* high on disk blocks */ + Mzerofree = 10, /* out of memory blocks */ + Dzerofree = 10, /* out of disk blocks */ + + Unamesz = 20, + Statsbufsz = 1024, + +}; + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,43 @@ +#include +#include +#include + +char dbg[256]; +static char sdbg[256]; +static Ref nodbg; + +void +nodebug(void) +{ + incref(&nodbg); + if(nodbg.ref == 1) + memmove(sdbg, dbg, sizeof dbg); + memset(dbg, 0, sizeof dbg); +} + +void +debug(void) +{ + if(decref(&nodbg) == 0) + memmove(dbg, sdbg, sizeof dbg); +} + +int +setdebug(void) +{ + int r; + + r = nodbg.ref; + if(r > 0) + memmove(dbg, sdbg, sizeof dbg); + return r; +} + +void +rlsedebug(int r) +{ + nodbg.ref = r; + if(r > 0) + memset(dbg, 0, sizeof dbg); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,30 @@ +/* + * '9': 9p + * 'N': mblk/dblk alloc/free chdentry, drefs + * 'D': disk + * 'E': fids + * 'F': slices, indirects, dirnth + * 'K': reclaim + * 'M': mblk/dblk gets puts + * 'P': procs + * 'R': block read + * 'W': block write + * 'X': ix + * 'd': general debug + * 'O': lru blocks out + * 'Z': policy + */ +#define d9print(...) if(!dbg['9']){}else fprint(2, __VA_ARGS__) +#define dNprint(...) if(!dbg['N']){}else fprint(2, __VA_ARGS__) +#define dEprint(...) if(!dbg['E']){}else fprint(2, __VA_ARGS__) +#define dFprint(...) if(!dbg['F']){}else fprint(2, __VA_ARGS__) +#define dKprint(...) if(!dbg['K']){}else fprint(2, __VA_ARGS__) +#define dMprint(...) if(!dbg['M']){}else fprint(2, __VA_ARGS__) +#define dPprint(...) if(!dbg['P']){}else fprint(2, __VA_ARGS__) +#define dRprint(...) 
if(!dbg['R']){}else fprint(2, __VA_ARGS__) +#define dWprint(...) if(!dbg['W']){}else fprint(2, __VA_ARGS__) +#define dXprint(...) if(!dbg['X']){}else fprint(2, __VA_ARGS__) +#define dOprint(...) if(!dbg['O']){}else fprint(2, __VA_ARGS__) +#define dZprint(...) if(!dbg['Z']){}else fprint(2, __VA_ARGS__) +#define dprint(...) if(!dbg['d']){}else fprint(2, __VA_ARGS__) +extern char dbg[256]; --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,700 @@ +#include "all.h" + +/* + * disk blocks, built upon memory blocks provided by mblk.c + * see dk.h + */ + +int swreaderr, swwriteerr; + +static void +okaddr(daddrt addr) +{ + if((addr&Fakeaddr) == 0 && (addr < Dblksz || addr >= fs->limit)) + warnerror("bad address d%#010ullx", addr); +} + +static void +okdiskaddr(daddrt addr) +{ + if((addr&Fakeaddr) != 0 || addr < Dblksz || addr >= fs->limit) + fatal("okdiskaddr %#ullx", addr); +} + +void +dbclear(u64int tag, daddrt addr) +{ + static Diskblk d; + static QLock lk; + + dWprint("dbclear d%#ullx\n", addr); + xqlock(&lk); + d.tag = tag; + if(pwrite(fs->fd, &d, sizeof d, addr) != Dblksz){ + xqunlock(&lk); + warnerror("dbclear: d%#ullx: %r", addr); + } + xqunlock(&lk); +} + +void +meltedref(Memblk *rb) +{ + if(canqlock(&fs->refs)) + fatal("meltedref rlk"); + if(rb->frozen){ + dWprint("melted ref dirty=%d\n", rb->dirty); + dbwrite(rb); + rb->frozen = 0; + } +} + +static daddrt +newblkaddr(void) +{ + daddrt addr, naddr; + + xqlock(fs); + if(catcherror()){ + xqunlock(fs); + error(nil); + } +Again: + if(fs->super == nil) + addr = Dblksz; + else if(fs->super->d.free != 0){ + addr = fs->super->d.free; + okdiskaddr(addr); + /* + * Caution: can't acquire new locks while holding the fs lock, + * but dbgetref may allocate blocks. + */ + xqunlock(fs); + if(catcherror()){ + xqlock(fs); /* restore the default in this fn. 
*/ + error(nil); + } + naddr = dbgetref(addr); /* acquires locks */ + if(naddr != 0) + okdiskaddr(naddr); + noerror(); + xqlock(fs); + if(addr != fs->super->d.free){ + /* had a race */ + goto Again; + } + fs->super->d.free = naddr; + fs->super->d.ndfree--; + changed(fs->super); + }else if(fs->super->d.eaddr < fs->limit){ + addr = fs->super->d.eaddr; + fs->super->d.eaddr += Dblksz; + changed(fs->super); + /* + * ref blocks are allocated and initialized on demand, + * and they must be zeroed before used. + * do this holding the lock so others find everything + * initialized. + */ + if(((addr-Dblk0addr)/Dblksz)%Nblkgrpsz == 0){ + dprint("new ref blk addr = d%#ullx\n", addr); + /* on-demand fs initialization */ + dbclear(TAG(DBref, 0, addr), addr); + dbclear(TAG(DBref, 0, addr+Dblksz), addr+Dblksz); + dbclear(TAG(DBref, 0, addr+2*Dblksz), addr+2*Dblksz); + addr += 3*Dblksz; + fs->super->d.eaddr += 3*Dblksz; + if(fs->super->d.eaddr > fs->limit) + sysfatal("disk is full"); + } + }else{ + addr = 0; + /* preserve backward compatibility with fossil */ + sysfatal("disk is full"); + } + + noerror(); + xqunlock(fs); + okaddr(addr); + dNprint("newblkaddr = d%#ullx\n", addr); + return addr; +} + +daddrt +addrofref(daddrt refaddr, int idx) +{ + return refaddr + idx*Dblksz; +} + +daddrt +refaddr(daddrt addr, int *idx) +{ + daddrt bno, refaddr; + + addr -= Dblk0addr; + bno = addr/Dblksz; + *idx = bno%Nblkgrpsz; + refaddr = Dblk0addr + bno/Nblkgrpsz * Nblkgrpsz * Dblksz; + return refaddr; +} + +/* + * db*ref() functions update the on-disk reference counters. + * memory blocks use Memblk.Ref instead. Beware. 
+ */
+
+/*
+ * Worker for the db*ref() functions: look up the on-disk reference
+ * counter for addr and optionally change it.
+ * If set is not zero the counter becomes set; otherwise delta is added
+ * to it (delta and set both zero just read the counter).
+ * Returns the resulting counter, or 0 without touching anything (not
+ * even *rbp or *ip) when addr is 0 or a fake address.
+ * If rbp is not nil the DBref block is handed to the caller, still
+ * referenced, in *rbp; otherwise it is released here.
+ * If ip is not nil, *ip is set to the index of addr in the ref block.
+ */
+static daddrt
+dbaddref(daddrt addr, int delta, int set, Memblk **rbp, int *ip)
+{
+	Memblk *rb;
+	daddrt raddr, ref;
+	int i;
+
+	if(addr == 0)
+		return 0;
+	if(addr&Fakeaddr)	/* root and ctl files don't count */
+		return 0;
+
+	raddr = refaddr(addr, &i);
+	rb = dbget(DBref, raddr);
+
+	xqlock(&fs->refs);
+	if(catcherror()){
+		mbput(rb);
+		xqunlock(&fs->refs);
+		/* NOTE(review): no matching nodebug() is visible in this function -- confirm */
+		debug();
+		error(nil);
+	}
+	if(delta != 0 || set != 0){
+		if(delta != 0){
+			/*
+			 * A ref slot holds either a counter or, for free blocks,
+			 * the address of the next free block (see xdbput), so
+			 * a value >= Dblksz is a free list link.
+			 * Report the block being changed: the slot value is
+			 * the link in one case and always 0 in the other.
+			 */
+			if(rb->d.ref[i] >= Dblksz)
+				fatal("dbaddref: d%#010ullx in free list", addr);
+			if(rb->d.ref[i] == 0)
+				fatal("dbaddref: d%#010ullx double free", addr);
+		}
+		meltedref(rb);
+		if(set)
+			rb->d.ref[i] = set;
+		else
+			rb->d.ref[i] += delta;
+		rb->dirty = 1;
+	}
+	ref = rb->d.ref[i];
+	noerror();
+	xqunlock(&fs->refs);
+	if(rbp == nil)
+		mbput(rb);
+	else
+		*rbp = rb;
+	if(ip != nil)
+		*ip = i;
+	return ref;
+}
+
+daddrt
+dbgetref(daddrt addr)
+{
+	if(fs->worm)
+		return 6ULL;
+	return dbaddref(addr, 0, 0, nil, nil);
+}
+
+void
+dbsetref(daddrt addr, int ref)
+{
+	daddrt n;
+
+	if(fs->worm)
+		return;
+	n = dbaddref(addr, 0, ref, nil, nil);
+	dNprint("dbsetref %#010ullx -> %ulld\tpc %#p\n", addr, n, getcallerpc(&addr));
+}
+
+daddrt
+dbincref(daddrt addr)
+{
+	daddrt n;
+
+	if(fs->worm)
+		return 6ULL;
+	n = dbaddref(addr, +1, 0, nil, nil);
+	dNprint("dbincref %#010ullx -> %ulld\tpc %#p\n", addr, n, getcallerpc(&addr));
+	return n;
+}
+
+static daddrt
+dbdecref(daddrt addr, Memblk **rb, int *idx)
+{
+	daddrt n;
+
+	if(fs->worm)
+		return 6ULL;
+	n = dbaddref(addr, -1, 0, rb, idx);
+	dNprint("dbdecref %#010ullx -> %ulld\tpc %#p\n", addr, n, getcallerpc(&addr));
+	return n;
+}
+
+static void
+nodoublefree(daddrt addr)
+{
+	daddrt a;
+
+	if(addr == 0)
+		return;
+	for(a = fs->super->d.free; a != 0; a = dbgetref(a))
+		if(a == addr)
+			fatal("double free for addr d%#ullx", addr);
+}
+
+static long xdbput(Memblk *b, int type, daddrt addr, int isdir);
+
+static long
+dropdentries(void *p, int n)
+{
+	int i;
+	daddrt *d;
+
long tot; + + tot = 0; + d = p; + for(i = 0; i < n; i++) + if(d[i] != 0) + tot += xdbput(nil, DBfile, d[i], 0); + return tot; +} + +/* + * Drop a on-disk reference. + * When no references are left, the block is unlinked from the hash + * (and its hash ref released), and disk references to blocks pointed to by + * this blocks are also decremented (and perhaps such blocks released). + * + * More complex than needed, because we don't want to read a data block + * just to release a reference to it, unless it's a data block for a directory. + * + * b may be nil if type and addr are given, for recursive calls. + */ + +static long +xdbput(Memblk *b, int type, daddrt addr, int isdir) +{ + daddrt ref; + Memblk *mb, *rb; + int i, idx, sz; + uvlong doff; + long tot; + + if(b == nil && addr == 0) + return 0; + if(fs->worm) + return 1; + okdiskaddr(addr); + ref = dbgetref(addr); + if(ref > 2*Dblksz) + fatal("dbput: d%#010ullx: double free", addr); + + ref = dbdecref(addr, &rb, &idx); + if(ref != 0){ + dKprint("dbput: d%#010ullx dr %#ullx type %s\n", + addr, ref, tname(type)); + mbput(rb); + return 0; + } + /* + * Gone from disk, be sure it's also gone from memory. 
+ */ + if(catcherror()){ + mbput(rb); + error(nil); + } + mb = b; + if(mb == nil){ + if(isdir || type != DBdata) + mb = dbget(type, addr); + else + mb = mbget(type, addr, Dontmk); + } + + dKprint("dbput: free: %H\n", mb); + tot = 1; + if(mb != nil){ + isdir |= DBDIR(mb); + assert(type == mb->type && addr == mb->addr && mb->ref > 1); + mbunhash(mb, 0); + } + if(catcherror()){ + if(mb != b) + mbput(mb); + error(nil); + } + switch(type){ + case DBsuper: + case DBref: + fatal("dbput: super or ref"); + case DBdata: + if(isdir) + tot += dropdentries(mb->d.data, Dblkdatasz/Daddrsz); + break; + case DBattr: + break; + case DBfile: + if(isdir) + assert(mb->d.mode&DMDIR); + else + assert((mb->d.mode&DMDIR) == 0); + tot += xdbput(nil, DBattr, mb->d.aptr, 0); + for(i = 0; i < nelem(mb->d.dptr); i++){ + tot += xdbput(nil, DBdata, mb->d.dptr[i], isdir); + mb->d.dptr[i] = 0; + } + for(i = 0; i < nelem(mb->d.iptr); i++){ + tot += xdbput(nil, DBptr0+i, mb->d.iptr[i], isdir); + mb->d.iptr[i] = 0; + } + if(isdir){ + doff = embedattrsz(mb); + sz = Embedsz-doff; + tot += dropdentries(mb->d.embed+doff, sz/Daddrsz); + } + break; + default: + if(type < DBptr0 || type >= DBptr0+Niptr) + fatal("dbput: type %d", type); + for(i = 0; i < Dptrperblk; i++){ + tot += xdbput(nil, mb->type-1, mb->d.ptr[i], isdir); + mb->d.ptr[i] = 0; + } + } + noerror(); + + if(mb != b) + mbput(mb); + + if(dbg['d']) + assert(mbget(type, addr, Dontmk) == nil); + + if(dbg['K']) + nodoublefree(addr); + xqlock(fs); + xqlock(&fs->refs); + rb->d.ref[idx] = fs->super->d.free; + fs->super->d.free = addr; + fs->super->d.ndfree++; + xqunlock(&fs->refs); + xqunlock(fs); + noerror(); + mbput(rb); + + return tot; +} + +long +dbput(Memblk *b) +{ + if(b == nil) + return 0; + return xdbput(b, b->type, b->addr, DBDIR(b)); +} + +static daddrt +newfakeaddr(void) +{ + static daddrt addr = ~0; + daddrt n; + + xqlock(fs); + addr -= Dblksz; + n = addr; + xqunlock(fs); + return n|Fakeaddr; +} + +Memblk* +dballocz(uint type, int dbit, int 
zeroit) +{ + Memblk *b; + daddrt addr; + int ctl; + + ctl = type == DBctl; + if(ctl){ + type = DBfile; + addr = newfakeaddr(); + }else + addr = newblkaddr(); + b = mballocz(addr, zeroit); + b->d.tag = TAG(type, dbit, b->addr); + b->type = type; + if(catcherror()){ + mbput(b); + debug(); + error(nil); + } + if((addr&Fakeaddr) == 0 && addr >= Dblk0addr) + dbsetref(addr, 1); + if(type == DBfile) + b->mf = anew(&mfalloc); + b = mbhash(b); + changed(b); + noerror(); + dNprint("dballoc %s -> %H\n", tname(type), b); + return b; +} + +/* + * BUG: these should ensure that all integers are converted between + * little endian (disk format) and the machine endianness. + * We know the format of all blocks and the type of all file + * attributes. Those are the integers to convert to fix the bug. + */ +static Memblk* +hosttodisk(Memblk *b) +{ + if(catcherror()) + fatal("hosttodisk: bad tag"); + checktag(b->d.tag, b->type, b->addr); + noerror(); + incref(b); + return b; +} + +static void +disktohost(Memblk *b) +{ + static union + { + u64int i; + uchar m[BIT64SZ]; + } u; + + u.i = 0x1122334455667788ULL; + if(u.m[0] != 0x88) + fatal("fix hosttodisk/disktohost for big endian"); + checkblk(b); +} + +static int +isfakeref(daddrt addr) +{ + addr -= Dblk0addr; + addr /= Dblksz; + return (addr%Nblkgrpsz) == 2; +} + +/* + * Write the block a b->addr. + * DBrefs are written at even (b->addr) or odd (b->addr+DBlksz) + * reference blocks as indicated by the frozen super block to be written. + */ +long +dbwrite(Memblk *b) +{ + Memblk *nb; + static int nw; + daddrt addr; + + if(b->addr&Fakeaddr) + fatal("dbwrite: fake addr %H", b); + if(b->dirty == 0) + return 0; + addr = b->addr; + /* + * super switches between even/odd DBref blocks, plus there's a + * fake DBref block used just for fscheck() counters. 
+ */ + if(b->type == DBref){ + assert(fs->fzsuper != nil); + if(fs->fzsuper->d.oddrefs && !isfakeref(b->addr)) + addr += Dblksz; + } + dWprint("dbwriting at d%#010ullx %H\n",addr, b); + nb = hosttodisk(b); + if(swwriteerr != 0 && ++nw > swwriteerr){ + written(b); /* what can we do? */ + mbput(nb); + warnerror("dbwrite: sw fault"); + } + if(pwrite(fs->fd, &nb->d, sizeof nb->d, addr) != Dblksz){ + written(b); /* what can we do? */ + mbput(nb); + warnerror("dbwrite: d%#ullx: %r", b->addr); + } + written(b); + mbput(nb); + + return Dblksz; +} + +long +dbread(Memblk *b) +{ + static int nr; + long tot, n; + uchar *p; + daddrt addr; + + if(b->addr&Fakeaddr) + fatal("dbread: fake addr %H", b); + p = b->d.ddata; + addr = b->addr; + /* + * super switches between even/odd DBref blocks, plus there's a + * fake DBref block used just for fscheck() counters. + */ + if(b->type == DBref && fs->super->d.oddrefs && !isfakeref(b->addr)) + addr += Dblksz; + for(tot = 0; tot < Dblksz; tot += n){ + if(swreaderr != 0 && ++nr > swreaderr) + warnerror("dbread: sw fault"); + n = pread(fs->fd, p+tot, Dblksz-tot, addr + tot); + if(n == 0) + werrstr("eof on disk file"); + if(n <= 0) + warnerror("dbread: d%#ullx: %r", b->addr); + } + assert(tot == sizeof b->d && tot == Dblksz); + + dRprint("dbread from d%#010ullx tag %#ullx %H\n", addr, b->d.tag, b); + disktohost(b); + if(b->type != DBref) + b->frozen = 1; + + return tot; +} + +Memblk* +dbget(uint type, daddrt addr) +{ + Memblk *b; + + dMprint("dbget %s d%#010ullx\n", tname(type), addr); + okaddr(addr); + b = mbget(type, addr, Mkit); + if(b == nil) + error("i/o error"); + if(b->loading == 0) + return b; + + /* the file is new, must read it */ + if(catcherror()){ + xqunlock(&b->newlk); /* awake those waiting for it */ + mbunhash(b, 0); /* put our ref and the hash ref */ + mbput(b); + error(nil); + } + dbread(b); + checktag(b->d.tag, type, addr); + assert(b->type == type); + if(type == DBfile){ + assert(b->mf == nil); + b->mf = anew(&mfalloc); + 
gmeta(b, b->d.embed, Embedsz); + if(b->d.mode&DMDIR) + assert(DBDIR(b)); + else + assert(!DBDIR(b)); + } + b->loading = 0; + noerror(); + xqunlock(&b->newlk); + return b; +} + +static void +dupdentries(void *p, int n) +{ + int i; + daddrt *d; + + d = p; + for(i = 0; i < n; i++) + if(d[i] != 0){ + dNprint("add ref on dup d%#ullx\n", d[i]); + dbincref(d[i]); + } +} + +/* + * caller responsible for locking. + * On errors we may leak disk blocks because of added references. + * Isdir flags that the block belongs to a dir, so we could add references + * to dir entries. + */ +Memblk* +dbdup(Memblk *b) +{ + Memblk *nb; + int i; + ulong doff, sz; + + nb = dballocz(b->type, DBDIR(b), 0); + if(catcherror()){ + mbput(nb); + error(nil); + } + switch(b->type){ + case DBfree: + case DBref: + case DBsuper: + case DBattr: + fatal("dbdup: %s", tname(b->type)); + case DBdata: + memmove(nb->d.data, b->d.data, Dblkdatasz); + if(DBDIR(b) != 0) + dupdentries(b->d.data, Dblkdatasz/Daddrsz); + break; + case DBfile: + if(!b->frozen) + isrwlocked(b, Rd); + nb->d.asize = b->d.asize; + nb->d.aptr = b->d.aptr; + nb->d.ndents = b->d.ndents; + if(nb->d.aptr != 0) + dbincref(b->d.aptr); + for(i = 0; i < nelem(b->d.dptr); i++){ + nb->d.dptr[i] = b->d.dptr[i]; + if(nb->d.dptr[i] != 0) + dbincref(b->d.dptr[i]); + } + for(i = 0; i < nelem(b->d.iptr); i++){ + nb->d.iptr[i] = b->d.iptr[i]; + if(nb->d.iptr[i] != 0) + dbincref(b->d.iptr[i]); + } + nb->d.Dmeta = b->d.Dmeta; + memmove(nb->d.embed, b->d.embed, Embedsz); + gmeta(nb, nb->d.embed, Embedsz); + if(DBDIR(b) != 0){ + doff = embedattrsz(nb); + sz = Embedsz-doff; + dupdentries(nb->d.embed+doff, sz/Daddrsz); + } + /* + * no race: caller takes care. 
+ */ + if(b->frozen && b->mf->melted == nil){ + incref(nb); + b->mf->melted = nb; + } + break; + default: + if(b->type < DBptr0 || b->type >= DBptr0 + Niptr) + fatal("dbdup: bad type %d", b->type); + for(i = 0; i < Dptrperblk; i++){ + nb->d.ptr[i] = b->d.ptr[i]; + if(nb->d.ptr[i] != 0) + dbincref(nb->d.ptr[i]); + } + } + changed(nb); + noerror(); + + /* when b is a frozen block, it's likely we won't use it more, + * because we now have a melted one. + * pretend it's the lru one. + */ + if(b->frozen) + mbunused(b); + + return nb; +} + Binary files /sys/src/cmd/creepydir/disk and creepy/disk differ Binary files /sys/src/cmd/creepydir/disk1 and creepy/disk1 differ --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,563 @@ +typedef struct Ddatablk Ddatablk; +typedef struct Dptrblk Dptrblk; +typedef struct Drefblk Drefblk; +typedef struct Dattrblk Dattrblk; +typedef struct Dfileblk Dfileblk; +typedef struct Dsuperblk Dsuperblk; +typedef struct Dsuperdata Dsuperdata; +typedef union Diskblk Diskblk; +typedef struct Diskblkhdr Diskblkhdr; +typedef struct Memblk Memblk; +typedef struct Fsys Fsys; +typedef struct Dmeta Dmeta; +typedef struct Blksl Blksl; +typedef struct Mfile Mfile; +typedef struct Cmd Cmd; +typedef struct Path Path; +typedef struct Alloc Alloc; +typedef struct Next Next; +typedef struct Lstat Lstat; +typedef struct List List; +typedef struct Link Link; +typedef struct Usr Usr; +typedef struct Member Member; +typedef struct Meminfo Meminfo; + +/* + * Conventions: + * + * References: + * - Ref is used for in-memory RCs. This has nothing to do with on-disk refs. + * - Mem refs include the reference from the hash. That one keeps the file + * loaded in memory while unused. + * - The hash ref also accounts for refs from the lru/ref/dirty lists. + * - Disk refs count only references within the tree on disk. + * - There are two copies of disk references, even, and odd. + * Only one of them is active. 
Every time the system is written, + * the inactive copy becomes active and vice-versa. Upon errors, + * the active copy on disk is always coherent because the super is + * written last. + * - Children do not add refs to parents; parents do not add ref to children. + * - 9p, fscmd, ix, and other top-level shells for the fs are expected to + * keep Paths for files in use, so that each file in the path + * is referenced once by the path + * - example, on debug fsdump()s: + * r=2 -> 1 (from hash) + 1 (while dumping the file info). + * (block is cached, in the hash, but unused otherwise). + * r=3 in /active: 1 (hash) + 1(fs->active) + 1(dump) + * r is greater: + * - some fid is referencing the block + * - it's a melt and the frozen f->mf->melted is a ref. + * - some rpc is using it (reading/writing/...) + * + * Assumptions: + * - /active is *never* found on disk, it's memory-only. + * - b->addr is worm. + * - parents of files loaded in memory are also in memory. + * (but this does not hold for pointer and data blocks). + * - We try not to hold more than one lock, using the + * reference counters when we need to be sure that + * an unlocked resource does not vanish. + * - reference blocks are never removed from memory. + * - disk refs are frozen while waiting to go to disk during a fs freeze. + * in which case db*ref functions write the block in place and melt it. + * - frozen blocks are quiescent. + * - mb*() functions do not raise errors. + * + * Locking: + * - the caller to functions in [mbf]blk.c acquires the locks before + * calling them, and makes sure the file is melted if needed. + * This prevents races and deadlocks. + * - blocks are locked by the file responsible for them, when not frozen. + * - next fields in blocks are locked by the list they are used for. + * + * Lock order: + * - fs, super,... : while locked can't acquire fs or blocks. + * - parent -> child + * (but a DBfile protects all ptr and data blocks under it). 
+ * - block -> ref block + * + * All the code assumes outofmemoryexits = 1. + */ + +/* + * these are used by several functions that have flags to indicate + * mem-only, also on disk; and read-access/write-access. (eg. dfmap). + */ +enum{ + Mem=0, + Disk, + + Rd=0, + Wr, + + Dontmk = 0, + Mkit, + + Tqlock = 0, + Trwlock, + Tlock, + + No = 0, + Yes, + + Normal = 0, + Worm, + + Pre = 0, + Post, + + /* mtime is ns in creepy, but s in 9p */ + NSPERSEC = 1000000000ULL, +}; + + +struct Lstat +{ + int type; + uintptr pc; + int ntimes; + int ncant; + vlong wtime; +}; + +enum +{ + DMUSERS = 0x01000000ULL, + DMBITS = DMDIR|DMAPPEND|DMEXCL|DMTMP|0777, +}; + +#define HOWMANY(x, y) (((x)+((y)-1))/(y)) +#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) + +/* + * ##### On disk structures. ##### + * + * All on-disk integer values are little endian. + * + * blk 0: unused + * blk 1: super + * even ref blk + odd ref blk + check ref blk + Nblkgrpsz-3 blocks + * ... + * even ref blk + odd ref blk + check ref blk + Nblkgrpsz-3 blocks + * + * The code assumes these structures are packed. + * Be careful if they are changed to make things easy for the + * compiler and keep them naturally aligned. + */ + +/* + * header for all disk blocks. + */ +struct Diskblkhdr +{ + u64int tag; /* block tag */ + u64int cnt; /* ref counter during checks */ +}; + +enum +{ + /* block types */ + DBfree = 0, + DBref, + DBattr, + DBfile, + DBsuper, + DBdata, /* 5: data block */ + DBptr0 = DBdata+1, /* simple-indirect block */ + /* double */ + /* triple */ + /*...*/ + DBctl = ~0, /* DBfile, never on disk. 
arg for dballoc */ + + Daddrsz = BIT64SZ, + Dblkhdrsz = sizeof(Diskblkhdr), + Nblkgrpsz = (Dblksz - Dblkhdrsz) / Daddrsz, + Dblk0addr = 2*Dblksz, + +}; + +typedef u64int daddrt; /* disk addreses and sizes */ + +struct Ddatablk +{ + uchar data[1]; /* raw memory */ +}; + +struct Dptrblk +{ + daddrt ptr[1]; /* array of block addresses */ +}; + +struct Drefblk +{ + daddrt ref[1]; /* disk RC or next block in free list */ +}; + +struct Dattrblk +{ + daddrt next; /* next block used for attribute data */ + uchar attr[1]; /* raw attribute data */ +}; + +struct Dmeta /* mandatory metadata */ +{ + u64int id; /* ctime, actually */ + u64int mode; + u64int atime; + u64int mtime; + u64int length; + u64int uid; + u64int gid; + u64int muid; + /* name\0 */ +}; + +/* + * The trailing part of the file block is used to store attributes + * and initial file data. + * At least Dminattrsz is reserved for attributes, at most + * all the remaining embedded space. + * Past the attributes, starts the file data. + * If more attribute space is needed, an attribute block is allocated. + * For huge attributes, it is suggested that a file is allocated and + * the attribute value refers to that file. + * The pointer in iptr[n] is an n-indirect data pointer. + * + * Directories are also files, but their data is simply an array of + * disk addresses for files. + * + * To ensure embed is a multiple of dir entries, we declare it here as [8] + * and not as [1]. + */ +struct Dfileblk +{ + u64int asize; /* attribute size */ + u64int ndents; /* # of directory entries, for dirs */ + daddrt aptr; /* attribute block pointer */ + daddrt dptr[Ndptr]; /* direct data pointers */ + daddrt iptr[Niptr]; /* indirect data pointers */ + Dmeta; /* predefined attributes, followed by name */ + uchar embed[Daddrsz]; /* embedded attrs and data */ +}; + +#define MAGIC 0x6699BCB06699BCB0ULL +/* + * Superblock. + * The stored tree is: + * archive/ root of the archived tree + * + * ... 
+ * (/ and /active are only memory and never on disk, parts + * under /active that are on disk are shared with entries in /archive) + * + * It contains two copies of the information, Both should be identical. + * If there are errors while writing this block, the one with the + * oldest epoch should be ok. + */ +struct Dsuperdata +{ + u64int magic; /* MAGIC */ + u64int epoch; + daddrt free; /* first free block on list */ + daddrt eaddr; /* end of the assigned disk portion */ + daddrt root; /* address of /archive in disk */ + u64int oddrefs; /* use odd ref blocks? or even ref blocks? */ + u64int ndfree; /* # of blocks in free list */ + u64int maxuid; /* 1st available uid */ + u64int dblksz; /* only for checking */ + u64int nblkgrpsz; /* only for checking */ + u64int dminattrsz; /* only for checking */ + u64int ndptr; /* only for checking */ + u64int niptr; /* only for checking */ + u64int _avail_; /* now unused */ + u64int embedsz; /* only for checking */ + u64int dptrperblk; /* only for checking */ +}; + +struct Dsuperblk +{ + union{ + Dsuperdata; + uchar align[Dblksz/2]; + }; + Dsuperdata dup; +}; + +enum +{ + /* addresses for ctl files and / have this bit set, and are never + * found on disk. + */ + Fakeaddr = 0x8000000000000000ULL, + Noaddr = ~0ULL, +}; + +enum +{ + DFdir = 0x80U, /* flag for directories in tags */ + DFreg = 0, /* flag for files in tags */ +}; +#define TAG(type,dir,addr) ((addr)<<8|((dir)&DFdir)|((type)&0x7F)) +#define TAGTYPE(t) ((t)&0x7F) +#define TAGDIR(t) ((t)&DFdir) +#define DBDIR(b) ((b)->d.tag&DFdir) +#define TAGADDROK(t,addr) (((t)&~0xFF) == ((addr)<<8)) + +/* + * disk blocks + */ + +union Diskblk +{ + struct{ + Diskblkhdr; + union{ + Ddatablk; /* data block */ + Dptrblk; /* pointer block */ + Drefblk; /* reference counters block */ + Dattrblk; /* attribute block */ + Dfileblk; /* file block */ + Dsuperblk; + }; + }; + uchar ddata[Dblksz]; +}; + +/* + * These are derived. 
+ * Embedsz must compensate that embed[] was declared as embed[Daddrsz], + * to make it easy for the compiler to keep things aligned on 64 bits. + */ +enum +{ + Dblkdatasz = sizeof(Diskblk) - sizeof(Diskblkhdr), + Embedsz = Dblkdatasz - sizeof(Dfileblk) + Daddrsz, + Dptrperblk = Dblkdatasz / Daddrsz, + Drefperblk = Dblkdatasz / Daddrsz, +}; + + +/* + * File attributes are name/value pairs. + * By now, only mandatory attributes are implemented, and + * have names implied by their position in the Dmeta structure. + */ + +/* + * ##### On memory structures. ##### + */ + +/* + * On memory file information. + */ +struct Mfile +{ + Mfile* next; /* in free list */ + RWLock; + + char *uid; /* reference to the user table */ + char *gid; /* reference to the user table */ + char *muid; /* reference to the user table */ + char *name; /* reference to the disk block */ + + Memblk* melted; /* next version for this one, if frozen */ + ulong lastbno; /* last accessed block nb within this file */ + ulong sequential; /* access has been sequential */ + + int open; /* for DMEXCL */ + int users; /* is this /active/users? */ + uvlong raoffset; /* we did read ahead up to this offset */ +}; + +struct List +{ + QLock; + Memblk *hd; + Memblk *tl; + long n; +}; + +struct Link +{ + Memblk *lprev; + Memblk *lnext; +}; + +/* + * memory block + */ +struct Meminfo +{ + Ref; + daddrt addr; /* block address */ + Memblk *next; /* in hash or free list */ + + Link; /* clean / dirty / ref lists */ + + Mfile *mf; /* DBfile on-memory info. */ + + int type; + Lock dirtylk; + int dirty; /* must be written */ + int frozen; /* is frozen */ + int loading; /* block is being read */ + int changed; /* for freerefs/writerefs */ + QLock newlk; /* only to wait on DBnew blocks */ + + uintptr unlinkpc; +}; + +struct Memblk +{ + Meminfo; + Diskblk d; +}; + +/* + * Slice into a block, used to read/write file blocks. 
+ */ +struct Blksl +{ + Memblk *b; + void *data; + long len; +}; + +struct Fsys +{ + QLock; + + struct{ + QLock; + Memblk *b; + } fhash[Fhashsz]; /* hash of blocks by address */ + + Memblk *blk; /* static global array of memory blocks */ + uvlong nblk; /* # of entries used */ + uvlong nablk; /* # of entries allocated */ + uvlong nmused; /* blocks in use */ + uvlong nmfree; /* free blocks */ + Memblk *free; /* free list of unused blocks in blk */ + + List clean; /* hd: mru; tl: lru */ + List dirty; /* dirty blocks */ + List refs; /* DBref blocks, neither in clean nor dirty lists */ + + QLock mlk; + Mfile *mfree; /* unused list */ + + + Memblk *super; /* locked by blklk */ + Memblk *root; /* only in memory */ + Memblk *active; /* /active */ + Memblk *archive; /* /archive */ + Memblk *cons; /* /cons */ + Memblk *stats; /* /stats */ + Channel *consc; /* of char*; output for /cons */ + + Memblk *fzsuper; /* frozen super */ + + char *dev; /* name for disk */ + int fd; /* of disk */ + daddrt limit; /* address for end of disk */ + daddrt ndblk; /* # of disk blocks in dev */ + + int nindirs[Niptr]; /* stats */ + int nmelts; + + QLock fzlk; /* freeze, melt, check, write */ + RWLock quiescence; /* any activity rlocks() this */ + QLock policy; /* fspolicy */ + + uvlong atime; /* updated on each request */ + uvlong wtime; /* time for last fswrite */ + + int profile; /* measure times for rpcs */ + int halt; /* user wants to halt */ + int worm; /* operate in worm mode */ + int mode; /* Rd | Wr */ +}; + +/* + * Misc tools. + */ + +struct Cmd +{ + char *name; + void (*f)(int, char**); + int nargs; + char *usage; +}; + +struct Next +{ + Next *next; +}; + +struct Alloc +{ + QLock; + Next *free; + ulong nfree; + ulong nalloc; + usize elsz; + int zeroing; +}; + +/* + * Used to keep references to parents crossed to + * reach files, to be able to build a melted version of the + * children. Also to know the parent of a file for things like + * removals. 
+ */ +struct Path +{ + Path* next; /* in free list */ + Ref; + Memblk** f; + int nroot; + int nf; + int naf; +}; + +struct Member +{ + Member *next; + Usr *u; +}; + +struct Usr +{ + Usr *nnext; /* next by name */ + Usr *inext; /* next by id */ + + int id; + int enabled; + int allow; + Usr *lead; + char name[Unamesz]; + Member *members; +}; + + +#pragma varargck type "H" Memblk* +#pragma varargck type "A" Usr* +#pragma varargck type "P" Path* +#pragma varargck argpos fatal 1 +#pragma varargck argpos warn 1 +#pragma varargck argpos warnerror 1 + +/* used in debug prints to print just part of huge values */ +#define EP(e) ((e)&0xFFFFFFFFUL) + +typedef int(*Blkf)(Memblk*, void*); +typedef int (*Dirf)(Memblk *b, daddrt *de, void *p); + + +extern Fsys*fs; +extern uvlong maxfsz; +extern Alloc mfalloc, pathalloc; +extern int swreaderr, swwriteerr; +extern int fatalaborts; --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,1017 @@ +#include "all.h" + +/* + * File blocks. 
+ * see dk.h + */ + +Path* +walkpath(Memblk *f, char *elems[], int nelems) +{ + int i; + Memblk *nf; + Path *p; + + p = newpath(f); + if(catcherror()){ + putpath(p); + error(nil); + } + isfile(f); + for(i = 0; i < nelems; i++){ + if((f->d.mode&DMDIR) == 0) + error("not a directory"); + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error("walk: %r"); + } + nf = dfwalk(f, elems[i]); + rwunlock(f, Rd); + addelem(&p, nf); + mbput(nf); + f = nf; + USED(&f); /* in case of error() */ + noerror(); + } + noerror(); + return p; +} + +Path* +walkto(char *a, char **lastp) +{ + char *els[Npathels]; + int nels, n; + + n = strlen(a); + nels = gettokens(a, els, nelem(els), "/"); + if(nels < 1) + error("invalid path"); + if(lastp != nil){ + *lastp = a + n - strlen(els[nels-1]); + return walkpath(fs->root, els, nels-1); + }else + return walkpath(fs->root, els, nels); +} + +void +rwlock(Memblk *f, int iswr) +{ + xrwlock(f->mf, iswr); +} + +void +rwunlock(Memblk *f, int iswr) +{ + xrwunlock(f->mf, iswr); +} + +void +isfile(Memblk *f) +{ + if((f->d.mode&DMDIR) != 0) + assert(DBDIR(f)); + else + assert(!DBDIR(f)); + if(f->type != DBfile || f->mf == nil) + fatal("isfile: not a file at pc %#p", getcallerpc(&f)); +} + +void +isrwlocked(Memblk *f, int iswr) +{ + if(f->type != DBfile || f->mf == nil) + fatal("isrwlocked: not a file at pc %#p", getcallerpc(&f)); + if((iswr && canrlock(f->mf)) || (!iswr && canwlock(f->mf))) + fatal("is%clocked at pc %#p", iswr?'w':'r', getcallerpc(&f)); +} + +static void +isdir(Memblk *f) +{ + if((f->d.mode&DMDIR) != 0) + assert(DBDIR(f)); + else + assert(!DBDIR(f)); + if(f->type != DBfile || f->mf == nil) + fatal("isdir: not a file at pc %#p", getcallerpc(&f)); + if((f->d.mode&DMDIR) == 0) + fatal("isdir: not a dir at pc %#p", getcallerpc(&f)); +} + +/* for dfblk only */ +static Memblk* +getmelted(uint type, uint dbit, daddrt *addrp, int *chg) +{ + Memblk *b, *nb; + + *chg = 0; + if(*addrp == 0){ + b = dballocz(type, dbit, 1); + *addrp = b->addr; + 
*chg = 1; + return b; + } + + b = dbget(type, *addrp); + assert(DBDIR(b) == dbit); + nb = nil; + if(!b->frozen) + return b; + if(catcherror()){ + mbput(b); + mbput(nb); + error(nil); + } + nb = dbdup(b); + USED(&nb); /* for error() */ + *addrp = nb->addr; + *chg = 1; + dbput(b); + noerror(); + mbput(b); + return nb; +} + +/* + * Get a file data block, perhaps allocating it on demand + * if mkit. The file must be r/wlocked and melted if mkit. + * + * Adds disk refs for dir entries copied during melts and + * considers that /archive is always melted. + * + * Read-ahead is not considered here. The file only records + * the last accessed block number, to help the caller do RA. + * + */ +static Memblk* +dfblk(Memblk *f, ulong bno, int mkit) +{ + ulong prev, nblks; + int i, idx, nindir, type, chg; + Memblk *b, *pb; + daddrt *addrp; + + if(mkit) + ismelted(f); + + if(bno != f->mf->lastbno){ + f->mf->sequential = (!mkit && bno == f->mf->lastbno + 1); + f->mf->lastbno = bno; + } + + /* + * bno: block # relative to the the block we are looking at. + * prev: # of blocks before the current one. + */ + prev = 0; + chg = 0; + + /* + * Direct block? + */ + if(bno < nelem(f->d.dptr)){ + if(mkit) + b = getmelted(DBdata, DBDIR(f), &f->d.dptr[bno], &chg); + else + b = dbget(DBdata, f->d.dptr[bno]); + if(chg) + changed(f); + return b; + } + bno -= nelem(f->d.dptr); + prev += nelem(f->d.dptr); + + /* + * Indirect block + * nblks: # of data blocks addressed by the block we look at. 
+ */ + nblks = Dptrperblk; + for(i = 0; i < nelem(f->d.iptr); i++){ + if(bno < nblks) + break; + bno -= nblks; + prev += nblks; + nblks *= Dptrperblk; + } + if(i == nelem(f->d.iptr)) + error("offset exceeds file capacity"); + ainc(&fs->nindirs[i]); + type = DBptr0+i; + dFprint("dfblk: indirect %s nblks %uld (ppb %ud) bno %uld\n", + tname(type), nblks, Dptrperblk, bno); + + addrp = &f->d.iptr[i]; + if(mkit) + b = getmelted(type, DBDIR(f), addrp, &chg); + else + b = dbget(type, *addrp); + if(chg) + changed(f); + pb = b; + if(catcherror()){ + mbput(pb); + error(nil); + } + + /* at the loop header: + * pb: parent of b + * b: DBptr block we are looking at. + * addrp: ptr to b within fb. + * nblks: # of data blocks addressed by b + */ + for(nindir = i+1; nindir >= 0; nindir--){ + chg = 0; + dFprint("indir %s d%#ullx nblks %uld ptrperblk %d bno %uld\n\n", + tname(DBdata+nindir), *addrp, nblks, Dptrperblk, bno); + idx = 0; + if(nindir > 0){ + nblks /= Dptrperblk; + idx = bno/nblks; + } + if(*addrp == 0 && !mkit){ + /* hole */ + warn("HOLE"); + b = nil; + }else{ + assert(type >= DBdata); + if(mkit) + b = getmelted(type, DBDIR(f), addrp, &chg); + else + b = dbget(type, *addrp); + if(chg) + changed(pb); + addrp = &b->d.ptr[idx]; + } + mbput(pb); + pb = b; + USED(&b); /* force to memory in case of error */ + USED(&pb); /* force to memory in case of error */ + bno -= idx * nblks; + prev += idx * nblks; + type--; + } + noerror(); + return b; +} + +/* + * Remove [bno:bend) file data blocks. + * The file must be r/wlocked and melted. + */ +void +dfdropblks(Memblk *f, ulong bno, ulong bend) +{ + Memblk *b; + + isrwlocked(f, Wr); + ismelted(f); + assert(!DBDIR(f)); + + dprint("dfdropblks: could remove d%#ullx[%uld:%uld]\n", + f->addr, bno, bend); + /* + * Instead of releasing the references on the data blocks, + * considering that the file might grow again, we keep them. + * Consider recompiling again and again and... 
+ * + * The length has been adjusted and data won't be returned + * before overwritten. + * + * We only have to zero the data, because the file might + * grow using holes and the holes must read as zero, and also + * because directories assume all data blocks are initialized. + * + * dfblk() may return nil for a hole when called with Dontmk; + * a hole already reads as zero, so there is nothing to clear. + */ + for(; bno < bend; bno++){ + if(catcherror()) + continue; + b = dfblk(f, bno, Dontmk); + noerror(); + if(b == nil) + continue; + memset(b->d.data, 0, Dblkdatasz); + changed(b); + mbput(b); + } +} + +/* + * block # for the given offset (first block in file is 0). + * embedded data accounts also as block #0. + * If boffp is not nil it returns the offset within that block + * for the given offset; boffp may be nil if the caller only + * wants the block number. + */ +ulong +dfbno(Memblk *f, uvlong off, ulong *boffp) +{ + ulong doff, dlen; + + doff = embedattrsz(f); + dlen = Embedsz - doff; + if(off < dlen){ + /* offset falls within the data embedded in the file block */ + if(boffp != nil) + *boffp = doff + off; + return 0; + } + off -= dlen; + if(boffp != nil) + *boffp = off%Dblkdatasz; + return off/Dblkdatasz; +} + +/* + * Return a block slice for data in f. + * The slice returned is resized to keep in a single block. + * If there's a hole in the file, Blksl.data == nil && Blksl.len > 0. + * + * If mkit, the data block (and any pointer block crossed) + * is allocated/melted if needed, but the file length is NOT updated. + * + * The file must be r/wlocked by the caller, and melted if mkit. + * The block is returned referenced but unlocked, + * (it's still protected by the file lock.)
+ */ +Blksl +dfslice(Memblk *f, ulong len, uvlong off, int iswr) +{ + Blksl sl; + ulong boff, doff, dlen, bno; + + memset(&sl, 0, sizeof sl); + + dFprint("slice m%#p[%#ullx:+%#ulx]%c...\n",f, off, len, iswr?'w':'r'); + if(iswr) + ismelted(f); + else + if(off >= f->d.length) + goto done; + + doff = embedattrsz(f); + dlen = Embedsz - doff; + + if(off < dlen){ + sl.b = f; + incref(f); + sl.data = f->d.embed + doff + off; + sl.len = dlen - off; + }else{ + bno = (off-dlen) / Dblkdatasz; + boff = (off-dlen) % Dblkdatasz; + sl.b = dfblk(f, bno, iswr); + if(iswr) + ismelted(sl.b); + if(sl.b != nil) + sl.data = sl.b->d.data + boff; + sl.len = Dblkdatasz - boff; + } + + if(sl.len > len) + sl.len = len; + if(off + sl.len > f->d.length) + if(!iswr) + sl.len = f->d.length - off; + /* else the file size will be updated by the caller */ +done: + if(sl.b == nil) + dFprint("slice m%#p[%#ullx:+%#ulx]%c -> 0[%#ulx]\n", + f, off, len, iswr?'w':'r', sl.len); + else + dFprint("slice m%#p[%#ullx:+%#ulx]%c -> m%#p:%#uld[%#ulx]\n", + f, off, len, iswr?'w':'r', + sl.b, (uchar*)sl.data - sl.b->d.data, sl.len); + assert(sl.b == nil || sl.b->ref > 1); + return sl; +} + + +uvlong +dfdirmap(Memblk *d, Dirf dirf, void *arg, int iswr) +{ + Blksl sl; + daddrt *de; + uvlong off; + int i; + + isdir(d); + assert(d->d.length/Daddrsz >= d->d.ndents); + if(iswr){ + isrwlocked(d, iswr); + ismelted(d); + } + off = 0; + for(;;){ + sl = dfslice(d, Dblkdatasz, off, iswr); + if(sl.len == 0) + break; + if(sl.b == nil) + continue; + if(catcherror()){ + mbput(sl.b); + error(nil); + } + de = sl.data; + for(i = 0; i < sl.len/Daddrsz; i++) + if(dirf(sl.b, &de[i], arg) < 0){ + noerror(); + mbput(sl.b); + return off + i*Daddrsz; + } + off += sl.len; + noerror(); + mbput(sl.b); + } + return Noaddr; +} + +static int +chdentryf(Memblk *b, daddrt *de, void *p) +{ + daddrt *addrs, addr, naddr; + + addrs = p; + addr = addrs[0]; + naddr = addrs[1]; + if(*de != addr) + return 0; /* continue searching */ + + if(naddr != addr){ 
+ *de = naddr; + changed(b); + } + return -1; /* found: stop */ +} + +/* + * Find a dir entry for addr (perhaps 0 == avail) and change it to + * naddr. If iswr, the entry is allocated if needed and the blocks + * melted on demand. + * Return the offset for the entry in the file or Noaddr + * Does not adjust disk refs. + */ +uvlong +dfchdentry(Memblk *d, daddrt addr, daddrt naddr) +{ + uvlong off; + daddrt addrs[2] = {addr, naddr}; + + dNprint("dfchdentry d%#010ullx -> d%#010ullx\nin %H\n", addr, naddr, d); + off = dfdirmap(d, chdentryf, addrs, Wr); + if(addr == 0 && naddr != 0){ + if(d->d.length < off+Daddrsz) + d->d.length = off+Daddrsz; + d->d.ndents++; + changed(d); + }else if(addr != 0 && naddr == 0){ + d->d.ndents--; + changed(d); + } + return off; +} + +typedef +struct Walkarg +{ + char *name; + Memblk *f; +} Walkarg; + +static int +findname(Memblk*, daddrt *de, void *p) +{ + Walkarg *w; + + w = p; + if(*de == 0) + return 0; + + w->f = dbget(DBfile, *de); + if(strcmp(w->f->mf->name, w->name) != 0){ + mbput(w->f); + return 0; + } + + /* found */ + dprint("dfwalk '%s' -> %H\n", w->name, w->f); + return -1; +} + +/* + * Walk to a child and return it referenced. + */ +Memblk* +dfwalk(Memblk *d, char *name) +{ + Walkarg w; + + if(strcmp(name, "..") == 0) + fatal("dfwalk: '..'"); + w.name = name; + w.f = nil; + if(dfdirmap(d, findname, &w, Rd) == Noaddr) + error("file not found"); + return w.f; +} + +/* + * Return the last version for *fp, rwlocked, be it frozen or melted. + */ +void +dflast(Memblk **fp, int iswr) +{ + Memblk *f; + + f = *fp; + isfile(f); + rwlock(f, iswr); + while(f->mf->melted != nil){ + incref(f->mf->melted); + *fp = f->mf->melted; + rwunlock(f, iswr); + mbput(f); + f = *fp; + rwlock(f, iswr); + if(!f->frozen) + return; + } +} + +/* + * Return *fp melted, by melting it if needed, and wlocked. + * The reference from the (already melted) parent is adjusted, + * as are the memory and disk references for the old file *fp. 
+ * + * The parent is wlocked by the caller and unlocked upon return. + */ +Memblk* +dfmelt(Memblk *parent, Memblk **fp) +{ + Memblk *of, *nf; + + ismelted(parent); + isrwlocked(parent, Wr); + dflast(fp, Wr); + of = *fp; + if(of->frozen == 0){ + rwunlock(parent, Wr); + return of; + } + if(catcherror()){ + rwunlock(of, Wr); + rwunlock(parent, Wr); + error(nil); + } + nf = dbdup(of); + noerror(); + + rwlock(nf, Wr); + rwunlock(of, Wr); + if(catcherror()){ + rwunlock(nf, Wr); + mbput(nf); + error(nil); + } + dfchdentry(parent, of->addr, nf->addr); + dbput(of); + mbput(of); + *fp = nf; + noerror(); + rwunlock(parent, Wr); + return nf; +} + +void +dfused(Path *p) +{ + Memblk *f; + + f = p->f[p->nf-1]; + isfile(f); + rwlock(f, Wr); + f->d.atime = fstime(0); + rwunlock(f, Wr); +} + +/* + * Report that a file has been modified. + * Modification times propagate up to the root of the file tree. + * But frozen files are never changed. + */ +void +dfchanged(Path *p, int muid) +{ + Memblk *f; + u64int t; + int i; + + t = fstime(0); + for(i = 0; i < p->nf; i++){ + f = p->f[i]; + rwlock(f, Wr); + if(f->frozen == 0) + if(!catcherror()){ + f->d.mtime = t; + f->d.atime = t; + f->d.muid = muid; + changed(f); + noerror(); + } + rwunlock(f, Wr); + } +} + +/* + * May be called with null parent, for root and ctl files. + * The first call with a null parent is root, all others are ctl + * files linked at root. 
+ */ +Memblk* +dfcreate(Memblk *parent, char *name, int uid, ulong mode) +{ + Memblk *nf; + Mfile *m; + int isctl; + + if(fsfull()) + error("file system full"); + isctl = parent == nil; + if(parent == nil) + parent = fs->root; + + if(parent != nil){ + dprint("dfcreate '%s' %M at\n%H\n", name, mode, parent); + isdir(parent); + isrwlocked(parent, Wr); + ismelted(parent); + }else + dprint("dfcreate '%s' %M", name, mode); + + if(isctl) + nf = dballocz(DBctl, (mode&DMDIR)?DFdir:0, 1); + else + nf = dballocz(DBfile, (mode&DMDIR)?DFdir:0, 1); + + if(catcherror()){ + mbput(nf); + if(parent != nil) + rwunlock(parent, Wr); + error(nil); + } + + m = nf->mf; + nf->d.id = fstime(0); + nf->d.mode = mode; + nf->d.mtime = nf->d.id; + nf->d.atime = nf->d.id; + nf->d.length = 0; + m->uid = usrname(uid); + nf->d.uid = uid; + m->gid = m->uid; + nf->d.gid = nf->d.uid; + m->muid = m->uid; + nf->d.muid = nf->d.uid; + m->name = name; + nf->d.asize = pmeta(nf->d.embed, Embedsz, nf); + changed(nf); + + if(parent != nil){ + m->gid = parent->mf->gid; + nf->d.gid = parent->d.gid; + dfchdentry(parent, 0, nf->addr); + } + noerror(); + dprint("dfcreate-> %H\n within %H\n", nf, parent); + return nf; +} + +void +dfremove(Memblk *p, Memblk *f) +{ + vlong n; + + /* funny as it seems, we may need extra blocks to melt */ + if(fsfull()) + error("file system full"); + + isrwlocked(f, Wr); + isrwlocked(p, Wr); + ismelted(p); + if(DBDIR(f) != 0 && f->d.ndents > 0) + error("directory not empty"); + incref(p); + if(catcherror()){ + mbput(p); + error(nil); + } + dfchdentry(p, f->addr, 0); + /* shouldn't fail now. it's unlinked */ + + if(p->d.ndents == 0 && p->d.length > 0){ /* all gone, make it public */ + p->d.length = 0; + changed(p); + } + + noerror(); + rwunlock(f, Wr); + if(!catcherror()){ + n = dbput(f); + noerror(); + } + mbput(f); + mbput(p); +} + +/* + * It's ok if a is nil, for reading ahead. 
+ */ +ulong +dfpread(Memblk *f, void *a, ulong count, uvlong off) +{ + Blksl sl; + ulong tot; + char *p; + + p = a; + isrwlocked(f, Rd); + for(tot = 0; tot < count; tot += sl.len){ + sl = dfslice(f, count-tot, off+tot, Rd); + if(sl.len == 0){ + assert(sl.b == nil); + break; + } + if(sl.data == nil){ + memset(p+tot, 0, sl.len); + assert(sl.b == nil); + continue; + } + if(p != nil) + memmove(p+tot, sl.data, sl.len); + mbput(sl.b); + } + return tot; +} + +ulong +dfpwrite(Memblk *f, void *a, ulong count, uvlong *off) +{ + Blksl sl; + ulong tot; + char *p; + + if(fsfull()) + error("file system full"); + + isrwlocked(f, Wr); + ismelted(f); + p = a; + if(f->d.mode&DMAPPEND) + *off = f->d.length; + for(tot = 0; tot < count;){ + sl = dfslice(f, count-tot, *off+tot, Wr); + if(sl.len == 0 || sl.data == nil) + fatal("dfpwrite: bug"); + memmove(sl.data, p+tot, sl.len); + changed(sl.b); + mbput(sl.b); + tot += sl.len; + if(*off+tot > f->d.length){ + f->d.length = *off+tot; + changed(f); + } + } + return tot; +} + +int +ptrmap(daddrt addr, int nind, Blkf f, void *a, int isdisk) +{ + int i; + Memblk *b; + long tot; + + if(addr == 0) + return 0; + if(isdisk) + b = dbget(DBdata+nind, addr); + else{ + b = mbget(DBdata+nind, addr, Dontmk); + if(b == nil) + return 0; /* on disk */ + } + if(catcherror()){ + mbput(b); + error(nil); + } + tot = 0; + if(f == nil || f(b, a) == 0){ + tot++; + if(nind > 0){ + for(i = 0; i < Dptrperblk; i++) + tot += ptrmap(b->d.ptr[i], nind-1, f, a, isdisk); + } + } + noerror(); + mbput(b); + return tot; +} + +static int +dumpf(Memblk*, daddrt *de, void *p) +{ + int isdisk; + Memblk *f; + + if(*de == 0) + return 0; + + isdisk = *(int*)p; + if(isdisk) + f = dbget(DBfile, *de); + else + f = mbget(DBfile, *de, Dontmk); + if(f != nil){ + if(catcherror()){ + mbput(f); + error(nil); + } + dfdump(f, isdisk); + noerror(); + mbput(f); + } + return 0; +} + +void +dfdump(Memblk *f, int isdisk) +{ + int i; + extern int mbtab; + + isfile(f); + /* visit the blocks to fetch 
them if needed, so %H prints them. */ + for(i = 0; i < nelem(f->d.dptr); i++) + ptrmap(f->d.dptr[i], 0, nil, nil, isdisk); + for(i = 0; i < nelem(f->d.iptr); i++) + ptrmap(f->d.iptr[i], i+1, nil, nil, isdisk); + fprint(2, "%H\n", f); + if(DBDIR(f) != 0){ + mbtab++; + if(!catcherror()){ + dfdirmap(f, dumpf, &isdisk, Rd); + noerror(); + } + mbtab--; + } +} + +static void +freezeaddr(daddrt addr) +{ + Memblk *f; + + if(addr == 0) + return; + f = mbget(DBfile, addr, Dontmk); + if(f == nil) /* must be frozen */ + return; + if(catcherror()){ + mbput(f); + error(nil); + } + dffreeze(f); + noerror(); + mbput(f); + +} + +static int +bfreeze(Memblk *b, void*) +{ + int i; + + if(b->frozen) + return -1; + b->frozen = 1; + if(b->type == DBdata && DBDIR(b)) + for(i = 0; i < Dblkdatasz/Daddrsz; i++) + freezeaddr(b->d.ptr[i]); + return 0; +} + +long +dffreeze(Memblk *f) +{ + int i; + long tot; + ulong doff; + + isfile(f); + if(f->frozen) + return 0; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + f->frozen = 1; + tot = 1; + if(DBDIR(f)) + for(doff = embedattrsz(f); doff < Embedsz; doff += Daddrsz) + freezeaddr(*(daddrt*)(f->d.embed+doff)); + for(i = 0; i < nelem(f->d.dptr); i++) + tot += ptrmap(f->d.dptr[i], 0, bfreeze, nil, Mem); + for(i = 0; i < nelem(f->d.iptr); i++) + tot += ptrmap(f->d.iptr[i], i+1, bfreeze, nil, Mem); + noerror(); + rwunlock(f, Wr); + return tot; +} + + +/* + * Caller walked down p, and now requires the nth element to be + * melted, and wlocked for writing. (nth count starts at 1); + * + * Return the path with the version of f that we must use, + * locked for writing and melted. + * References kept in the path are traded for the ones returned. + * + * Calls from user requests wait until /archive is melted. + * Calls from fsfreeze(), fsreclaim(), etc. melt /archive. 
+ */ +Path* +meltedpath(Path **pp, int nth, int user) +{ + int i; + Memblk *f, **fp; + Path *p; + + ownpath(pp); + p = *pp; + assert(nth >= 1 && p->nf >= nth && p->nf >= 2); + assert(p->f[0] == fs->root); + fp = &p->f[nth-1]; + + /* + * 1. Optimistic: Try to get a loaded melted version for f. + */ + dflast(fp, Wr); + f = *fp; + if(!f->frozen) + return p; + ainc(&fs->nmelts); + rwunlock(f, Wr); + + /* + * 2. Realistic: + * walk down the path, melting every frozen thing until we + * reach f. Keep wlocks so melted files are not frozen while we walk. + * /active is special, because it's only frozen temporarily while + * creating a frozen version of the tree. Instead of melting it, + * we should just wait for it if the call is from a user RPC. + * p[0] is / + * p[1] is /active or /archive + */ + if(!user){ + rwlock(p->f[0], Wr); + i = 1; + }else{ + for(;;){ + dflast(&p->f[1], Wr); + if(p->f[1]->frozen == 0) + break; + rwunlock(p->f[1], Wr); + yield(); + } + i = 2; + } + for(; i < nth; i++) + dfmelt(p->f[i-1], &p->f[i]); + return p; +} + +/* + * Advance path to use the most recent version of each file. + */ +Path* +lastpath(Path **pp, int nth) +{ + Memblk *f; + Path *p; + int i; + + p = *pp; + for(i = 0; i < nth; i++){ + f = p->f[i]; + if(f != nil && f->mf != nil && f->mf->melted != nil) + break; + } + if(i == nth) + return p; /* all files have the last version */ + + ownpath(pp); + p = *pp; + for(i = 0; i < nth; i++){ + dflast(&p->f[i], Rd); + rwunlock(p->f[i], Rd); + } + return p; +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,28 @@ +#!/bin/rc +# fault injection check +if( ~ $#* 0 1 2){ + echo usage: $0 'r|w' n cmd... 
>[1=2] + exit usage +} + +fn testfault { + what=$1; shift + n=$1 ; shift + broke|rc + echo ; echo '**********' testing with $what^err '=' $n ':' $* + 8.fsfmt -y && 8.fscmd dbg!WRD $what^err!$n $* + if(ps|grep 8.fscmd>/dev/null) + tstack 8.fscmd + echo ; echo '**********' checking after $what^err '=' $n ':' $* + 8.fscmd check +} +what=w +if( ~ $1 r) + what=r +max=$2 +shift +shift +8.fsfmt && 8.fscmd dumpall +for(n in `{seq $max}){ + testfault $what $n $* +} --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,1217 @@ +#include "all.h" + +static RWLock fidhashlk; +static Fid *fidshd, *fidstl; +static Fid *fidhash[Fidhashsz]; +static uint fidgen; + +int noauth; + +Alloc fidalloc = +{ + .elsz = sizeof(Fid), + .zeroing = 1, +}; +Alloc rpcalloc = +{ + .elsz = sizeof(Largerpc), + .zeroing = 0, +}; + +Alloc clialloc = +{ + .elsz = sizeof(Cli), + .zeroing = 1, +}; + +static QLock clientslk; +static Cli *clients; + +static void +fidlink(Fid *fid) +{ + fid->next = fidshd; + fid->prev = nil; + if(fidshd != nil) + fidshd->prev = fid; + else + fidstl = fid; + fidshd = fid; +} + +static void +fidunlink(Fid *fid) +{ + if(fid->prev != nil) + fid->prev->next = fid->next; + else + fidshd = fid->next; + if(fid->next != nil) + fid->next->prev = fid->prev; + else + fidstl = fid->prev; + fid->next = nil; + fid->prev = nil; +} + +int +fidfmt(Fmt *fmt) +{ + Fid *fid; + Path *p; + int i; + + fid = va_arg(fmt->args, Fid*); + if(fid == nil) + return fmtprint(fmt, ""); + fmtprint(fmt, "fid %#p no %d r%d, omode %d arch %d", + fid, fid->no, fid->ref, fid->omode, fid->archived); + p = fid->p; + if(p == nil) + return 0; + fmtprint(fmt, " path"); + for(i = 0; i < p->nf; i++) + fmtprint(fmt, " d%#ullx", p->f[i]->addr); + return fmtprint(fmt, "\n=>%H", p->f[p->nf-1]); +} + +void +dumpfids(void) +{ + Fid *fid; + int n; + + xrwlock(&fidhashlk, Rd); + fprint(2, "fids:\n"); + n = 0; + for(fid = fidshd; fid != nil; fid = fid->next) + fprint(2, "[%d] = 
%X\n", n++, fid); + xrwunlock(&fidhashlk, Rd); +} + +/* + * Similar to lastpath(), but does not need to lock anything, + * because the fs is quiescent + */ +static int +meltpath(Path *p) +{ + int i, n; + Memblk *f; + + n = 0; + for(i = 0; i < p->nf; i++) + while((f = p->f[i]->mf->melted) != nil){ + n++; + incref(f); + mbput(p->f[i]); + p->f[i] = f; + } + return n; +} + +void +meltfids(void) +{ + Fid *fid; + int n; + + xrwlock(&fidhashlk, Rd); + n = 0; + for(fid = fidshd; fid != nil; fid = fid->next) + if(canqlock(fid)){ + if(!fid->archived && fid->p != nil) + n += meltpath(fid->p); + qunlock(fid); + }else + warn("meltfids: couldn't lock"); + xrwunlock(&fidhashlk, Rd); + dprint("meltfids: %d fids advanced\n", n); +} + +/* + * Call mbcountref on every file kept in the path of every fid + * in the fid list. Fids without a path are skipped. + */ +void +countfidrefs(void) +{ + Fid *fid; + Path *p; + int i; + + xrwlock(&fidhashlk, Rd); + for(fid = fidshd; fid != nil; fid = fid->next){ + p = fid->p; + if(p == nil) /* closed/removed, or no path set yet */ + continue; + for(i = 0; i < p->nf; i++) + mbcountref(p->f[i]); + } + xrwunlock(&fidhashlk, Rd); +} + +Rpc* +newrpc(void) +{ + Rpc *rpc; + + rpc = anew(&rpcalloc); + rpc->next = nil; + rpc->cli = nil; + rpc->fid = nil; + rpc->flushed = 0; + rpc->closed = 0; + rpc->chan = ~0; + rpc->rpc0 = nil; + /* ouch! union.
*/ + if(sizeof(Fcall) > sizeof(IXcall)){ + memset(&rpc->t, 0, sizeof rpc->t); + memset(&rpc->r, 0, sizeof rpc->r); + }else{ + memset(&rpc->xt, 0, sizeof rpc->xt); + memset(&rpc->xr, 0, sizeof rpc->xr); + } + return rpc; +} + +void +freerpc(Rpc *rpc) +{ + afree(&rpcalloc, rpc); +} + +Fid* +newfid(Cli* cli, int no) +{ + Fid *fid, **fidp; + + xrwlock(&fidhashlk, Wr); + if(catcherror()){ + xrwunlock(&fidhashlk, Wr); + error(nil); + } + if(no < 0) + no = fidgen++; + for(fidp = &fidhash[no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->hnext) + if((*fidp)->cli == cli && (*fidp)->no == no) + error("fid in use"); + fid = anew(&fidalloc); + *fidp = fid; + fid->hnext = nil; + fid->omode = -1; + fid->no = no; + fid->cli = cli; + fid->ref = 2; /* one for the caller; another because it's kept */ + fidlink(fid); + noerror(); + xrwunlock(&fidhashlk, Wr); + dEprint("new fid %X\n", fid); + return fid; +} + +Fid* +getfid(Cli* cli, int no) +{ + Fid *fid; + + xrwlock(&fidhashlk, Rd); + if(catcherror()){ + xrwunlock(&fidhashlk, Rd); + error(nil); + } + for(fid = fidhash[no%Fidhashsz]; fid != nil; fid = fid->hnext) + if(fid->cli == cli && fid->no == no){ + incref(fid); + noerror(); + dEprint("getfid %d -> %X\n", no, fid); + xrwunlock(&fidhashlk, Rd); + return fid; + } + error("fid not found"); + return fid; +} + +void +putfid(Fid *fid) +{ + Fid **fidp; + + if(fid == nil || decref(fid) > 0) + return; + d9print("clunk %X\n", fid); + putpath(fid->p); + fid->p = nil; + xrwlock(&fidhashlk, Wr); + if(catcherror()){ + xrwunlock(&fidhashlk, Wr); + warn("putfid: %r"); + error(nil); + } + for(fidp = &fidhash[fid->no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->hnext) + if(*fidp == fid){ + *fidp = fid->hnext; + fidunlink(fid); + noerror(); + xrwunlock(&fidhashlk, Wr); + afree(&fidalloc, fid); + return; + } + fatal("putfid: fid not found"); +} + +/* keeps addr, does not copy it */ +static Cli* +newcli(char *addr, int fd, int cfd) +{ + Cli *cli; + + cli = anew(&clialloc); + cli->fd = fd; + cli->cfd = cfd; 
+ cli->addr = addr; + cli->ref = 1; + cli->uid = -1; + + xqlock(&clientslk); + cli->next = clients; + clients = cli; + xqunlock(&clientslk); + return cli; +} + +void +putcli(Cli *cli) +{ + Cli **cp; + + if(decref(cli) == 0){ + xqlock(&clientslk); + for(cp = &clients; *cp != nil; cp = &(*cp)->next) + if(*cp == cli) + break; + if(*cp == nil) + fatal("client not found"); + *cp = cli->next; + xqunlock(&clientslk); + close(cli->fd); + close(cli->cfd); + free(cli->addr); + afree(&clialloc, cli); + } +} + +void +consprintclients(void) +{ + Cli *c; + + xqlock(&clientslk); + for(c = clients; c != nil; c = c->next) + consprint("%s!%s\n", c->addr, usrname(c->uid)); + xqunlock(&clientslk); +} + +void +setfiduid(Fid *fid, char *uname) +{ + if(uname[0] == 0) + error("null uid"); + + fid->uid = usrid(uname); + + /* + * The owner of the process must be able to attach, if only to + * define users and administer the file system. + */ + if(fid->uid < 0 && strcmp(uname, getuser()) == 0){ + warn("no owner uid '%s'. attaching as 'elf'", uname); + fid->uid = usrid("elf"); + } + + if(fid->uid < 0){ + warn("unknown user '%s'. 
using 'none'", uname); + fid->uid = usrid("none"); + } + + if(fs->worm && strcmp(uname, getuser()) != 0) + error("user '%s' can't access rip main", uname); +} + +void +fidattach(Fid *fid, char *aname, char *uname) +{ + Path *p; + + setfiduid(fid, uname); + p = newpath(fs->root); + fid->p = p; + + if(fs->worm){ + if(strcmp(aname, "main") == 0){ + aname = ""; + }else if(strcmp(aname, "archive") != 0) + error("unknown tree '%s'", aname); + }else if(strcmp(aname, "main") == 0) + aname = ""; + else if(strncmp(aname, "main/", 5) == 0) + aname += 5; + + if(aname[0] != 0) + if(strcmp(aname, "active") == 0){ + addelem(&p, fs->active); + }else if(strcmp(aname, "archive") == 0){ + addelem(&p, fs->archive); + fid->archived = 1; + }else + error("unknown tree '%s'", aname); + p->nroot = p->nf; +} + +Fid* +fidclone(Cli *cli, Fid *fid, int no) +{ + Fid *nfid; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + nfid = newfid(cli, no); + nfid->p = clonepath(fid->p); + nfid->uid = fid->uid; + nfid->archived = fid->archived; + nfid->consopen = fid->consopen; + nfid->buf = fid->buf; + noerror(); + xqunlock(fid); + return nfid; +} + +static Memblk* +pickarch(Memblk *d, uvlong t) +{ + Blksl sl; + daddrt *de; + uvlong off; + int i; + uvlong cmtime; + daddrt cdaddr; + Memblk *f; + + off = 0; + cmtime = 0; + cdaddr = 0; + for(;;){ + sl = dfslice(d, Dblkdatasz, off, Rd); + if(sl.len == 0){ + assert(sl.b == nil); + break; + } + if(sl.b == nil) + continue; + if(catcherror()){ + mbput(sl.b); + error(nil); + } + for(i = 0; i < sl.len/Daddrsz; i++){ + de = sl.data; + de += i; + if(*de == 0) + continue; + f = dbget(DBfile, *de); + if(f->d.mtime > cmtime && f->d.mtime < t){ + cmtime = f->d.mtime; + cdaddr = *de; + } + mbput(f); + } + noerror(); + mbput(sl.b); + off += sl.len; + } + if(cdaddr == 0) + error("file not found"); + return dbget(DBfile, cdaddr); +} + +static int +digs(char **sp, int n) +{ + char b[8]; + char *s; + + s = *sp; + if(strlen(s) < n) + return 0; + assert(n 
< sizeof b - 1); + strecpy(b, b+n+1, *sp); + *sp += n; + return strtoul(b, nil, 10); +} + +/* + * convert symbolic time into a valid /archive wname. + * yyyymmddhhmm, yyyymmdd, mmdd, or hh:mm + */ +static Memblk* +archwalk(Memblk *f, char *wname) +{ + char *s; + Tm *tm; + uvlong t; + static QLock tmlk; + int wl; + + s = wname; + qlock(&tmlk); /* localtime is not reentrant! */ + tm = localtime(time(nil)); + wl = strlen(wname); + switch(wl){ + case 12: /* yyyymmddhhmm */ + case 8: /* yyyymmdd */ + tm->year = digs(&s, 4) - 1900; + case 4: /* mmdd */ + tm->mon = digs(&s, 2) - 1; + tm->mday = digs(&s, 2); + if(wl == 8) + break; + /* else fall */ + case 5: /* hh:mm */ + tm->hour = digs(&s, 2); + if(wl == 5 && s[0] != 0) + s++; + tm->min = digs(&s, 2); + break; + default: + qunlock(&tmlk); + error("file not found"); + } + dprint("archwalk to %d/%d/%d %d:%d:%d\n", + tm->year, tm->mday, tm->mon, tm->hour, tm->min, tm->sec); + t = tm2sec(tm); + t *= NSPERSEC; + qunlock(&tmlk); + return pickarch(f, t); +} + + +/* + * We are at /active/... or /archive/x/... + * Walk to /archive/T/... and return it so it's added to the + * path by the caller. 
+ */ +static Memblk* +timewalk(Path *p, char *wname, int uid) +{ + Memblk *f, *nf, *pf, *arch, *af; + int i, isarch; + + if(p->nf < 2 || (p->f[1] == fs->archive && p->nf < 3)) + error("file not found"); + assert(p->f[0] == fs->root); + isarch = p->f[1] == fs->archive; + assert(p->f[1] == fs->active || isarch); + + arch = fs->archive; + rwlock(arch, Rd); + if(catcherror()){ + rwunlock(arch, Rd); + error(nil); + } + af = archwalk(arch, wname); + noerror(); + rwunlock(arch, Rd); + if(catcherror()){ + mbput(af); + error(nil); + } + i = 2; + if(isarch) + i++; + f = af; + incref(f); + nf = nil; + for(; i < p->nf; i++){ + pf = p->f[i]; + rwlock(f, Rd); + rwlock(pf, Rd); + if(catcherror()){ + rwunlock(pf, Rd); + rwunlock(f, Rd); + mbput(f); + error(nil); + } + dfaccessok(f, uid, AEXEC); + nf = dfwalk(f, pf->mf->name); + noerror(); + rwunlock(pf, Rd); + rwunlock(f, Rd); + mbput(f); + f = nf; + } + noerror(); + mbput(af); + USED(f); + USED(nf); + + if((f->d.mode&DMDIR) == 0){ /* it was not a dir at that time! */ + mbput(f); + error("file not found"); + } + return f; +} + +/* + * For walks into /archive, wname may be any time value (ns) or + * yyyymmddhhmm, yyyymmdd, mmdd, or hh:mm, + * and the walk proceeds to the archived file + * with the bigger mtime not greater than the specified time. + * If there's no such time, walk reports a file not found error. + * + * walks using @ lead to the corresponding dir in the archive. 
+ */ +void +fidwalk(Fid *fid, char *wname) +{ + Path *p; + Memblk *f, *nf; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = lastpath(&fid->p, fid->p->nf); + if(strcmp(wname, ".") == 0) + goto done; + if(strcmp(wname, "..") == 0){ + if(p->nf > p->nroot) + p = dropelem(&fid->p); + goto done; + } + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + dfaccessok(f, fid->uid, AEXEC); + + nf = nil; + if(catcherror()){ + if(f == fs->archive) + nf = archwalk(f, wname); + else if(wname[0] == '@' && f != fs->cons && f != fs->stats) + nf = timewalk(p, wname+1, fid->uid); + else + error(nil); + fid->archived = 1; /* BUG: clear archived on .. */ + }else{ + nf = dfwalk(f, wname); + noerror(); + } + + rwunlock(f, Rd); + noerror(); + p = addelem(&fid->p, nf); + decref(nf); +done: + f = p->f[p->nf-1]; + if(isro(f)) + fid->archived = f != fs->cons && f != fs->stats; + else if(f == fs->active) + fid->archived = 0; + dfused(p); + noerror(); + xqunlock(fid); +} + +void +wstatint(Memblk *f, char *name, u64int v) +{ + char buf[20]; + + seprint(buf, buf+sizeof buf, "%#018ullx", v); + dfwattr(f, name, buf); +} + +void +fidopen(Fid *fid, int mode) +{ + int fmode, amode; + Memblk *f; + Path *p; + + if(fid->omode != -1) + error("fid already open"); + + /* check this before we try to melt it */ + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + if(mode != OREAD){ + if(f == fs->root || f == fs->archive || fid->archived) + error("can't write archived or built-in files"); + if(fs->mode == Rd) + error("read only file system"); + if(writedenied(fid->uid)) + error("user can't write"); + } + amode = 0; + if((mode&3) != OREAD || (mode&OTRUNC) != 0) + amode |= AWRITE; + if((mode&3) != OWRITE) + amode |= AREAD; + if(amode != AREAD) + if(f == fs->cons) + rwlock(f, Wr); + else{ + p = meltedpath(&fid->p, fid->p->nf, 1); + f = p->f[p->nf-1]; + } + else{ + p = lastpath(&fid->p, 
fid->p->nf); + rwlock(f, Rd); + } + if(catcherror()){ + rwunlock(f, (amode!=AREAD)?Wr:Rd); + error(nil); + } + fmode = f->d.mode; + if(mode != OREAD){ + if(f != fs->root && p->f[p->nf-2]->d.mode&DMAPPEND) + error("directory is append only"); + if((fmode&DMDIR) != 0) + error("wrong open mode for a directory"); + } + dfaccessok(f, fid->uid, amode); + if(mode&ORCLOSE){ + if(fid->archived || isro(f)) + error("can't remove an archived or built-in file"); + if(f->d.mode&DMUSERS) + error("can't remove /users"); + dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); + fid->rclose++; + } + if((fmode&DMEXCL) != 0 && f->mf->open) + if(f != fs->cons || amode != AWRITE) /* ok to write cons */ + error("exclusive use file already open"); + if((mode&OTRUNC) != 0&& f != fs->cons && f != fs->stats){ + wstatint(f, "length", 0); + if(f->d.mode&DMUSERS){ + f->d.mode = 0664|DMUSERS; + f->d.uid = usrid("adm"); + f->d.gid = usrid("adm"); + f->mf->uid = "adm"; + f->mf->gid = "adm"; + changed(f); + } + } + if(f == fs->stats) + fid->buf = updatestats(mode&OTRUNC, 1); + f->mf->open++; + fid->omode = mode&3; + fid->loff = 0; + fid->lidx = 0; + fid->consopen = f == fs->cons; + noerror(); + rwunlock(f, (amode!=AREAD)?Wr:Rd); + if(mode&OTRUNC) + dfchanged(p, fid->uid); + else + dfused(p); + noerror(); + xqunlock(fid); +} + +void +fidcreate(Fid *fid, char *name, int mode, ulong perm) +{ + Path *p; + Memblk *f, *nf; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fsdiskfree() < Dzerofree) + error("disk full"); + if(fid->omode != -1) + error("fid already open"); + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) + error("that file name scares me"); + if(utfrune(name, '/') != nil) + error("that file name is too creepy"); + if((perm&DMDIR) != 0 && mode != OREAD) + error("wrong open mode for a directory"); + if(fs->mode == Rd) + error("read only file system"); + if(writedenied(fid->uid)) + error("user can't write"); + if(fid->archived) + error("file is archived or builtin"); + 
if((perm&DMBITS) != perm) + error("unknown bit set in perm %M %#ulx", perm, perm); + p = fid->p; + f = p->f[p->nf-1]; + if(mode&ORCLOSE) + if(f->d.mode&DMUSERS) + error("can't remove the users file"); + if((f->d.mode&DMDIR) == 0) + error("not a directory"); + p = meltedpath(&fid->p, fid->p->nf, 1); + f = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + dfaccessok(f, fid->uid, AWRITE); + if(!catcherror()){ + mbput(dfwalk(f, name)); + error("file already exists"); + } + + + nf = dfcreate(f, name, fid->uid, perm); + p = addelem(&fid->p, nf); + if(f == fs->active && strcmp(name, "users") == 0){ + nf->d.mode = 0664|DMUSERS; + nf->d.uid = usrid("adm"); + nf->d.gid = usrid("adm"); + nf->mf->uid = "adm"; + nf->mf->gid = "adm"; + changed(nf); + } + decref(nf); + nf->mf->open++; + noerror(); + rwunlock(f, Wr); + fid->omode = mode&3; + fid->loff = 0; + fid->lidx = 0; + if(mode&ORCLOSE) + fid->rclose++; + dfchanged(p, fid->uid); + noerror(); + xqunlock(fid); +} + +typedef struct Rarg +{ + Fid *fid; + int n; + uchar *data; + ulong ndata; + Packmeta pack; + ulong tot; + long ndents; +}Rarg; + +static int +readdirf(Memblk*, daddrt *de, void *a) +{ + Rarg *ra; + Memblk *f; + ulong nr; + + ra = a; + if(ra->tot+2 >= ra->ndata || ra->fid->lidx >= ra->ndents) + return -1; + if(*de == 0) + return 0; + if(ra->n > 0){ + ra->n--; + return 0; + } + f = dbget(DBfile, *de); + if(catcherror()){ + mbput(f); + error(nil); + } + nr = ra->pack(f, ra->data+ra->tot, ra->ndata-ra->tot); + noerror(); + mbput(f); + if(nr <= 2) + return -1; + ra->tot += nr; + ra->fid->lidx++; + return 0; +} + +static ulong +readdir(Fid *fid, uchar *data, ulong ndata, uvlong, Packmeta pack) +{ + Memblk *d; + Rarg ra = {fid, fid->lidx, data, ndata, pack, 0, 0}; + + d = fid->p->f[fid->p->nf-1]; + ra.ndents = d->d.ndents; + dfdirmap(d, readdirf, &ra, Rd); + return ra.tot; +} + +static long +strread(char *s, void *data, ulong count, vlong offset) +{ + long n; + + n = strlen(s); + if(offset >= n) + 
return 0; + s += offset; + n -= offset; + if(count > n) + count = n; + if(count > 0) + memmove(data, s, count); + return count; +} + +long +fidread(Fid *fid, void *data, ulong count, vlong offset, Packmeta pack) +{ + Memblk *f; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fid->omode == -1) + error("fid not open"); + if(fid->omode == OWRITE) + error("fid not open for reading"); + if(offset < 0) + error("negative offset"); + p = lastpath(&fid->p, fid->p->nf); + f = p->f[p->nf-1]; + if(f == fs->cons){ + noerror(); + xqunlock(fid); + return consread(data, count); + } + if(f == fs->stats){ + noerror(); + assert(fid->buf != nil); + count = strread(fid->buf, data, count, offset); + xqunlock(fid); + return count; + } + rwlock(f, Rd); + noerror(); + xqunlock(fid); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + if(f->d.mode&DMDIR){ + if(fid->loff != offset) + error("non-sequential dir read not supported"); + count = readdir(fid, data, count, offset, pack); + fid->loff += count; + }else + count = dfpread(f, data, count, offset); + noerror(); + rwunlock(f, Rd); + dfused(p); + return count; +} + +long +fidwrite(Fid *fid, void *data, ulong count, uvlong *offset) +{ + Memblk *f; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fs->mode == Rd) + error("read only file system"); + if(writedenied(fid->uid)) + error("user can't write"); + if(fsdiskfree() < Dzerofree) + error("disk full"); + if(fid->omode == -1) + error("fid not open"); + if(fid->omode == OREAD) + error("fid not open for writing"); + p = fid->p; + f = p->f[p->nf-1]; + if(f == fs->cons){ + xqunlock(fid); + noerror(); + return conswrite(data, count); + } + if(f == fs->stats){ + xqunlock(fid); + noerror(); + return count; + } + + p = meltedpath(&fid->p, fid->p->nf, 1); + f = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + count = dfpwrite(f, data, count, offset); + noerror(); + rwunlock(f, Wr); + + 
dfchanged(p, fid->uid); + noerror(); + xqunlock(fid); + + return count; +} + +void +fidclose(Fid *fid) +{ + Memblk *f, *fp; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Wr); + f->mf->open--; + if((f->d.mode&DMUSERS) && (fid->omode&3) != OREAD) + rwusers(f); + rwunlock(f, Wr); + fid->omode = -1; + if(fid->rclose){ + lastpath(&fid->p, fid->p->nf); + p = meltedpath(&fid->p, fid->p->nf-1, 1); + fp = p->f[p->nf-2]; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + mbput(f); + }else{ + dfremove(fp, f); + fid->p->nf--; + noerror(); + } + rwunlock(fp, Wr); + dfchanged(p, fid->uid); + } + putpath(fid->p); + fid->p = nil; + fid->consopen = 0; + noerror(); + xqunlock(fid); +} + +void +fidremove(Fid *fid) +{ + Memblk *f, *fp; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fs->mode == Rd) + error("read only file system"); + if(writedenied(fid->uid)) + error("user can't write"); + p = fid->p; + f = p->f[p->nf-1]; + if(fid->archived || isro(f)) + error("can't remove archived or built-in files"); + lastpath(&fid->p, fid->p->nf); + p = meltedpath(&fid->p, fid->p->nf-1, 1); + fp = p->f[p->nf-2]; + f = p->f[p->nf-1]; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(fp, Wr); + error(nil); + } + if(fp->d.mode&DMAPPEND) + error("directory is append only"); + if(f->d.mode&DMUSERS) + error("can't remove the users file"); + dfaccessok(fp, fid->uid, AWRITE); + fid->omode = -1; + dfremove(fp, f); + fid->p->nf--; + noerror(); + rwunlock(fp, Wr); + dfchanged(fid->p, fid->uid); + putpath(fid->p); + fid->p = nil; + noerror(); + xqunlock(fid); +} + +void +replied(Rpc *rpc) +{ + Rpc **rl; + + xqlock(&rpc->cli->rpclk); + for(rl = &rpc->cli->rpcs; (*rl != nil); rl = &(*rl)->next) + if(*rl == rpc){ + *rl = rpc->next; + break; + } + rpc->cli->nrpcs--; + xqunlock(&rpc->cli->rpclk); + rpc->next = nil; + assert(rpc->fid == nil); + putcli(rpc->cli); + 
rpc->cli = nil; + +} + +/* + * Read ahead policy: to be called after replying to an ok. read RPC. + * + * We try to keep at least Nahead more bytes in the file if it seems + * that's ok. + */ +void +rahead(Memblk *f, uvlong offset) +{ + Mfile *m; + + rwlock(f, Rd); + m = f->mf; + if(m->sequential == 0 || m->raoffset > offset + Nahead){ + rwunlock(f, Rd); + return; + } + if(catcherror()){ + rwunlock(f, Rd); + warn("rahead: %r"); + return; + } + m->raoffset = offset + Nahead; + d9print("rahead d%#ullx off %#ullx\n", f->addr, m->raoffset); + for(; offset < m->raoffset; offset += Maxmdata) + if(dfpread(f, nil, Maxmdata, offset) != Maxmdata) + break; + noerror(); + rwunlock(f, Rd); +} + +static void +postfd(char *name, int pfd) +{ + int fd; + + remove(name); + fd = create(name, OWRITE|ORCLOSE|OCEXEC, 0600); + if(fd < 0) + fatal("postfd: %r\n"); + if(fprint(fd, "%d", pfd) < 0){ + close(fd); + fatal("postfd: %r\n"); + } + close(pfd); +} + +static char* +getremotesys(char *ndir) +{ + char buf[128], *serv, *sys; + int fd, n; + + snprint(buf, sizeof buf, "%s/remote", ndir); + sys = nil; + fd = open(buf, OREAD); + if(fd >= 0){ + n = read(fd, buf, sizeof(buf)-1); + if(n>0){ + buf[n-1] = 0; + serv = strchr(buf, '!'); + if(serv) + *serv = 0; + sys = strdup(buf); + } + close(fd); + } + if(sys == nil) + sys = strdup("unknown"); + return sys; +} + +void +srv9pix(char *srv, char* (*cliworker)(void *arg, void **aux)) +{ + Cli *cli; + int fd[2]; + char *name; + + name = smprint("/srv/%s", srv); + if(pipe(fd) < 0) + fatal("pipe: %r"); + postfd(name, fd[0]); + warn("listen %s", srv); + consprint("listen %s\n", srv); + cli = newcli(name, fd[1], -1); + getworker(cliworker, cli, nil); +} + +static void +listenproc(void *a) +{ + Cli *cli; + Channel *c; + int data, nctl; + char *dir, ndir[NETPATHLEN], *addr; + char* (*cliworker)(void *arg, void **aux); + + c = a; + addr = recvp(c); + dir = recvp(c); + cliworker = recvp(c); + chanfree(c); + threadsetname("listenproc %s", addr); + for(;;){ + 
nctl = listen(dir, ndir); + if(nctl < 0) + fatal("listen %s: %r", addr); + data = accept(nctl, ndir); + if(data < 0){ + warn("accept %s: %r", ndir); + continue; + } + cli = newcli(getremotesys(ndir), data, nctl); + getworker(cliworker, cli, nil); + } +} + +void +listen9pix(char *addr, char* (*cliworker)(void *arg, void **aux)) +{ + char *dir; + Channel *c; + int ctl; + + dir = mallocz(NETPATHLEN, 1); + ctl = announce(addr, dir); + if(ctl < 0) + fatal("announce %s: %r", addr); + consprint("listen %s\n", addr); + warn("listen %s\n", addr); + c = chancreate(sizeof(void*), 2); + proccreate(listenproc, c, Stack); + sendp(c, addr); + sendp(c, dir); + sendp(c, cliworker); +} --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,170 @@ +extern Path* addelem(Path **pp, Memblk *f); +extern daddrt addrofref(daddrt refaddr, int idx); +extern void afree(Alloc *a, void *nd); +extern int allowed(int uid); +extern int allowed(int); +extern int allowed(int); +extern void* anew(Alloc *a); +extern void changed(Memblk *b); +extern void checkblk(Memblk *b); +extern void checktag(u64int tag, uint type, daddrt addr); +extern char* cliworker9p(void *v, void**aux); +extern Path* clonepath(Path *p); +extern void consinit(void); +extern void consprint(char *fmt, ...); +extern void consprintclients(void); +extern long consread(char *buf, long count); +extern long conswrite(char *ubuf, long count); +extern void countfidrefs(void); +extern void countfidrefs(void); +extern void countfidrefs(void); +extern Memblk* dballocz(uint type, int dbit, int zeroit); +extern void dbclear(u64int tag, daddrt addr); +extern daddrt dbcounted(daddrt addr); +extern u64int dbcountref(daddrt addr); +extern Memblk* dbdup(Memblk *b); +extern Memblk* dbget(uint type, daddrt addr); +extern daddrt dbgetref(daddrt addr); +extern daddrt dbincref(daddrt addr); +extern long dbput(Memblk *b); +extern long dbread(Memblk *b); +extern void dbsetref(daddrt addr, int ref); 
+extern long dbwrite(Memblk *b);
+extern void debug(void);
+extern void dfaccessok(Memblk *f, int uid, int bits);
+extern ulong dfbno(Memblk *f, uvlong off, ulong *boffp);
+extern void dfcattr(Memblk *f, int op, char *name, char *val);
+extern void dfchanged(Path *p, int muid);
+extern uvlong dfchdentry(Memblk *d, daddrt addr, daddrt naddr);
+extern Memblk* dfcreate(Memblk *parent, char *name, int uid, ulong mode);
+extern uvlong dfdirmap(Memblk *d, Dirf dirf, void *arg, int iswr);
+extern void dfdropblks(Memblk *f, ulong bno, ulong bend);
+extern void dfdump(Memblk *f, int isdisk);
+extern long dffreeze(Memblk *f);
+extern void dflast(Memblk **fp, int iswr);
+extern Memblk* dfmelt(Memblk *parent, Memblk **fp);
+extern ulong dfpread(Memblk *f, void *a, ulong count, uvlong off);
+extern ulong dfpwrite(Memblk *f, void *a, ulong count, uvlong *off);
+extern long dfrattr(Memblk *f, char *name, char *val, long count);
+extern void dfremove(Memblk *p, Memblk *f);
+extern Blksl dfslice(Memblk *f, ulong len, uvlong off, int iswr);
+extern void dfused(Path *p);
+extern Memblk* dfwalk(Memblk *d, char *name);
+extern long dfwattr(Memblk *f, char *name, char *val);
+extern Path* dropelem(Path **pp);
+extern void dumpfids(void);
+extern void dumplockstats(void);
+extern ulong embedattrsz(Memblk *f);
+extern void fatal(char *fmt, ...);
+extern void fidattach(Fid *fid, char *aname, char *uname);
+extern Fid* fidclone(Cli *cli, Fid *fid, int no);
+extern void fidclose(Fid *fid);
+extern void fidcreate(Fid *fid, char *name, int mode, ulong perm);
+extern int fidfmt(Fmt *fmt);
+extern void fidopen(Fid *fid, int mode);
+extern long fidread(Fid *fid, void *data, ulong count, vlong offset, Packmeta pack);
+extern void fidremove(Fid *fid);
+extern void fidwalk(Fid *fid, char *wname);
+extern long fidwrite(Fid *fid, void *data, ulong count, uvlong *offset);
+extern void freerpc(Rpc *rpc);
+extern int fscheck(void);
+extern uvlong fsdiskfree(void);
+extern void fsdump(int full, int disktoo);
+extern void fsfmt(char *dev, int force);
+extern int fsfull(void);
+extern int fslru(void);
+extern uvlong fsmemfree(void);
+extern void fsopen(char *dev, int worm, int canwr);
+extern void fspolicy(int);
+extern int fsreclaim(void);
+extern void fssync(void);
+extern void fssyncproc(void*);
+extern uvlong fstime(uvlong t);
+extern Fid* getfid(Cli* cli, int no);
+extern void gmeta(Memblk *f, void *buf, ulong nbuf);
+extern void isfile(Memblk *f);
+extern void ismelted(Memblk *b);
+extern int isro(Memblk *f);
+extern void isrwlocked(Memblk *f, int iswr);
+extern int ixcallfmt(Fmt *fmt);
+extern uint ixpack(IXcall *f, uchar *ap, uint nap);
+extern uint ixpackedsize(IXcall *f);
+extern char* ixstats(char *s, char *e, int clr, int verb);
+extern char* ixstats(char *s, char*, int, int);	/* repeats the prototype above */
+extern char* ixstats(char *s, char*, int, int);	/* repeats the prototype above */
+extern uint ixunpack(uchar *ap, uint nap, IXcall *f);
+extern Path* lastpath(Path **pp, int nth);
+extern int leader(int gid, int lead);
+extern void listen9pix(char *addr, char* (*cliworker)(void *arg, void **aux));
+extern void lockstats(int on);
+extern Memblk* mballocz(daddrt addr, int zeroit);
+extern daddrt mbcountref(Memblk *b);
+extern int mbfmt(Fmt *fmt);
+extern Memblk* mbget(int type, daddrt addr, int mkit);
+extern Memblk* mbhash(Memblk *b);
+extern int mbhashed(daddrt addr);
+extern void mbput(Memblk *b);
+extern int mbunhash(Memblk *b, int isreclaim);
+extern void mbunused(Memblk *b);
+extern Path* meltedpath(Path **pp, int nth, int user);
+extern void meltedref(Memblk *rb);
+extern void meltfids(void);
+extern void meltfids(void);	/* repeats the prototype above */
+extern void meltfids(void);	/* repeats the prototype above */
+extern int member(int uid, int member);
+extern int member(int uid, int member);	/* repeats the prototype above */
+extern int member(int uid, int member);	/* repeats the prototype above */
+extern List mfilter(List *bl, int(*f)(Memblk*));
+extern void mlistdump(char *tag, List *l);
+extern void munlink(List *l, Memblk *b, int isreclaim);
+extern Fid* newfid(Cli* cli, int no);
+extern Path* newpath(Memblk *root);
+extern Rpc* newrpc(void);
+extern char* ninestats(char *s, char *e, int clr, int verb);
+extern char* ninestats(char *s, char*, int, int);	/* repeats the prototype above */
+extern char* ninestats(char *s, char*, int, int);	/* repeats the prototype above */
+extern void nodebug(void);
+extern void ownpath(Path **pp);
+extern int pathfmt(Fmt *fmt);
+extern ulong pmeta(void *buf, ulong nbuf, Memblk *f);
+extern int ptrmap(daddrt addr, int nind, Blkf f, void *a, int isdisk);
+extern void putcli(Cli *cli);
+extern void putfid(Fid *fid);
+extern void putpath(Path *p);
+extern void quiescent(int y);
+extern void rahead(Memblk *f, uvlong offset);
+extern daddrt refaddr(daddrt addr, int *idx);
+extern void replied(Rpc *rpc);
+extern void rlsedebug(int r);
+extern int rpcfmt(Fmt *fmt);
+extern void rwlock(Memblk *f, int iswr);
+extern void rwunlock(Memblk *f, int iswr);
+extern void rwusers(Memblk *uf);
+extern void rwusers(Memblk*);	/* repeats the prototype above */
+extern void rwusers(Memblk*);	/* repeats the prototype above */
+extern int setdebug(void);
+extern void setfiduid(Fid *fid, char *uname);
+extern void srv9pix(char *srv, char* (*cliworker)(void *arg, void **aux));
+extern void timeproc(void*);
+extern char* tname(int t);
+extern char* updatestats(int clr, int verb);
+extern int usrfmt(Fmt *fmt);
+extern int usrid(char *n);
+extern int usrid(char*);	/* repeats the prototype above */
+extern int usrid(char*);	/* repeats the prototype above */
+extern char* usrname(int uid);
+extern char* usrname(int);	/* repeats the prototype above */
+extern char* usrname(int);	/* repeats the prototype above */
+extern Path* walkpath(Memblk *f, char *elems[], int nelems);
+extern Path* walkto(char *a, char **lastp);
+extern void warn(char *fmt, ...);
+extern void warnerror(char *fmt, ...);
+extern int writedenied(int uid);
+extern void written(Memblk *b);
+extern void wstatint(Memblk *f, char *name, u64int v);
+extern int xcanqlock(QLock *q);
+extern void xqlock(QLock *q);
+extern void xqunlock(QLock *q);
+extern void xrwlock(RWLock *rw, int iswr);
+extern void xrwunlock(RWLock *rw, int iswr);
+extern long wname(Memblk *f, char *val);
--- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012
@@ -0,0 +1,474 @@
+#include "all.h" + +/* + * HUGE warning: + * these commands do not perform checks at all. + * that means you know what you are doing if you use them. + * e.g., you can create multiple files with the same name + * in the same directory. + * + * This tool is only an aid for testing and debugging. + */ + +enum +{ + Nels = 64 +}; + +static char *fsdir; +static int verb; + +int +member(int uid, int member) +{ + return uid == member; +} + +int +allowed(int) +{ + return 1; +} + +int +usrid(char*) +{ + return 0; +} + +char* +usrname(int) +{ + return getuser(); +} + +void +meltfids(void) +{ +} + +void +rwusers(Memblk*) +{ +} + +char* +ninestats(char *s, char*, int, int) +{ + return s; +} + +char* +ixstats(char *s, char*, int, int) +{ + return s; +} + +void +countfidrefs(void) +{ +} + +/* + * Walks elems starting at f. + * Ok if nelems is 0. + */ +static char* +fsname(char *p) +{ + if(p[0] == '/') + return strdup(p); + if(fsdir) + return smprint("%s/%s", fsdir, p); + return strdup(p); +} + +static void +fscd(int, char *argv[]) +{ + free(fsdir); + fsdir = strdup(argv[1]); +} + +/* + * This is unrealistic in that it keeps the file locked + * during the entire put. This means that we can only give + * fslru() a chance before each put, and not before each + * write, because everything is going to be in use and dirty if + * we run out of memory. 
+ */ +static void +fsput(int, char *argv[]) +{ + int fd; + char *fn; + Memblk *m, *f; + Dir *d; + char buf[4096]; + uvlong off; + long nw, nr; + Path *p; + char *nm; + + fd = open(argv[1], OREAD); + if(fd < 0) + error("open: %r\n"); + d = dirfstat(fd); + if(d == nil){ + error("dirfstat: %r\n"); + } + nm = fsname(argv[2]); + if(catcherror()){ + free(nm); + close(fd); + free(d); + error(nil); + } + p = walkto(nm, &fn); + if(catcherror()){ + putpath(p); + error(nil); + } + meltedpath(&p, p->nf, 1); + m = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(m, Wr); + error(nil); + } + f = dfcreate(m, fn, usrid(d->uid), d->mode&(DMDIR|0777)); + noerror(); + addelem(&p, f); + decref(f); /* kept now in p */ + rwlock(f, Wr); + rwunlock(m, Wr); + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + if((d->mode&DMDIR) == 0){ + off = 0; + for(;;){ + if(fsmemfree() < Mminfree) + fslru(); + nr = read(fd, buf, sizeof buf); + if(nr <= 0) + break; + nw = dfpwrite(f, buf, nr, &off); + dprint("wrote %ld of %ld bytes\n", nw, nr); + off += nr; + } + } + noerror(); + noerror(); + noerror(); + if(verb) + print("created %H\nat %H\n", f, m); + rwunlock(f, Wr); + free(nm); + putpath(p); + close(fd); + free(d); +} + +static void +fscat(int, char *argv[]) +{ + Memblk *f; + Mfile *m; + char buf[4096], *nm; + uvlong off; + long nr; + Path *p; + + nm = fsname(argv[2]); + if(catcherror()){ + free(nm); + error(nil); + } + p = walkto(nm, nil); + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + putpath(p); + error(nil); + } + m = f->mf; + print("cat %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)f->d.mode, f->d.length, m->uid, dbgetref(f->addr)); + if((f->d.mode&DMDIR) == 0){ + off = 0; + for(;;){ + if(fsmemfree() < Mminfree) + fslru(); + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + write(1, buf, nr); + off += nr; + } + } + noerror(); + noerror(); + rwunlock(f, Rd); + putpath(p); + free(nm); +} + +static void +fsget(int, char *argv[]) +{ + Memblk 
*f; + Mfile *m; + char buf[4096], *nm; + uvlong off; + long nr; + int fd; + Path *p; + + fd = create(argv[1], OWRITE, 0664); + if(fd < 0) + error("create: %r\n"); + nm = fsname(argv[2]); + if(catcherror()){ + free(nm); + close(fd); + error(nil); + } + p = walkto(nm, nil); + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + putpath(p); + error(nil); + } + m = f->mf; + print("get %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)f->d.mode, f->d.length, m->uid, dbgetref(f->addr)); + if((f->d.mode&DMDIR) == 0){ + off = 0; + for(;;){ + if(fsmemfree() < Mminfree) + fslru(); + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + if(write(fd, buf, nr) != nr){ + fprint(2, "%s: error: %r\n", argv[0]); + break; + } + off += nr; + } + } + close(fd); + noerror(); + noerror(); + rwunlock(f, Rd); + putpath(p); + free(nm); +} + +static void +fssnap(int, char**) +{ + fssync(); +} + +static void +fsrcl(int, char**) +{ + fsreclaim(); + fssync(); /* commit changes to disk */ +} + +static void +fsdmp(int, char *argv[]) +{ + fsdump(*argv[0] == 'l', strstr(argv[0], "all") != 0); +} + +static void +fsdbg(int, char *argv[]) +{ + char *s; + + memset(dbg, 0, sizeof dbg); + for(s = argv[1]; *s; s++) + dbg['D'] = dbg[*s] = 1; +} + +static void +fsout(int, char*[]) +{ + fslru(); +} + +static void +fsrm(int, char *argv[]) +{ + Memblk *f, *pf; + Path *p; + char *nm; + + nm = fsname(argv[1]); + if(catcherror()){ + free(nm); + error(nil); + } + p = walkto(nm, nil); + if(catcherror()){ + putpath(p); + error(nil); + } + if(p->nf < 2) + error("short path for rm"); + meltedpath(&p, p->nf-1, 1); + f = p->f[p->nf-1]; + pf = p->f[p->nf-2]; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(pf, Wr); + error(nil); + } + dfremove(pf, f); + p->f[p->nf-1] = nil; + noerror(); + noerror(); + noerror(); + rwunlock(pf, Wr); + putpath(p); + free(nm); +} + +static void +fsst(int, char**) +{ + fprint(2, "%s\n", updatestats(0, 1)); +} + +static void 
+fschk(int, char**) +{ + if(fscheck() != 0) + error("check failed"); +} + +static void +fserr(int, char *argv[]) +{ + if(*argv[0] == 'r'){ + swreaderr = atoi(argv[1]); + print("sw read err count = %d\n", swreaderr); + }else{ + swwriteerr = atoi(argv[1]); + print("sw write err count = %d\n", swwriteerr); + } +} + +static void +fspol(int, char**) +{ + fspolicy(Post); +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-dv] [-f disk] cmd...\n", argv0); + exits("usage"); +} + +static Cmd cmds[] = +{ + {"cd", fscd, 2, "cd!where"}, + {"put", fsput, 3, "put!src!dst"}, + {"get", fsget, 3, "get!dst!src"}, + {"cat", fscat, 3, "cat!what"}, + {"dump", fsdmp, 1, "dump"}, + {"dumpall", fsdmp, 1, "dumpall"}, + {"ldump", fsdmp, 1, "ldump"}, + {"ldumpall", fsdmp, 1, "ldumpall"}, + {"sync", fssnap, 1, "sync"}, + {"snap", fssnap, 1, "snap"}, + {"rcl", fsrcl, 1, "rcl"}, + {"dbg", fsdbg, 2, "dbg!n"}, + {"out", fsout, 1, "out"}, + {"rm", fsrm, 2, "rm!what"}, + {"stats", fsst, 1, "stats"}, + {"check", fschk, 1, "check"}, + {"rerr", fserr, 2, "rerr!n"}, + {"werr", fserr, 2, "werr!n"}, + {"pol", fspol, 1, "pol"}, +}; + +void +threadmain(int argc, char *argv[]) +{ + char *dev; + char *args[Nels]; + int i, j, nargs, check; + + dev = "disk"; + check = 0; + ARGBEGIN{ + case 'c': + check++; + break; + case 'v': + verb++; + break; + case 'f': + dev = EARGF(usage()); + break; + default: + if(ARGC() >= 'A' && ARGC() <= 'Z'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; + }else + usage(); + }ARGEND; + if(argc == 0) + usage(); + fatalaborts = 1; + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + fmtinstall('P', pathfmt); + errinit(Errstack); + if(catcherror()){ + fprint(2, "cmd failed: %r\n"); + threadexitsall("failed"); + } + fsopen(dev, Normal, Wr); + for(i = 0; i < argc; i++){ + if(verb>1) + fsdump(0, Mem); + print("%% %s\n", argv[i]); + nargs = gettokens(argv[i], args, Nels, "!"); + for(j = 0; j < nelem(cmds); j++){ + if(strcmp(cmds[j].name, argv[i]) != 0) + continue; + 
if(cmds[j].nargs != 0 && cmds[j].nargs != nargs) + fprint(2, "usage: %s\n", cmds[j].usage); + else + cmds[j].f(nargs, args); + break; + } + if(j == nelem(cmds)){ + fprint(2, "no such command\n"); + for(j = 0; j < nelem(cmds); j++) + fprint(2, "\t%s\n", cmds[j].usage); + break; + } + if(check){ + print("%% check\n"); + fscheck(); + } + } + if(verb>1) + fsdump(0, Mem); + noerror(); + threadexitsall(nil); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,102 @@ +#include "all.h" + +int +usrid(char*) +{ + return 3; +} + +char* +usrname(int) +{ + return "sys"; +} + +int +member(int uid, int member) +{ + return uid == member; +} + +int +allowed(int) +{ + return 1; +} + +void +meltfids(void) +{ +} + +void +rwusers(Memblk*) +{ +} + +char* +ninestats(char *s, char*, int, int) +{ + return s; +} + +char* +ixstats(char *s, char*, int, int) +{ + return s; +} + +void +countfidrefs(void) +{ +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-vy] [disk]\n", argv0); + exits("usage"); +} + +void +threadmain(int argc, char *argv[]) +{ + char *dev; + int verb, force; + + dev = "disk"; + verb = force = 0; + ARGBEGIN{ + case 'v': + verb = 1; + break; + case 'y': + force = 1; + break; + default: + if((ARGC() >= 'A' && ARGC() <= 'Z') || ARGC() == '9'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; + fatalaborts = 1; + }else + usage(); + }ARGEND; + if(argc == 1) + dev = argv[0]; + else if(argc > 0) + usage(); + fmtinstall('P', pathfmt); + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + errinit(Errstack); + if(catcherror()) + fatal("error: %r"); + fsfmt(dev, force); + if(verb) + fsdump(0, Mem); + else + print("%lld %ldK blocks\n", fs->ndblk, Dblksz/1024); + noerror(); + exits(nil); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,1233 @@ +#include "all.h" + +/* + * All the code assumes outofmemoryexits = 1. 
+ */ + +enum +{ + Lru = 0, + Freeze, + Write, + Nfsops, +}; + + + +BUG: +issuing a halt calls a freeze and starts the write but does NOT +wait for the write to complete before killing all threads, so data +is lost because not everything is written. + +Check out that halt indeed waits for the write to complete!! + +freezing fs... +freezing fs... +fs frozen +writing fs... +creepy/9pix: /dev/sdC0/creepy: halted + +Fsys *fs; +uvlong maxfsz; + +vlong fsoptime[Nfsops]; +ulong nfsopcalls[Nfsops]; + +static char* fsopname[] = +{ +[Lru] "lru", +[Freeze] "freeze", +[Write] "write", +}; + +char statstext[Statsbufsz], *statsp; + +void +quiescent(int y) +{ + if(y == No) + xrwlock(&fs->quiescence, Rd); + else + xrwunlock(&fs->quiescence, Rd); +} + +uvlong +fsdiskfree(void) +{ + uvlong nfree; + + xqlock(fs); + nfree = fs->super->d.ndfree; + nfree += (fs->limit - fs->super->d.eaddr)/Dblksz; + xqunlock(fs); + return nfree; +} + +static char* +fsstats(char *s, char *e, int clr, int verb) +{ + int i; + + s = seprint(s, e, "mblks:\t%4ulld nblk %4ulld nablk %4ulld mused %4ulld mfree\n", + fs->nblk, fs->nablk, fs->nmused, fs->nmfree); + s = seprint(s, e, "lists:\t%4uld clean %#4uld dirty %#4uld refs %4uld total\n", + fs->clean.n, fs->dirty.n, fs->refs.n, + fs->clean.n + fs->dirty.n + fs->refs.n); + s = seprint(s, e, "dblks:\t %4ulld nblk %4ulld nfree (%ulld list + %ulld rem)\n", + fs->limit/Dblksz - 1, fsdiskfree(), fs->super->d.ndfree, + (fs->limit - fs->super->d.eaddr)/Dblksz); + s = seprint(s, e, "paths:\t%4uld alloc %4uld free (%4uld bytes)\n", + pathalloc.nalloc, pathalloc.nfree, pathalloc.elsz); + s = seprint(s, e, "mfs:\t%4uld alloc %4uld free (%4uld bytes)\n", + mfalloc.nalloc, mfalloc.nfree, mfalloc.elsz); + + if(verb == 0) + return s; + s = seprint(s, e, "nmelts:\t%d\n", fs->nmelts); + s = seprint(s, e, "nindirs:\t"); + for(i = 0; i < nelem(fs->nindirs); i++){ + s = seprint(s, e, "%d ", fs->nindirs[i]); + if(clr) + fs->nindirs[i] = 0; + } + s = seprint(s, e, "\n"); + s = 
seprint(s, e, "\n"); + s = seprint(s, e, "Fsysmem:\t%uld\n", Fsysmem); + s = seprint(s, e, "Mzerofree:\t%d\tMminfree:\t%d\tMmaxfree:\t%d\n", + Mzerofree, Mminfree, Mmaxfree); + s = seprint(s, e, "Dzerofree:\t%d\tDminfree:\t%d\tDmaxfree:\t%d\n", + Dzerofree, Dminfree, Dmaxfree); + s = seprint(s, e, "Mmaxdirtypcent:\t%d\n", Mmaxdirtypcent); + s = seprint(s, e, "Dblksz: \t%uld\n", Dblksz); + s = seprint(s, e, "Mblksz: \t%ud\n", sizeof(Memblk)); + s = seprint(s, e, "Dminattrsz:\t%uld\n", Dminattrsz); + s = seprint(s, e, "Nblkgrpsz:\t%uld\n", Nblkgrpsz); + s = seprint(s, e, "Dblkdatasz:\t%d\n", Dblkdatasz); + s = seprint(s, e, "Embedsz:\t%d\n", Embedsz); + s = seprint(s, e, "Dentryperblk:\t%d\n", Dblkdatasz/Daddrsz); + s = seprint(s, e, "Dptrperblk:\t%d\n\n", Dptrperblk); + + for(i = 0; i < nelem(nfsopcalls); i++){ + if(nfsopcalls[i] == 0) + s = seprint(s, e, "%s:\t0 calls\t0 µs\n", fsopname[i]); + else + s = seprint(s, e, "%s:\t%uld calls\t%ulld µs\n", fsopname[i], + nfsopcalls[i], (fsoptime[i]/nfsopcalls[i])/1000); + if(clr){ + nfsopcalls[i] = 0; + fsoptime[i] = 0; + } + } + return s; +} + +char* +updatestats(int clr, int verb) +{ + static QLock statslk; + + if(clr) + warn("clearing stats"); + xqlock(&statslk); + statsp = statstext; + *statsp = 0; + statsp = fsstats(statsp, statstext+sizeof statstext, clr, verb); + statsp = ninestats(statsp, statstext+sizeof statstext, clr, verb); + statsp = ixstats(statsp, statstext+sizeof statstext, clr, verb); + xqunlock(&statslk); + return statstext; +} + +int +isro(Memblk *f) +{ + return f == fs->archive || f == fs->root || f == fs->cons || f == fs->stats; +} + +/* + * NO LOCKS. 
debug only + * + */ +void +fsdump(int full, int disktoo) +{ + int i, n, x; + Memblk *b; + daddrt a; + extern int fullfiledumps; + + x = fullfiledumps; + fullfiledumps = full; + nodebug(); + if(fs != nil){ + fprint(2, "\n\nfsys '%s' limit %#ullx super m%#p root m%#p:\n", + fs->dev, fs->limit, fs->super, fs->root); + fprint(2, "%H\n", fs->super); + dfdump(fs->root, disktoo); + mlistdump("refs", &fs->refs); + if(1){ + n = 0; + fprint(2, "hash:"); + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next){ + if(n++ % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "d%#010ullx ", EP(b->addr)); + } + fprint(2, "\n"); + } + } + if(fs->super->d.free != 0){ + fprint(2, "free:"); + i = 0; + for(a = fs->super->d.free; a != 0; a = dbgetref(a)){ + if(i++ % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "d%#010ullx ", EP(a)); + } + fprint(2, "\n"); + } + mlistdump("mru", &fs->clean); + mlistdump("dirty", &fs->dirty); + fprint(2, "%s\n", updatestats(0, 1)); + fullfiledumps = x; + debug(); +} + +static daddrt +disksize(int fd) +{ + Dir *d; + daddrt sz; + + d = dirfstat(fd); + if(d == nil) + return 0; + sz = d->length; + free(d); + return sz; +} + +/* + * To preserve coherency, blocks written are always frozen. + * DBref blocks with RCs and the free block list require some care: + * + * On disk, the super block indicates that even (odd) DBref blocks are active. + * On memory, the super selects even (odd) refs (we read refs from there.) + * To sync... + * 1. we make a frozen super to indicate that odd (even) DBrefs are active. + * 2. we write odd (even) DBref blocks. + * 3. the frozen super is written, indicating that odd (even) refs are in use. + * (The disk is coherent now, pretending to use odd (even) refs). + * 4. The memory super is udpated to select odd (even) DBref blocks. + * (from now on, we are loading refs from odd (even) blocks. + * 5. we update even (odd) DBref blocks, so we can get back to 1. + * with even/odd swapped. 
+ *
+ */
+
+/*
+ * Build fs->fzsuper: a frozen, dirty copy of the super block with
+ * oddrefs flipped, and freeze every block in the refs list, recording
+ * in changed whether it was dirty at freeze time.
+ * There must be no frozen super pending.
+ */
+static void
+freezesuperrefs(void)
+{
+	Memblk *b, *rb;
+
+	b = mballocz(fs->super->addr, 0);
+	xqlock(fs);
+	b->type = fs->super->type;
+	b->d = fs->super->d;
+	b->d.oddrefs = !fs->super->d.oddrefs;
+	assert(fs->fzsuper == nil);
+	fs->fzsuper = b;
+	b->frozen = 1;
+	b->dirty = 1;	/* so it's written */
+	xqlock(&fs->refs);
+	for(rb = fs->refs.hd; rb != nil; rb = rb->lnext){
+		rb->frozen = 1;
+		rb->changed = rb->dirty;
+	}
+	xqunlock(&fs->refs);
+	xqunlock(fs);
+}
+
+/*
+ * Read the super block into fs->super and check the magic number and
+ * that its parameters match those compiled in.
+ * If the two copies of the super data differ (a partial write), the
+ * duplicate is used when its epoch is older.
+ * Any failure is reported as "not a creepy disk".
+ */
+static Memblk*
+readsuper(void)
+{
+	Memblk *super;
+	Dsuperdata *d1, *d2;
+
+	if(catcherror()){
+		error("not a creepy disk: %r");
+		error(nil);
+	}
+	fs->super = dbget(DBsuper, Dblksz);
+	super = fs->super;
+	if(super->d.magic != MAGIC)
+		error("bad magic number");
+	d1 = &fs->super->d.Dsuperdata;
+	d2 = &fs->super->d.dup;
+	if(memcmp(d1, d2, sizeof(Dsuperdata)) != 0){
+		warn("partially written superblock, using old.");
+		if(fs->super->d.dup.epoch < fs->super->d.epoch)
+			fs->super->d.Dsuperdata = fs->super->d.dup;
+	}
+	if(super->d.dblksz != Dblksz)
+		error("bad Dblksz");
+	if(super->d.nblkgrpsz != Nblkgrpsz)
+		error("bad Nblkgrpsz");
+	if(super->d.dminattrsz != Dminattrsz)
+		error("bad Dminattrsz");
+	if(super->d.ndptr != Ndptr)
+		error("bad ndptr");
+	if(super->d.niptr != Niptr)
+		error("bad niptr");
+	if(super->d.embedsz != Embedsz)
+		error("bad Embedsz");
+	if(super->d.dptrperblk != Dptrperblk)
+		error("bad Dptrperblk");
+
+	noerror();
+	return super;
+}
+
+/*
+ * Return /archive/yyyy/mmdd melted and wlocked, create it if needed.
+ * Clear the arch addr in the super if a new archive should be taken.
+ */
+static Path*
+currentarch(void)
+{
+	Path *p;
+	Memblk *f, *pf;
+	char yname[30], dname[30], *names[2];
+	Tm *tm;
+	int i;
+
+	tm = localtime(time(nil));
+	seprint(yname, yname+sizeof yname, "%04d", tm->year + 1900);
+	seprint(dname, dname+sizeof dname, "%02d%02d", tm->mon + 1, tm->mday);
+	names[0] = yname;
+	names[1] = dname;
+
+	p = newpath(fs->root);
+	addelem(&p, fs->archive);
+	/* walk as far as the directories already exist; stop at the first error */
+	for(i = 0; i < nelem(names); i++){
+		if(catcherror())
+			break;
+		pf = p->f[p->nf-1];
+		rwlock(pf, Rd);
+		if(catcherror()){
+			rwunlock(pf, Rd);
+			error(nil);
+		}
+		f = dfwalk(pf, names[i]);
+		addelem(&p, f);
+		mbput(f);
+		noerror();
+		rwunlock(pf, Rd);
+		noerror();
+	}
+	meltedpath(&p, p->nf, 0);
+	if(catcherror()){
+		rwunlock(p->f[p->nf-1], Wr);
+		error(nil);
+	}
+	/* 0:/ 1:archive 2:yyyy 3:mmdd */
+	/* create the missing directories, moving the write lock down the path */
+	for(i = p->nf-1; i < 3; i++){
+		f = dfcreate(p->f[i], names[i-1], p->f[i]->d.uid, p->f[i]->d.mode);
+		rwlock(f, Wr);
+		rwunlock(p->f[i], Wr);
+		addelem(&p, f);
+		mbput(f);
+	}
+	noerror();
+
+	return p;
+}
+
+/*
+ * If the super does not point to nf yet, make nf the archive
+ * (taking a reference), record its address as the root address in
+ * the super and flag the super as changed.
+ */
+static void
+updateroot(Memblk *nf)
+{
+	if(fs->super->d.root != nf->addr){
+		fs->archive = nf;
+		incref(nf);
+		fs->super->d.root = nf->addr;
+		changed(fs->super);
+	}
+}
+
+/*
+ * Freeze the file tree, keeping active as a new melted file
+ * that refers to frozen children now in the archive.
+ * returns the new melted active, or nil if a previous freeze is
+ * still waiting to be written (fs->fzsuper is set).
+ *
+ * NB: This may be called from fsfmt(), with a melted archive,
+ * which violates the invariant that archive is always frozen, leading
+ * to a violation on the expected number of references to it (fsfmt leaks it).
+ */
+static Memblk*
+fsfreeze(void)
+{
+	Path *p;
+	Memblk *na, *oa, *arch, *oarch;
+	char name[50];
+	vlong t0;
+
+	dZprint("freezing fs...\n");
+	/*
+	 * NOTE(review): t0 is taken through fstime(nsec()) but is later
+	 * subtracted from a plain nsec(); confirm both use the same clock.
+	 */
+	if(fs->profile)
+		t0 = fstime(nsec());
+	xqlock(&fs->fzlk);
+	if(fs->fzsuper != nil){
+		/*
+		 * we did freeze/reclaim and are still writing, can't freeze now.
+		 */
+		xqunlock(&fs->fzlk);
+		return nil;
+	}
+	xrwlock(&fs->quiescence, Wr);	/* not really required */
+	nfsopcalls[Freeze]++;
+	if(catcherror()){
+		/*
+		 * There was an error during freeze.
+		 * It's better not to continue to prevent disk corruption.
+		 * The user is expected to restart from the last frozen
+		 * version of the tree.
+		 */
+		fatal("freeze: %r");
+	}
+
+	/* 1. Move active into /archive/yyyy/mmdd/.
+	 * We must add an extra disk ref to keep archive alive after melting
+	 * it within currentarch() because "/" is a fake and there's no old
+	 * frozen copy for "/" (keeping such ref).
+	 *
+	 * Dbput will unlink the block from the hash and move its address
+	 * into the free list. However, we still have a mem ref from fs->archive
+	 * and perhaps more from user paths, which must be advanced, so we can't
+	 * release the reference on archive just yet.
+	 * We will do the mbput corresponding to fs->archive after
+	 * advancing all fids, so their archive moves to the new one.
+	 */
+	arch = fs->archive;
+	dbincref(arch->addr);
+	p = currentarch();
+	updateroot(p->f[1]);
+	oarch = arch;
+	dbput(arch);
+
+	arch = p->f[p->nf-1];
+	oa = fs->active;
+	rwlock(oa, Wr);
+	seprint(name, name+sizeof(name), "%ulld", oa->d.mtime);
+	wname(oa, name);
+	dfchdentry(arch, 0, oa->addr);
+
+	/* 2. Freeze it, plus any melted blocks in /active due to
+	 * the link of the new archived tree.
+	 */
+	oa->d.mtime = fstime(0);
+	oa->d.atime = fstime(0);
+	rwunlock(oa, Wr);
+	changed(oa);
+	dffreeze(oa);
+	rwunlock(arch, Wr);
+	dffreeze(fs->archive);
+
+	/* 3. Freeze the on-disk reference counters
+	 * and the state of the super-block.
+	 * After doing so, the state to be written on the disk is
+	 * coherent and corresponds to now.
+	 */
+	dprint("freezing refs...\n");
+	freezesuperrefs();
+
+	/* 4. Make a new active and replace the old one.
+	 * defer the release of the old active until all fids are melted
+	 * (see similar discussion in 1).
+	 */
+	na = dbdup(oa);
+	rwlock(na, Wr);
+	na->d.id = fstime(0);
+	wname(na, "active");
+	fs->active = na;
+	rwunlock(na, Wr);
+	rwlock(fs->root, Wr);
+	dfchdentry(fs->root, oa->addr, na->addr);
+	rwunlock(fs->root, Wr);
+	assert(oa->ref > 1);	/* release fs->active */
+
+	/* 5. Advance paths in fids to their most recent melted files,
+	 * to release refs to old frozen files, and to the now gone old
+	 * "/archive".
+	 */
+	meltfids();
+	mbput(oa);
+	mbput(oarch);
+	if(fs->profile)
+		fsoptime[Freeze] += nsec() - t0;
+	noerror();
+	xrwunlock(&fs->quiescence, Wr);
+	xqunlock(&fs->fzlk);
+	putpath(p);
+
+	dZprint("fs frozen\n");
+	return na;
+}
+
+/*
+ * Call meltedref on every block of the refs list, except for the fake
+ * DBref blocks used for checks, which are just marked clean and melted.
+ * Returns the number of blocks that were both dirty and frozen.
+ */
+static long
+writerefs(void)
+{
+	Memblk *rb;
+	long n;
+
+	n = 0;
+	xqlock(&fs->refs);
+	for(rb = fs->refs.hd; rb != nil; rb = rb->lnext){
+		if((rb->addr - Dblk0addr)/Dblksz % Nblkgrpsz == 2){
+			/* It's a fake DBref block used for checks: ignore. */
+			rb->frozen = rb->dirty = 0;
+			continue;
+		}
+		if(rb->dirty && rb->frozen)
+			n++;
+		meltedref(rb);
+	}
+	xqunlock(&fs->refs);
+	return n;
+}
+
+/*
+ * Filter for mfilter: select frozen blocks.
+ */
+static int
+mustwrite(Memblk *b)
+{
+	return b->frozen != 0;
+}
+
+/*
+ * Written blocks become mru, perhaps we should
+ * consider keeping their location in the clean list, at the
+ * expense of visiting them while scanning for blocks to move out.
+ * We write only (dirty) blocks that are frozen or part of the "/archive" file.
+ */ +static long +writedata(void) +{ + Memblk *b; + long nw; + List dl; + + nw = 0; + dl = mfilter(&fs->dirty, mustwrite); + while((b = dl.hd) != nil){ + munlink(&dl, b, 1); + assert(b->dirty); + if((b->addr&Fakeaddr) != 0) + fatal("write data on fake address"); + dbwrite(b); + nw++; + } + return nw; +} + +static void +writezsuper(void) +{ + if(canqlock(&fs->fzlk)) + fatal("writezsuper: lock"); + assert(fs->fzsuper != nil); + fs->fzsuper->d.epoch = fstime(0); + fs->fzsuper->d.dup = fs->fzsuper->d.Dsuperdata; + dbwrite(fs->fzsuper); + dprint("writezsuper: %H\n", fs->fzsuper); + mbput(fs->fzsuper); + fs->fzsuper = nil; +} + +static void +syncref(daddrt addr) +{ + static Memblk b; + + b.addr = addr; + b.type = DBref; + dbread(&b); + if(fs->super->d.oddrefs == 0) /* then the old ones are odd */ + addr += Dblksz; + dWprint("syncref d%#010ullx at d%#010ullx\n", b.addr, addr); + if(pwrite(fs->fd, &b.d, sizeof b.d, addr) != sizeof b.d) + error("syncref: write: %r"); +} + +static void +syncrefs(void) +{ + Memblk *rb; + + fs->super->d.oddrefs = !fs->super->d.oddrefs; + xqlock(&fs->refs); + rb = fs->refs.hd; + xqunlock(&fs->refs); + for(; rb != nil; rb = rb->lnext){ + if(rb->changed) + syncref(rb->addr); + rb->changed = 0; + } +} + +/* + * Write any dirty frozen state after a freeze. + * Only this function and initialization routines (i.e., super, refs) + * may lead to writes. 
+ */ +static void +fswrite(void) +{ + vlong t0; + long nr, nb; + + dZprint("writing fs...\n"); + if(fs->profile) + t0 = fstime(nsec()); + xqlock(&fs->fzlk); + nfsopcalls[Write]++; + if(fs->fzsuper == nil) + fatal("can't fswrite if we didn't fsfreeze"); + if(catcherror()){ + fsoptime[Write] += nsec() - t0; + xqunlock(&fs->fzlk); + error(nil); + } + nr = writerefs(); + nb = writedata(); + writezsuper(); + nb++; + syncrefs(); + noerror(); + if(fs->profile) + fsoptime[Write] += fstime(nsec()) - t0; + fs->wtime = fstime(nsec()); + xqunlock(&fs->fzlk); + dZprint("fs written (2*%ld refs %ld data)\n", nr, nb); +} + +static void +fsinit(char *dev, int nblk) +{ + uvlong fact, i; + void *p; + char *c, *e; + + /* this is an invariant that must hold for directories */ + assert(Embedsz % Daddrsz == 0); + maxfsz = Ndptr*Dblkdatasz; + fact = 1; + for(i = 0; i < Niptr; i++){ + maxfsz += Dptrperblk * fact; + fact *= Dptrperblk; + } + + fs = mallocz(sizeof *fs, 1); + fs->dev = strdup(dev); + fs->fd = open(dev, ORDWR); + if(fs->fd < 0) + fatal("can't open disk: %r"); + + fs->nablk = Fsysmem / sizeof(Memblk); + if(nblk > 0 && nblk < fs->nablk) + fs->nablk = nblk; + fs->limit = disksize(fs->fd); + fs->ndblk = fs->limit/Dblksz; + fs->limit = fs->ndblk*Dblksz; + if(fs->limit < 10*Dblksz) + fatal("buy a larger disk"); + if(fs->nablk > fs->ndblk){ + warn("using %ulld blocks and not %ulld (small disk)", + fs->ndblk, fs->nablk); + fs->nablk = fs->ndblk; + } + p = malloc(fs->nablk * sizeof fs->blk[0]); + fs->blk = p; + warn("prepaging..."); + c = p; + e = c + fs->nablk * sizeof fs->blk[0]; + for(; c < e; c += 4096) + *c = 0; /* prepage it */ + fstime(nsec()); + dprint("fsys '%s' init\n", fs->dev); +} + +void +fssync(void) +{ + if(fsfreeze()) + fswrite(); +} + +static int +confirm(char *msg) +{ + char buf[100]; + int n; + + fprint(2, "%s [y/n]: ", msg); + n = read(0, buf, sizeof buf - 1); + if(n <= 0) + return 0; + if(buf[0] == 'y') + return 1; + return 0; +} + +enum +{ + Fossiloff = 128*1024, 
+ Fossilmagic = 0xffae7637, +}; + +static void +dontoverwrite(daddrt addr) +{ + Dsuperdata d; + static char buf[BIT64SZ]; + + if(pread(fs->fd, &d, sizeof d, addr + sizeof(Diskblkhdr)) != sizeof d) + return; + if(d.magic == MAGIC) + if(!confirm("disk has a creepy fs: continue?")){ + warn("aborting"); + threadexitsall("no"); + }else + return; + + if(pread(fs->fd, buf, sizeof buf, Fossiloff) != sizeof buf) + return; + if(GBIT32(buf) == Fossilmagic) + if(!confirm("disk has a fossil fs: continue?")){ + warn("aborting"); + threadexitsall("no"); + }else{ + memset(buf, 0, sizeof buf); + pwrite(fs->fd, buf, sizeof buf, Fossiloff); + } +} + +/* + * / is only in memory. It's `on-disk' address is Noaddr. + * + * /archive is the root on disk. + * /active is allocated on disk, but not on disk. It will be linked into + * /archive as a child in the future. + */ +void +fsfmt(char *dev, int force) +{ + Memblk *super; + int uid; + + fsinit(dev, Mmaxfree); /* enough # of blocks for fmt */ + + if(catcherror()) + fatal("fsfmt: error: %r"); + + fs->super = dballocz(DBsuper, DFreg, 1); + + if(!force) + dontoverwrite(fs->super->addr); + super = fs->super; + super->d.magic = MAGIC; + super->d.eaddr = fs->super->addr + Dblksz; + super->d.dblksz = Dblksz; + super->d.nblkgrpsz = Nblkgrpsz; + super->d.dminattrsz = Dminattrsz; + super->d.ndptr = Ndptr; + super->d.niptr = Niptr; + super->d.embedsz = Embedsz; + super->d.dptrperblk = Dptrperblk; + uid = usrid(getuser()); + fs->root = dfcreate(nil, "", uid, DMDIR|0555); + rwlock(fs->root, Wr); + fs->active = dfcreate(fs->root, "active", uid, DMDIR|0775); + fs->archive = dfcreate(fs->root, "archive", uid, DMDIR|0555); + rwunlock(fs->root, Wr); + super->d.root = fs->archive->addr; + fssync(); + + noerror(); +} + +void +timeproc(void*) +{ + threadsetname("timeproc"); + for(;;){ + sleep(1); + fstime(nsec()); + } +} + +/* + * If there are dirty blocks, call the policy once per Syncival. 
 */
void
fssyncproc(void*)
{
	threadsetname("syncer");
	errinit(Errstack);
	for(;;){
		sleep(Syncival*1000);
		fspolicy(Post);
	}
}

/*
 * Argument for pickf(): which child to keep (oldest or newest mtime)
 * and the one selected so far.
 */
typedef struct Parg
{
	int last;
	Memblk *which;
} Parg;
enum{First = 0, Last = 1};

/*
 * dfdirmap() callback: load the child at *de and keep it in pa->which
 * if it is the first one seen or beats the current one
 * (newer mtime when pa->last, older otherwise).
 * pa->which holds its own reference. Always returns 0.
 */
static int
pickf(Memblk*, daddrt *de, void *a)
{
	Parg *pa;
	Memblk *c;

	pa = a;
	if(*de == 0)	/* unused entry */
		return 0;
	c = dbget(DBfile, *de);
	if(pa->which == nil || (pa->last && pa->which->d.mtime < c->d.mtime) ||
	   (!pa->last && pa->which->d.mtime > c->d.mtime)){
		mbput(pa->which);
		pa->which = c;
		incref(c);
	}
	mbput(c);
	return 0;
}

/*
 * Return the first or last children, for selecting archives.
 * The result is nil if pickf() selected nothing; otherwise it
 * carries the reference taken by pickf().
 */
static Memblk*
pickchild(Memblk *f, int last)
{
	Parg pa;

	pa.which = nil;
	pa.last = last;
	rwlock(f, Rd);
	if(catcherror()){
		rwunlock(f, Rd);
		mbput(pa.which);
		error(nil);
	}
	dfdirmap(f, pickf, &pa, Rd);
	noerror();
	rwunlock(f, Rd);
	return pa.which;
}

/*
 * Build the path to the oldest snapshot in the archive
 * (/archive/yyyy/mmdd/epoch, taking the first child at each level).
 * Returns nil if the archive is empty, on errors, or if no directory
 * in the path has more than one entry (i.e. it is the last snapshot).
 */
static Path*
pickvictim(void)
{
	Path *p;
	int i;

	if(fs->archive->d.ndents == 0)
		return nil;

	p = newpath(fs->root);
	if(catcherror()){
		putpath(p);
		return nil;
	}
	addelem(&p, fs->archive);

	/* yyyy mmdd epoch */
	for(i = 0; i < 3 && p->f[p->nf-1]->d.ndents > 0; i++){
		addelem(&p, pickchild(p->f[p->nf-1], First));
		mbput(p->f[p->nf-1]);	/* drop the ref returned by pickchild() */
	}
	for(i = 1; i < p->nf; i++)
		if(p->f[i]->d.ndents > 1)
			break;

	if(i == p->nf){	/* last snap; nothing to reclaim */
		putpath(p);
		p = nil;
	}

	noerror();
	return p;
}

/*
 * One process per file system, so consume all the memory
 * for the cache.
 * To open more file systems, use more processes!
+ */ +void +fsopen(char *dev, int worm, int canwr) +{ + Memblk *arch, *last, *c; + int uid; + + if(catcherror()) + fatal("fsopen: error: %r"); + + fsinit(dev, 0); + readsuper(); + fs->worm = worm; + fs->mode = canwr; + uid = usrid("sys"); + xqlock(&fs->fzlk); + fs->root = dfcreate(nil, "", uid, DMDIR|0555); + arch = dbget(DBfile, fs->super->d.root); + fs->archive = arch; + rwlock(fs->root, Wr); + rwlock(arch, Wr); + dfchdentry(fs->root, 0, arch->addr); + rwunlock(arch, Wr); + rwunlock(fs->root, Wr); + + last = pickchild(arch, Last); /* yyyy */ + if(last != nil){ + c = pickchild(last, Last); /* mmdd */ + mbput(last); + last = c; + } + if(last != nil){ + c = pickchild(last, Last); /* epoch */ + mbput(last); + last = c; + } + rwlock(fs->root, Wr); + if(last != nil){ + rwlock(last, Rd); + fs->active = dbdup(last); + rwunlock(last, Rd); + mbput(last->mf->melted); /* could keep it, but no need */ + last->mf->melted = nil; + wname(fs->active, "active"); + fs->active->d.id = fstime(nsec()); + rwlock(fs->active, Wr); + dfchdentry(fs->root, 0, fs->active->addr); + rwunlock(fs->active, Wr); + mbput(last); + }else + fs->active = dfcreate(fs->root, "active", uid, DMDIR|0775); + + fs->cons = dfcreate(nil, "cons", uid, DMEXCL|0660); + fs->cons->d.gid = usrid("adm"); + fs->cons->mf->gid = "adm"; + changed(fs->cons); + fs->stats = dfcreate(nil, "stats", uid, 0664); + rwunlock(fs->root, Wr); + fs->consc = chancreate(sizeof(char*), 256); + xqunlock(&fs->fzlk); + + noerror(); + + /* + * Try to load the /active/users file, if any, + * but ignore errors. We already have a default table loaded + * and may operate using it. 
+ */ + if(!catcherror()){ + c = dfwalk(fs->active, "users"); + rwlock(c, Wr); + if(catcherror()){ + rwunlock(c, Wr); + mbput(c); + error(nil); + } + rwusers(c); + noerror(); + rwunlock(c, Wr); + mbput(c); + noerror(); + fs->cons->d.uid = usrid(getuser()); + fs->cons->mf->uid = getuser(); + } + fs->wtime = fstime(nsec()); +} + +uvlong +fsmemfree(void) +{ + uvlong nfree; + + xqlock(fs); + nfree = fs->nablk - fs->nblk; + nfree += fs->nmfree; + xqunlock(fs); + return nfree; +} + +/* + * Check if we are low on memory and move some blocks out in that case. + * This does not acquire locks on blocks, so it's safe to call it while + * keeping some files/blocks locked. + */ +int +fslru(void) +{ + Memblk *b, *bprev; + vlong t0; + int x; + long target, tot, n, ign; + + x = setdebug(); + dZprint("fslru: low on memory %ulld free %d min\n", fsmemfree(), Mminfree); + tot = ign = 0; + do{ + target = Mmaxfree - fsmemfree(); + t0 = nsec(); + xqlock(&fs->clean); + nfsopcalls[Lru]++; + if(catcherror()){ + fsoptime[Lru] += t0 - nsec(); + xqunlock(&fs->clean); + warn("fslru: %r"); + break; + } + n = 0; + for(b = fs->clean.tl; b != nil && target > 0; b = bprev){ + bprev = b->lprev; + if(b->dirty) + fatal("fslru: dirty block on clean\n"); + switch(b->type){ + case DBfree: + /* can happen. but, does it? */ + fatal("fslru: DBfree on clean\n"); + case DBsuper: + case DBref: + fatal("fslru: type %d found on clean\n", b->type); + case DBfile: + if(b == fs->root || b == fs->active || b == fs->archive){ + ign++; + continue; + } + break; + } + if(b->ref > 1){ + ign++; + continue; + } + /* + * Blocks here have one ref because of the hash table, + * which means they are are not used. + * We release the hash ref to let them go. + * bprev can't move while we put b. 
+ */ + dOprint("fslru: out: m%#p d%#010ullx\n", b, b->addr); + if(mbunhash(b, 1)){ + n++; + tot++; + target--; + } + } + noerror(); + fsoptime[Lru] += t0 - nsec(); + xqunlock(&fs->clean); + }while(n > 0 && target > 0); + if(tot == 0){ + warn("low on mem (0 out; %uld ignored)", ign); + tot = -1; + }else + dZprint("fslru: %uld out %uld ignored %ulld free %d min %d max\n", + tot, ign, fsmemfree(), Mminfree, Mmaxfree); + rlsedebug(x); + return tot; +} + +int +fsfull(void) +{ + if(fsdiskfree() > Dzerofree) + return 0; + + if(1){ + warn("file system full"); + fsdump(0, Mem); + fatal("aborting"); + } + return 1; +} + +int +fsreclaim(void) +{ + Memblk *victim, *arch; + long n, tot; + Path *p; + + xqlock(&fs->fzlk); + if(catcherror()){ + warn("reclaim: %r"); + xqunlock(&fs->fzlk); + return 0; + } + warn("%ulld free: reclaiming...", fsdiskfree()); + if(fs->fzsuper != nil){ + /* + * we did freeze/reclaim and are still writing, can't reclaim now. + */ + noerror(); + xqunlock(&fs->fzlk); + warn("write in progress. refusing to reclaim"); + return 0; + } + + tot = 0; + for(;;){ + + /* + * The logic regarding references for reclaim is similar + * to that described in fsfreeze(). + * Read that comment before this code. 
+ */ + dprint("fsreclaim: reclaiming\n"); + p = pickvictim(); + if(p == nil){ + dprint("nothing to reclaim\n"); + break; + } + if(catcherror()){ + putpath(p); + error(nil); + } + assert(p->nf > 2); + victim = p->f[p->nf-1]; + warn("reclaiming '%s'", victim->mf->name); + dprint("%H\n", victim); + arch = fs->archive; + dbincref(arch->addr); /* see comment in fsfreeze() */ + meltedpath(&p, p->nf-1, 0); + updateroot(p->f[1]); + if(catcherror()){ + rwunlock(p->f[p->nf-2], Wr); + error(nil); + } + dfchdentry(p->f[p->nf-2], victim->addr, 0); + noerror(); + rwunlock(p->f[p->nf-2], Wr); + n = dbput(victim); + dbput(arch); + mbput(arch); + noerror(); + putpath(p); + dffreeze(fs->archive); + dprint("fsreclaim: %uld file%s reclaimed\n", n, n?"s":""); + tot += n; + + if(fsdiskfree() > Dmaxfree){ + dprint("fsreclaim: %d free: done\n", Dmaxfree); + break; + } + } + if(tot == 0){ + warn("low on disk: 0 files reclaimed %ulld blocks free", + fsdiskfree()); + tot = -1; + }else + warn("%uld file%s reclaimed %ulld blocks free", + tot, tot?"s":"", fsdiskfree()); + noerror(); + xqunlock(&fs->fzlk); + return tot; +} + +static int +fsdirtypcent(void) +{ + long n, ndirty; + + n = fs->clean.n; + ndirty = fs->dirty.n; + + return (ndirty*100)/(n + ndirty); +} + +/* + * Policy for memory and and disk block reclaiming. + * Called from the sync proc from time to time and also AFTER each RPC. 
+ */ +void +fspolicy(int when) +{ + int lomem, lodisk, hidirty, longago; + + switch(when){ + case Pre: + if(fsmemfree() > Mzerofree && fsdiskfree() > Dzerofree) + return; + qlock(&fs->policy); + break; + case Post: + if(!canqlock(&fs->policy)) + return; + break; + } + if(catcherror()){ + qunlock(&fs->policy); + warn("fspolicy: %r"); + return; + } + + lomem = fsmemfree() < Mminfree; + lodisk = fsdiskfree() < Dminfree; + hidirty = fsdirtypcent() > Mmaxdirtypcent; + longago = (fstime(nsec()) - fs->wtime)/NSPERSEC > Syncival; + + /* Ideal sequence for [lomem lodisk hidirty] might be: + * 111: lru sync reclaim+sync lru + * 110: lru reclaim+sync + * 101: lru sync lru + * 100: lru + * 011: reclaim+sync + * 010: reclaim+sync + * 001: sync + * 000: - + * Plus: if we are still low on memory, after lru, try + * doing a sync to move blocks to the clean list, ie. fake "hidirty". + */ + + if(lomem || lodisk || hidirty || longago) + dZprint("fspolicy: lomem=%d (%ulld) lodisk=%d (%ulld)" + " hidirty=%d (%d%%) longago=%d\n", + lomem, fsmemfree(), lodisk, fsdiskfree(), + hidirty, fsdirtypcent(), longago); + if(lomem){ + fslru(); + lomem = fsmemfree() < Mminfree; + if(lomem) + hidirty++; + } + if(lodisk) + fsreclaim(); + + if(lodisk || hidirty || (longago && fs->dirty.n != 0)) + fssync(); + if(lomem && hidirty) + fslru(); + + noerror(); + qunlock(&fs->policy); +} + +uvlong +fstime(uvlong t) +{ + static Lock lk; + static uvlong last; + + lock(&lk); + if(t) + fs->atime = t; + t = fs->atime; + if(t == last) + fs->atime = ++t; + last = t; + unlock(&lk); + return t; +} --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,30 @@ +Rc !dd -if /dev/zero -of disk -bs 1024 -count 1024 +mk all +slay 8.fscmd 8.fsfmt 8.9pix 9pix fcheck|rc ; broke|rc +8.fsfmt -vy >[2=1] +cd /sys/src/cmd/creepy; 8.9pix -DZ9 disk >[2=1] +8.fscmd -cDKNMOZ ldumpall sync >[2=1] +8.fscmd -cZ ldump sync sync rcl sync ldump>[2=1] +tstack 9pix +;; ;; ;; iprev=0; for(i in 
`{seq 10}){ echo $i... ; cp /bin/ls /n/9pix/active/ls^$i ; echo sync >/n/9pix/cons; rm -f /n/9pix/active/ls^$iprev ; iprev=$i ; echo check>/n/9pix/cons} +fcheck w 30 put!/bin/echo!/active/ls2 sync >[2=1] +cp disk1 disk +echo halt >/n/9pix/cons +creepy/9pix -Z /dev/sdC0/creepy +mount -c /srv/9pix /n/9pix +echo allow nemo >/n/9pix/cons +dircp -v /n/nix /n/9pix/active + +This is after a copying /n/nix to the active tree. + +creepy> stats +mblks: 194890 nblk 194895 nablk 194139 mused 751 mfree +lists: 190929 clean 2574 dirty 632 refs 194135 total +dblks: 4806423 nblk 3975788 nfree (0 list + 3975788 rem) +paths: 6 alloc 3 free ( 24 bytes) +mfs: 57257 alloc 0 free ( 68 bytes) +fids: 6 alloc 3 free ( 96 bytes) +rpcs: 4 alloc 2 free (8804 bytes) +clis: 1 alloc 0 free (8284 bytes) +srpcs: 0 alloc 0 free ( 740 bytes) + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,652 @@ +#include "all.h" + +/* + * ix server for creepy + */ + +enum +{ + Tfirst = 0x8000U, + Tlast = 0x4000U, + Tmask = 0x3FFFU +}; + +static void rversion(Rpc*), rfid(Rpc*), rclone(Rpc*), + rattach(Rpc*), rwalk(Rpc*), + ropen(Rpc*), rcreate(Rpc*), + rread(Rpc*), rwrite(Rpc*), rclunk(Rpc*), + rremove(Rpc*), rattr(Rpc*), rwattr(Rpc*), + rcond(Rpc*), rmove(Rpc*); + +static int reply(Rpc*); + +static void (*ixcalls[])(Rpc*) = +{ + [IXTversion] rversion, + [IXTattach] rattach, + [IXTfid] rfid, + [IXTclone] rclone, + [IXTwalk] rwalk, + [IXTopen] ropen, + [IXTcreate] rcreate, + [IXTread] rread, + [IXTwrite] rwrite, + [IXTclunk] rclunk, + [IXTremove] rremove, + [IXTattr] rattr, + [IXTwattr] rwattr, + [IXTcond] rcond, + [IXTmove] rmove, +}; + +/* + * we consider T/R msgs that include uids, errors, and attributes + * as short. That places a limit on things like user names, element + * names, and error messages. The limit is lower than Minmdata. + * Declaring them here as large requests will remove those limits. 
+ * For large messages, the request buffer is used instead of the + * per-client write buffer, and data is copied by the rpc code, + * to save an extra copy. + */ +static int largeix[IXTmax] = +{ + [IXTread] 1, /* uses its buf for the reply */ + [IXTwrite] 1, +}; + +Alloc srpcalloc = +{ + .elsz = sizeof(Shortrpc), + .zeroing = 0, +}; + +static int ixrreadhdrsz, ixrattrhdrsz; + +char* +ixstats(char *s, char *e, int clr, int verb) +{ + int i; + + s = seprint(s, e, "srpcs:\t%4uld alloc %4uld free (%4uld bytes)\n", + srpcalloc.nalloc, srpcalloc.nfree, srpcalloc.elsz); + if(verb == 0) + return s; + for(i = 0; i < nelem(ixcalls); i++) + if(ixcalls[i] != nil && ncalls[i] > 0){ + s = seprint(s, e, "%-8s\t%5uld calls\t%11ulld µs\n", + callname[i], ncalls[i], + (calltime[i]/ncalls[i])/1000); + if(clr){ + ncalls[i] = 0; + calltime[i] = 0; + } + } + return s; +} + + +static Rpc* +newsrpc(void) +{ + Rpc *rpc; + + rpc = anew(&srpcalloc); + rpc->next = nil; + rpc->cli = nil; + rpc->fid = nil; + rpc->flushed = 0; + rpc->closed = 0; + rpc->chan = ~0; + rpc->rpc0 = nil; + memset(&rpc->xt, 0, sizeof rpc->xt); + memset(&rpc->xr, 0, sizeof rpc->xr); + return rpc; +} + +static void +freesrpc(Rpc *rpc) +{ + afree(&srpcalloc, rpc); +} + +static void +freeixrpc(Rpc *rpc) +{ + rpc->closed = 0; + rpc->flushed = 0; + if(largeix[rpc->xt.type]) + freerpc(rpc); + else + freesrpc(rpc); +} + +static void +rversion(Rpc *rpc) +{ + rpc->xr.msize = rpc->xt.msize; + if(rpc->xr.msize > Maxmdata) + rpc->xr.msize = Maxmdata; + rpc->cli->msize = rpc->xr.msize; + if(strncmp(rpc->xt.version, "IX", 2) != 0) + error("unknown protocol version"); + rpc->xr.version = "IX"; +} + +static void +rattach(Rpc *rpc) +{ + putfid(rpc->fid); + rpc->rpc0->fid = newfid(rpc->cli, -1); + fidattach(rpc->rpc0->fid, rpc->xt.aname, rpc->xt.uname); +} + +static void +rfid(Rpc *rpc) +{ + + putfid(rpc->rpc0->fid); + rpc->rpc0->fid = getfid(rpc->cli, rpc->xt.fid); +} + +static void +rclone(Rpc *rpc) +{ + Fid *nfid; + + 
if(rpc->rpc0->fid == nil) + error("fid not set"); + nfid = fidclone(rpc->cli, rpc->rpc0->fid, -1); + putfid(rpc->rpc0->fid); + rpc->rpc0->fid = nfid; + nfid->cflags = rpc->xt.cflags; +} + +static void +rwalk(Rpc *rpc) +{ + if(rpc->rpc0->fid == nil) + error("fid not set"); + fidwalk(rpc->rpc0->fid, rpc->xt.wname); +} + +static void +ropen(Rpc *rpc) +{ + int cflags; + + if(rpc->rpc0->fid == nil) + error("fid not set"); + cflags = rpc->xt.mode&(OCERR|OCEND); + fidopen(rpc->rpc0->fid, rpc->xt.mode &~cflags); + rpc->rpc0->fid->cflags = cflags; +} + +static void +rcreate(Rpc *rpc) +{ + int cflags; + + if(rpc->rpc0->fid == nil) + error("fid not set"); + cflags = rpc->xt.mode&(OCERR|OCEND); + fidcreate(rpc->rpc0->fid, rpc->xt.name, rpc->xt.mode, rpc->xt.perm); + rpc->rpc0->fid->cflags = cflags; +} + +/* + * BUG: this cannot reply with user ids, it must include + * strings instead. + */ +static ulong +pixd(Memblk *f, uchar *buf, int nbuf) +{ + ulong n; + + if(nbuf < BIT32SZ) + return 0; + if(catcherror()) + return 0; + n = pmeta(buf+BIT32SZ, nbuf-BIT32SZ, f); + noerror(); + PBIT32(buf, n); + return n+BIT32SZ; +} + +static void +rread(Rpc *rpc) +{ + vlong off; + Fid *fid; + int nmsg; + + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + if(rpc->xt.count > rpc->cli->msize-ixrreadhdrsz) + rpc->xt.count = rpc->cli->msize-ixrreadhdrsz; + rpc->xr.data = rpc->data + ixrreadhdrsz; + + /* + * send all but the last reply, if we are given permissiong to + * send multiple replies back. + * Errors, eof, and flush terminate the sequence. + * As usual, the caller sends the last reply when we return. 
+ */ + off = rpc->xt.offset; + nmsg = rpc->xt.nmsg; + for(;;){ + rpc->xr.count = fidread(fid, rpc->xr.data, rpc->xt.count, off, pixd); + if(rpc->xr.count == 0) + break; + if(nmsg-- <= 0) + break; + if(reply(rpc) < 0) + break; + if(rpc != rpc->rpc0) + freeixrpc(rpc); + off += rpc->xr.count; + } +} + +static void +rwrite(Rpc *rpc) +{ + Fid *fid; + + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + rpc->xr.offset = rpc->xt.offset; + rpc->xr.count = fidwrite(fid, rpc->xt.data, rpc->xt.count, &rpc->xr.offset); +} + +static void +rclunk(Rpc *rpc) +{ + Fid *fid; + + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + if(fid->omode != -1) + fidclose(fid); + fid->cflags = 0; + putfid(fid); + putfid(fid); + rpc->rpc0->fid = nil; +} + +static void +rremove(Rpc *rpc) +{ + Fid *fid; + + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + fidremove(fid); + fid->cflags = 0; + putfid(fid); + putfid(fid); + rpc->rpc0->fid = nil; +} + +static void +rattr(Rpc *rpc) +{ + Fid *fid; + Path *p; + Memblk *f; + + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + p = lastpath(&fid->p, fid->p->nf); + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + rpc->xr.value = (char*)rpc->data + ixrattrhdrsz; + dfrattr(f, rpc->xt.attr, rpc->xr.value, Minmdata - ixrattrhdrsz); + rwunlock(f, Rd); + noerror(); +} + +static void +rwattr(Rpc *rpc) +{ + Fid *fid; + Path *p; + Memblk *f; + + /* + * BUG: add checks like done in wstat(). + * this code is incomplete. 
+ */ + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + p = fid->p; + f = p->f[p->nf-1]; + if(fs->mode == Rd) + error("read only file system"); + if(writedenied(fid->uid)) + error("user can't write"); + if(isro(f) || fid->archived) + error("can't wattr archived or built-in files"); + p = meltedpath(&fid->p, fid->p->nf, 1); + f = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + dfwattr(f, rpc->xt.attr, rpc->xt.value); + noerror(); + rwunlock(f, Wr); +} + +static void +rcond(Rpc *rpc) +{ + Fid *fid; + Path *p; + Memblk *f; + + fid = rpc->rpc0->fid; + if(fid == nil) + error("fid not set"); + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + dfcattr(f, rpc->xt.op, rpc->xt.attr, rpc->xt.value); + noerror(); + rwunlock(f, Rd); +} + +static void +rmove(Rpc *rpc) +{ + if(rpc->rpc0->fid == nil) + error("fid not set"); + error("move not yet implemented"); +} + +/* + * Read a short or large rpc and return it. + * Shouldn't we use bio, or at least a buffer? 
+ */ +static Rpc* +readix(int fd) +{ + uchar hdr[BIT16SZ+BIT16SZ+BIT8SZ]; + long nhdr, nr; + ulong sz; + uint type; + Rpc *rpc; + + nhdr = readn(fd, hdr, sizeof hdr); + if(nhdr < 0){ + dXprint("readix: %r\n"); + return nil; + } + if(nhdr == 0){ + werrstr("eof"); + return nil; + } + sz = GBIT16(hdr); + if(sz > IOHDRSZ+Maxmdata){ + /* don't read it; the entire stream will fail */ + werrstr("msg too large"); + return nil; + } + if(sz < BIT16SZ+BIT8SZ){ + /* don't read it; the entire stream will fail */ + werrstr("msg too small"); + return nil; + } + type = GBIT8(hdr+BIT16SZ+BIT16SZ); + if(type >= IXTmax){ + werrstr("wrong message type"); + rpc = newrpc(); + }else if(largeix[type]) + rpc = newrpc(); + else + rpc = newsrpc(); + rpc->chan = GBIT16(hdr+BIT16SZ); + rpc->xt.type = type; + PBIT8(rpc->data, type); + nr = readn(fd, rpc->data+BIT8SZ, sz-(BIT16SZ+BIT8SZ)); + if(nr < 0){ + freeixrpc(rpc); + return nil; + } + if(nr != sz){ + werrstr("short msg data"); + freeixrpc(rpc); + return nil; + } + if(fs->profile) + rpc->t0 = nsec(); + if(ixunpack(rpc->data, sz-BIT16SZ, &rpc->xt) != sz-BIT16SZ){ + freeixrpc(rpc); + return nil; + } + return rpc; +} + +static int +reply(Rpc *rpc) +{ + ulong sz, max; + uchar *p, *buf; + Cli *cli; + u16int chan; + + cli = rpc->cli; + chan = rpc->chan&Tmask; + if(rpc->xr.type == IXRerror || (rpc->chan&Tlast) != 0) + chan |= Tlast; + xqlock(&cli->wlk); + if(largeix[rpc->xt.type]) + buf = rpc->data; + else + buf = cli->wdata; + max = IOHDRSZ+Maxmdata; + p = buf; + p += BIT16SZ; + PBIT16(p, chan); + p += BIT16SZ; + sz = ixpack(&rpc->xr, p, max-BIT16SZ-BIT16SZ); + if(sz == 0) + fatal("writeix: message too large or ixpack failed"); + PBIT16(buf, sz); + p += sz; + + if(rpc->rpc0->flushed){ + xqunlock(&cli->wlk); + werrstr("flushed"); + dXprint("write: flushed"); + return -1; + } + if(chan&Tlast){ + putfid(rpc->rpc0->fid); /* release rpc fid before replying */ + rpc->rpc0->fid = nil; /* or we might get "fid in use" errors */ + } + dXprint("-> %G\n", 
&rpc->xr); + if(write(cli->fd, buf, p-buf) != p-buf){ + xqunlock(&cli->wlk); + dXprint("write: %r"); + return -1; + } + if(fs->profile) + calltime[rpc->xt.type] += nsec() - rpc->t0; + ncalls[rpc->xt.type]++; + xqunlock(&cli->wlk); + return p-buf; +} + +static char* +rpcworkerix(void *v, void**aux) +{ + Rpc *rpc, *rpc0; + Cli *cli; + Channel *c; + char err[128]; + long nw; + int nerr; + Memblk *fahead; + + c = v; + if(*aux == nil){ + errinit(Errstack); + *aux = v; /* make it not nil */ + } + + err[0] = 0; + rpc = recvp(c); + rpc0 = rpc; + cli = rpc->cli; + threadsetname("rpcworkerix %s chan %d", cli->addr, rpc0->chan); + dPprint("%s started\n", threadgetname()); + + do{ + fspolicy(Pre); + + nerr = errstacksize(); + rpc->xr.type = rpc->xt.type + 1; + rpc->rpc0 = rpc0; + quiescent(No); + if(catcherror()){ + quiescent(Yes); + rpc->xr.type = Rerror; + rpc->xr.ename = err; + rerrstr(err, sizeof err); + if(rpc0->fid != nil && (rpc0->fid->cflags&OCERR) != 0) + rpc0->fid->cflags |= OCEND; + }else{ + ixcalls[rpc->xt.type](rpc); + quiescent(Yes); + noerror(); + } + + fahead = nil; + if(rpc0->fid != nil && rpc0->fid->p != nil) + if(rpc->xr.type == IXRread || rpc->xr.type == IXRwalk){ + fahead = rpc0->fid->p->f[rpc0->fid->p->nf - 1]; + incref(fahead); + } + if(catcherror()){ + mbput(fahead); + error(nil); + } + + nw = reply(rpc); + + if(fahead != nil){ + if(rpc->xr.type == IXRread && rpc->xt.nmsg <= 1) + rahead(fahead, rpc->xt.offset + rpc->xr.count); + mbput(fahead); + } + noerror(); + + if(rpc != rpc0) + freeixrpc(rpc); + if(errstacksize() != nerr) + fatal("%s: unbalanced error stack", threadgetname()); + }while(!rpc0->closed && nw > 0 && err[0] == 0 && (rpc = recvp(c)) != nil); + + while((rpc = nbrecvp(c)) != nil) + freeixrpc(rpc); + replied(rpc0); + freeixrpc(rpc0); + + fspolicy(Post); + + dPprint("%s exiting\n", threadgetname()); + threadsetname("rpcworkerix"); + return nil; +} + +static void +ixinit(void) +{ + IXcall xt; + if(ixrreadhdrsz != 0) + return; + xt.type = 
IXRread; + ixrreadhdrsz = ixpackedsize(&xt) + BIT16SZ + BIT16SZ; + xt.type = IXRattr; + xt.value = ""; + ixrattrhdrsz = ixpackedsize(&xt) + BIT16SZ + BIT16SZ; +} + +static char* +cliworkerix(void *v, void**aux) +{ + Cli *cli; + Rpc *rpc, *r; + + cli = v; + threadsetname("cliworkerix %s", cli->addr); + dPprint("%s started\n", threadgetname()); + + ixinit(); + for(;;){ + if(dbg['E']) + dumpfids(); + loop: rpc = readix(cli->fd); + if(rpc == nil){ + dXprint("%s: read: %r\n", cli->addr); + break; + } + rpc->cli = cli; + incref(cli); + + xqlock(&cli->rpclk); + for(r = cli->rpcs; r != nil; r = r->next) + if((r->chan&Tmask) == (rpc->chan&Tmask)){ + if(rpc->chan&Tlast) + if(r->closed) + r->flushed = 1; + else + r->closed = 1; + sendp(r->c, rpc); + xqunlock(&cli->rpclk); + goto loop; + } + if((rpc->chan&Tfirst) == 0){ /* it's channel is gone */ + freeixrpc(rpc); + xqunlock(&cli->rpclk); + goto loop; + } + + /* new channel */ + rpc->next = cli->rpcs; + cli->rpcs = rpc; + if(rpc->c == nil) + rpc->c = chancreate(sizeof(Rpc*), 64); + cli->nrpcs++; + xqunlock(&cli->rpclk); + + if(rpc->chan&Tlast) + rpc->closed = 1; + sendp(rpc->c, rpc); + if(Rpcspercli != 0 && cli->nrpcs >= Rpcspercli) + rpcworkerix(rpc->c, aux); + else + getworker(rpcworkerix, rpc->c, nil); + if(fs->halt){ + fprint(2, "%s: halted\n", argv0); + threadexitsall(nil); + } + } + putcli(cli); + dPprint("%s exiting\n", threadgetname()); + return nil; +} --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,174 @@ +/* + * The protocol packs requests within transport channels. + * + * The format in the wire is len[2] tag[2] msg[] + * where len is the number of bytes of the message and + * tag is a channel number. + * + * there two special bits in tags: Tfirst, and Tlast. + * The first message in a channel must set Tfirst + * the last one must set Tlast. 
+ * Tfirst = 0x8000, Tlast = 0x4000 + * + * A channel is duplex and is closed when both directions have + * exchanged messages with Tlast set. + * An error reply always has Tlast set. + * + * Authentication is fully left out of the protocol. + * The underlying transport must be secured, in a way that + * provides trust between both parties. + * + * This is the set of individual requests or messages: + * Here, data[] means what's left of the message (i.e., count is implied). + * + * IXTversion msize[4] version[s] + * IXRversion msize[4] version[s] + * IXTattach uname[s] aname[s] + * IXRattach fid[4] // fid becomes the current fid + * IXTfid fid[4] + * IXRfid + * IXRerror ename[s] + * IXTclone cflags[1] + * IXRclone fid[4] // fid becomes the current fid + * IXTwalk wname[s] + * IXRwalk + * IXTopen mode[1] // mode includes cflags as well + * IXRopen + * IXTcreate name[s] perm[4] mode[1] // mode includes cflags as well + * IXRcreate + * IXTread nmsg[2] offset[8] count[4] + * IXRread data[] + * IXTwrite offset[8] endoffset[8] data[] + * IXRwrite offset[8] count[4] + * IXTclunk + * IXRclunk + * IXTremove + * IXRremove + * IXTattr attr[s] //"*" means: return attr name list + * IXRattr value[s] + * IXTwattr attr[s] value[s] + * IXRwattr + * IXTcond op[1] attr[s] value[s] + * IXRcond + * IXTmove dirfid[4] newname[s] + * IXRmove + * + * There is no flush. Flushing is done by flushing the channel. 
+ */ + +enum +{ + IXTversion = 50, + IXRversion, + IXTattach, + IXRattach, + IXTfid, + IXRfid, + __IXunused__, + IXRerror, + IXTclone, + IXRclone, + IXTwalk, + IXRwalk, + IXTopen, + IXRopen, + IXTcreate, + IXRcreate, + IXTread, + IXRread, + IXTwrite, + IXRwrite, + IXTclunk, + IXRclunk, + IXTremove, + IXRremove, + IXTattr, + IXRattr, + IXTwattr, + IXRwattr, + IXTcond, + IXRcond, + IXTmove, + IXRmove, + IXTmax, + + /* + * flags used in Tclone, Topen, Tcreate + */ + OCEND = 0x4, /* clunk on end of rpc */ + OCERR = 0x8, /* clunk on error */ + + CEQ = 0, /* Tcond.op */ + CGE, + CGT, + CLE, + CLT, + CNE, + CMAX, +}; + + +typedef struct IXcall IXcall; + +/* + * len[2] tag[2] prepended by the transport. + * This is an individual call request.T + * Fids are selected by the server and handed to the client. + */ +struct IXcall +{ + uchar type; + union{ + struct{ /* Tversion, Rversion */ + u32int msize; + char *version; + }; + struct{ + u32int fid; /* Tfid, Rattach, Rclone */ + }; + struct{ + char *uname; /* Tattach */ + char *aname; /* Tattach */ + }; + struct{ + char *ename; /* Rerror */ + }; + struct{ + uchar cflags; /* Tclone (OCEND|OCERR) */ + }; + struct{ + uchar mode; /* Topen, Tcreate */ + u32int perm; /* Tcreate */ + char *name; /* Tcreate */ + }; + struct{ /* Twalk */ + char *wname; + }; + struct{ + u16int nmsg; /* Tread */ + uvlong offset; /* Tread, Twrite, Rwrite */ + uvlong endoffset; /* Twrite */ + u32int count; /* Tread, Rread, Twrite, Rwrite */ + uchar *data; /* Twrite, Rread */ + }; + struct{ + uchar op; /* Tcond */ + char *attr; /* Tattr, Twattr, Tcond */ + char *value; /* Rattr, Twattr, Tcond */ + }; + struct{ /* Tmove */ + u32int dirfid; + char *newname; + }; + /* With struct{}: + * Rfid, + * Rwalk, Ropen, Rcreate, Tclunk, Rclunk, Tclose, Rclose, + * Tremove, Rremove, Rwattr, Rcond, Rmove + */ + }; +}; + +/* 8c bug: varargck does not like IX */ +typedef IXcall COMPILERBUGpcall; +#pragma varargck type "G" COMPILERBUGpcall* + --- /sys/src/cmd/creepy Thu Jan 1 
00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,784 @@ +#include "all.h" + +static char* cname[CMAX] = +{ + [CEQ] "==", + [CGE] ">=", + [CGT] "> ", + [CLT] "< ", + [CLE] "<=", + [CNE] "!=", +}; + +vlong calltime[Tmax]; +ulong ncalls[Tmax]; + +char* callname[] = +{ + /* ix requests */ + [IXTversion] "Tversion", + [IXRversion] "Rversion", + [IXTattach] "Tattach", + [IXRattach] "Rattach", + [IXTfid] "Tfid", + [IXRfid] "Rfid", + [__IXunused__] "__IXunused__", + [IXRerror] "Rerror", + [IXTclone] "Tclone", + [IXRclone] "Rclone", + [IXTwalk] "Twalk", + [IXRwalk] "Rwalk", + [IXTopen] "Topen", + [IXRopen] "Ropen", + [IXTcreate] "Tcreate", + [IXRcreate] "Rcreate", + [IXTread] "Tread", + [IXRread] "Rread", + [IXTwrite] "Twrite", + [IXRwrite] "Rwrite", + [IXTclunk] "Tclunk", + [IXRclunk] "Rclunk", + [IXTremove] "Tremove", + [IXRremove] "Rremove", + [IXTattr] "Tattr", + [IXRattr] "Rattr", + [IXTwattr] "Twattr", + [IXRwattr] "Rwattr", + [IXTcond] "Tcond", + [IXRcond] "Rcond", + [IXTmove] "Tmove", + [IXRmove] "Rmove", + + /* 9p requests */ + [Tversion] "Tversion", + [Rversion] "Rversion", + [Tauth] "Tauth", + [Rauth] "Rauth", + [Tattach] "Tattach", + [Rattach] "Rattach", + [Terror] "Terror", + [Rerror] "Rerror", + [Tflush] "Tflush", + [Rflush] "Rflush", + [Twalk] "Twalk", + [Rwalk] "Rwalk", + [Topen] "Topen", + [Ropen] "Ropen", + [Tcreate] "Tcreate", + [Rcreate] "Rcreate", + [Tread] "Tread", + [Rread] "Rread", + [Twrite] "Twrite", + [Rwrite] "Rwrite", + [Tclunk] "Tclunk", + [Rclunk] "Rclunk", + [Tremove] "Tremove", + [Rremove] "Rremove", + [Tstat] "Tstat", + [Rstat] "Rstat", + [Twstat] "Twstat", + [Rwstat] "Rwstat", +}; + +static uchar* +pstring(uchar *p, char *s) +{ + uint n; + + if(s == nil){ + PBIT16(p, 0); + p += BIT16SZ; + return p; + } + + n = strlen(s); + /* + * We are moving the string before the length, + * so you can S2M a struct into an existing message + */ + memmove(p + BIT16SZ, s, n); + PBIT16(p, n); + p += n + BIT16SZ; + return p; +} + 
+static uint +stringsz(char *s) +{ + if(s == nil) + return BIT16SZ; + + return BIT16SZ+strlen(s); +} + +/* + * Does NOT count the data bytes added past the packed + * message for IXRread, IXTwrite. This is so to save copying. + * The caller is expected to copy the final data in-place and + * adjust the total message length. + */ +uint +ixpackedsize(IXcall *f) +{ + uint n; + + n = BIT8SZ; /* type */ + + switch(f->type){ + case IXTversion: + case IXRversion: + n += BIT32SZ; + n += stringsz(f->version); + break; + + case IXTattach: + n += stringsz(f->uname); + n += stringsz(f->aname); + break; + case IXRattach: + n += BIT32SZ; + break; + + case IXTfid: + n += BIT32SZ; + break; + case IXRfid: + break; + + case IXRerror: + n += stringsz(f->ename); + break; + + case IXTclone: + n += BIT8SZ; + break; + case IXRclone: + n += BIT32SZ; + break; + + case IXTwalk: + n += stringsz(f->wname); + break; + case IXRwalk: + break; + + case IXTopen: + n += BIT8SZ; + break; + case IXRopen: + break; + + case IXTcreate: + n += stringsz(f->name); + n += BIT32SZ; + n += BIT8SZ; + break; + case IXRcreate: + break; + + case IXTread: + n += BIT16SZ; + n += BIT64SZ; + n += BIT32SZ; + break; + case IXRread: + /* data follows; not counted */ + break; + + case IXTwrite: + n += BIT64SZ; + n += BIT64SZ; + /* data follows; not counted */ + break; + case IXRwrite: + n += BIT64SZ; + n += BIT32SZ; + break; + + case IXTclunk: + case IXRclunk: + case IXTremove: + case IXRremove: + break; + + case IXTattr: + n += stringsz(f->attr); + break; + case IXRattr: + n += stringsz(f->value); + break; + + case IXTwattr: + n += stringsz(f->attr); + n += stringsz(f->value); + break; + case IXRwattr: + break; + + case IXTcond: + n += BIT8SZ; + n += stringsz(f->attr); + n += stringsz(f->value); + break; + case IXRcond: + break; + + case IXTmove: + n += BIT32SZ; + n += stringsz(f->newname); + break; + case IXRmove: + break; + + default: + sysfatal("packedsize: unknown type %d", f->type); + + } + return n; +} + +uint 
+ixpack(IXcall *f, uchar *ap, uint nap) +{ + uchar *p; + uint size; + + size = ixpackedsize(f); + if(size == 0 || size > nap) + return 0; + + p = (uchar*)ap; + + PBIT8(p, f->type); + p += BIT8SZ; + + switch(f->type){ + case IXTversion: + case IXRversion: + PBIT32(p, f->msize); + p += BIT32SZ; + p = pstring(p, f->version); + break; + + case IXTattach: + p = pstring(p, f->uname); + p = pstring(p, f->aname); + break; + case IXRattach: + PBIT32(p, f->fid); + p += BIT32SZ; + break; + + case IXTfid: + PBIT32(p, f->fid); + p += BIT32SZ; + break; + case IXRfid: + break; + + case IXRerror: + p = pstring(p, f->ename); + break; + + case IXTclone: + PBIT8(p, f->cflags); + p += BIT8SZ; + break; + case IXRclone: + PBIT32(p, f->fid); + p += BIT32SZ; + break; + + case IXTwalk: + p = pstring(p, f->wname); + break; + case IXRwalk: + break; + + case IXTopen: + PBIT8(p, f->mode); + p += BIT8SZ; + break; + case IXRopen: + break; + + case IXTcreate: + p = pstring(p, f->name); + PBIT32(p, f->perm); + p += BIT32SZ; + PBIT8(p, f->mode); + p += BIT8SZ; + break; + case IXRcreate: + break; + + case IXTread: + PBIT16(p, f->nmsg); + p += BIT16SZ; + PBIT64(p, f->offset); + p += BIT64SZ; + PBIT32(p, f->count); + p += BIT32SZ; + break; + case IXRread: + /* data follows; not packed */ + break; + + case IXTwrite: + PBIT64(p, f->offset); + p += BIT64SZ; + PBIT64(p, f->endoffset); + p += BIT64SZ; + /* data follows; not packed */ + break; + case IXRwrite: + PBIT64(p, f->offset); + p += BIT64SZ; + PBIT32(p, f->count); + p += BIT32SZ; + break; + + case IXTclunk: + case IXRclunk: + case IXTremove: + case IXRremove: + break; + + case IXTattr: + p = pstring(p, f->attr); + break; + case IXRattr: + p = pstring(p, f->value); + break; + + case IXTwattr: + p = pstring(p, f->attr); + p = pstring(p, f->value); + break; + case IXRwattr: + break; + + case IXTcond: + if(f->op >= CMAX){ + werrstr("unknown cond op"); + return 0; + } + PBIT8(p, f->op); + p += BIT8SZ; + p = pstring(p, f->attr); + p = pstring(p, 
f->value); + break; + case IXRcond: + break; + + case IXTmove: + PBIT32(p, f->dirfid); + p += BIT32SZ; + p = pstring(p, f->newname); + break; + case IXRmove: + break; + + default: + sysfatal("pack: type %d", f->type); + + } + if(size != p-ap) + return 0; + return size; +} + +static uchar* +gstring(uchar *p, uchar *ep, char **s) +{ + uint n; + + if(p == nil || p+BIT16SZ > ep) + return nil; + n = GBIT16(p); + p += BIT16SZ - 1; + if(p+n+1 > ep) + return nil; + /* move it down, on top of count, to make room for '\0' */ + memmove(p, p + 1, n); + p[n] = '\0'; + *s = (char*)p; + p += n+1; + return p; +} + +uint +ixunpack(uchar *ap, uint nap, IXcall *f) +{ + uchar *p, *ep; + + p = ap; + ep = p + nap; + + if(p+BIT8SZ > ep){ + werrstr("msg too short"); + return 0; + } + + f->type = GBIT8(p); + p += BIT8SZ; + + switch(f->type){ + case IXTversion: + case IXRversion: + if(p+BIT32SZ > ep) + return 0; + f->msize = GBIT32(p); + p += BIT32SZ; + p = gstring(p, ep, &f->version); + break; + + case IXTattach: + p = gstring(p, ep, &f->uname); + if(p == nil) + return 0; + p = gstring(p, ep, &f->aname); + break; + case IXRattach: + if(p+BIT32SZ > ep) + return 0; + f->fid = GBIT32(p); + p += BIT32SZ; + break; + + case IXTfid: + if(p+BIT32SZ > ep) + return 0; + f->fid = GBIT32(p); + p += BIT32SZ; + break; + case IXRfid: + break; + + case IXRerror: + p = gstring(p, ep, &f->ename); + break; + + case IXTclone: + if(p+BIT8SZ > ep) + return 0; + f->cflags = GBIT8(p); + p += BIT8SZ; + break; + case IXRclone: + if(p+BIT32SZ > ep) + return 0; + f->fid = GBIT32(p); + p += BIT32SZ; + break; + + case IXTwalk: + p = gstring(p, ep, &f->wname); + break; + case IXRwalk: + break; + + case IXTopen: + if(p+BIT8SZ > ep) + return 0; + f->mode = GBIT8(p); + p += BIT8SZ; + break; + case IXRopen: + break; + + case IXTcreate: + p = gstring(p, ep, &f->name); + if(p == nil) + break; + if(p+BIT32SZ+BIT8SZ > ep) + return 0; + f->perm = GBIT32(p); + p += BIT32SZ; + f->mode = GBIT8(p); + p += BIT8SZ; + break; + case 
IXRcreate: + break; + + case IXTread: + if(p+BIT16SZ+BIT64SZ+BIT32SZ > ep) + return 0; + f->nmsg = GBIT16(p); + p += BIT16SZ; + f->offset = GBIT64(p); + p += BIT64SZ; + f->count = GBIT32(p); + p += BIT32SZ; + break; + case IXRread: + f->data = p; + f->count = ep - p; + break; + + case IXTwrite: + if(p+BIT64SZ > ep) + return 0; + f->offset = GBIT64(p); + p += BIT64SZ; + f->endoffset = GBIT64(p); + p += BIT64SZ; + f->data = p; + f->count = ep - p; + break; + case IXRwrite: + if(p+BIT32SZ+BIT64SZ > ep) + return 0; + f->offset = GBIT64(p); + p += BIT64SZ; + f->count = GBIT32(p); + p += BIT32SZ; + break; + + case IXTclunk: + case IXRclunk: + case IXTremove: + case IXRremove: + break; + + case IXTattr: + p = gstring(p, ep, &f->attr); + break; + case IXRattr: + p = gstring(p, ep, &f->value); + break; + + case IXTwattr: + p = gstring(p, ep, &f->attr); + p = gstring(p, ep, &f->value); + break; + case IXRwattr: + break; + + case IXTcond: + if(p+BIT8SZ > ep) + return 0; + f->op = GBIT8(p); + if(f->op >= CMAX){ + werrstr("unknown cond op"); + return 0; + } + p += BIT8SZ; + p = gstring(p, ep, &f->attr); + p = gstring(p, ep, &f->value); + break; + case IXRcond: + break; + + case IXTmove: + if(p+BIT32SZ > ep) + return 0; + f->dirfid = GBIT32(p); + p += BIT32SZ; + p = gstring(p, ep, &f->newname); + break; + case IXRmove: + break; + + default: + werrstr("unpack: unknown type %d", f->type); + return 0; + } + + if(p==nil || p>ep || p == ap){ + werrstr("unpack: p %#p ep %#p", p, ep); + return 0; + } + return p - ap; +} + +int +rpcfmt(Fmt *fmt) +{ + Rpc *rpc; + + rpc = va_arg(fmt->args, Rpc*); + if(rpc == nil) + return fmtprint(fmt, ""); + if(rpc->t.type == 0) + return fmtprint(fmt, "Tnull"); + if(rpc->t.type < nelem(callname) && callname[rpc->t.type]) + return fmtprint(fmt, "%s tag %ud", callname[rpc->t.type], rpc->t.tag); + return fmtprint(fmt, "type=%d??? 
tag %ud", rpc->t.type, rpc->t.tag); +} + + +/* + * dump out count (or DUMPL, if count is bigger) bytes from + * buf to ans, as a string if they are all printable, + * else as a series of hex bytes + */ +#define DUMPL 64 + +static uint +dumpsome(char *ans, char *e, void *b, long count) +{ + int i, printable; + char *p; + char *buf; + + buf = b; + if(buf == nil){ + seprint(ans, e, ""); + return strlen(ans); + } + printable = 1; + if(count > DUMPL) + count = DUMPL; + for(i=0; i127) + printable = 0; + p = ans; + *p++ = '\''; + if(printable){ + if(count > e-p-2) + count = e-p-2; + for(; count > 0; count--, p++, buf++) + if(*buf == '\n' || *buf == '\t') + *p = ' '; + else + *p = *buf; + }else{ + if(2*count > e-p-2) + count = (e-p-2)/2; + for(i=0; i0 && i%4==0) + *p++ = ' '; + sprint(p, "%2.2ux", (uchar)buf[i]); + p += 2; + } + } + *p++ = '\''; + *p = 0; + return p - ans; +} + +/* + * Uses a buffer so prints are not mixed with other debug prints. + */ +int +ixcallfmt(Fmt *fmt) +{ + IXcall *f; + int type; + char buf[512]; + char *e, *s; + + e = buf+sizeof(buf); + f = va_arg(fmt->args, IXcall*); + type = f->type; + if(type < IXTversion || type >= IXTmax) + return fmtprint(fmt, "", type); + s = seprint(buf, e, "%s", callname[type]); + switch(type){ + case IXTversion: + case IXRversion: + seprint(s, e, " msize %ud version '%s'", f->msize, f->version); + break; + break; + + case IXTattach: + seprint(s, e, " uname '%s' aname '%s'", f->uname, f->aname); + break; + case IXRattach: + seprint(s, e, " fid %d", f->fid); + break; + + case IXTfid: + seprint(s, e, " fid %ud", f->fid); + break; + case IXRfid: + break; + + case IXRerror: + seprint(s, e, " ename '%s'", f->ename); + break; + + case IXTclone: + seprint(s, e, " cflags %#x", f->cflags); + break; + case IXRclone: + seprint(s, e, " fid %d", f->fid); + break; + + case IXTwalk: + seprint(s, e, " '%s'", f->wname); + break; + case IXRwalk: + break; + + case IXTopen: + seprint(s, e, " mode %d", f->mode); + break; + case IXRopen: + 
break; + + case IXTcreate: + seprint(s, e, " name '%s' perm %M mode %d", + f->name, (ulong)f->perm, f->mode); + break; + case IXRcreate: + break; + + case IXTread: + seprint(s, e, " nmsg %d offset %lld count %ud", + f->nmsg, f->offset, f->count); + break; + case IXRread: + s = seprint(s, e, " count %ud ", f->count); + dumpsome(s, e, f->data, f->count); + break; + + case IXTwrite: + s = seprint(s, e, " offset %lld endoffset %lld count %ud ", + f->offset, f->endoffset, f->count); + dumpsome(s, e, f->data, f->count); + break; + case IXRwrite: + seprint(s, e, " offset %lld count %ud", f->offset, f->count); + break; + + case IXTclunk: + case IXRclunk: + case IXTremove: + case IXRremove: + break; + + case IXTattr: + seprint(s, e, " attr '%s'", f->attr); + break; + case IXRattr: + seprint(s, e, " value '%s'", f->value); + break; + + case IXTwattr: + seprint(s, e, " attr '%s' value '%s'", f->attr, f->value); + break; + case IXRwattr: + break; + + case IXTcond: + s = "??"; + if(f->op < CMAX) + s = cname[f->op]; + s = seprint(s, e, " op '%s'", s); + seprint(s, e, " attr '%s' value '%s'", f->attr, f->value); + break; + case IXRcond: + break; + + case IXTmove: + seprint(s, e, " dirfid %d newname '%s'", f->dirfid, f->newname); + break; + case IXRmove: + break; + + } + return fmtstrcpy(fmt, buf); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,623 @@ +#include "all.h" + +/* + * memory blocks. + * see dk.h + */ + +/* + * For simplicity, functions in mblk.c do not raise errors. + * (debug dump functions may be an exception). 
+ */ + +Alloc mfalloc = +{ + .elsz = sizeof(Mfile), + .zeroing = 1, +}; + +char* +tname(int t) +{ + static char*nms[] = { + [DBfree] "DBfree", + [DBsuper] "DBsuper", + [DBref] "DBref", + [DBdata] "DBdata", + [DBattr] "DBattr", + [DBfile] "DBfile", + [DBptr0] "DBptr0", + [DBptr0+1] "DBptr1", + [DBptr0+2] "DBptr2", + [DBptr0+3] "DBptr3", + [DBptr0+4] "DBptr4", + [DBptr0+5] "DBptr5", + [DBptr0+6] "DBptr6", + }; + + if(t < 0 || t >= nelem(nms)) + return "BADTYPE"; + return nms[t]; +} + +int fullfiledumps = 0; + +/* + * NO LOCKS. debug only + */ +static void +fmttab(Fmt *fmt, int t, int c) +{ + while(t-- > 0) + fmtprint(fmt, "%c ", c?'.':' '); +} +int mbtab; +static void +fmtptr(Fmt *fmt, int type, daddrt addr, char *tag, int n) +{ + Memblk *b; + + if(addr == 0) + return; + b = mbget(type, addr, Dontmk); + if(b == nil){ + fmttab(fmt, mbtab, 0); + fmtprint(fmt, "%s[%d] = d%#010ullx \n", tag, n, addr); + }else{ + fmtprint(fmt, "%H", b); + mbput(b); + } +} +static void +dumpdirdata(Fmt *fmt, Memblk *b) +{ + long doff; + daddrt *p; + int i; + + if(b->d.length == 0 || DBDIR(b) == 0) + return; + doff = embedattrsz(b); + if(doff < Embedsz){ + fmttab(fmt, mbtab, 0); + p = (daddrt*)(b->d.embed+doff); + for(i = 0; i < 5 && (uchar*)p < b->d.embed+Embedsz - Daddrsz; i++) + fmtprint(fmt, "%sd%#010ullx", i?" 
":"data: ", EP(*p++)); + fmtprint(fmt, "\n"); + } +} + +int +mbfmt(Fmt *fmt) +{ + Memblk *b; + int i, n; + + b = va_arg(fmt->args, Memblk*); + if(b == nil) + return fmtprint(fmt, "\n"); + nodebug(); + fmttab(fmt, mbtab, b->type == DBfile); + + fmtprint(fmt, "%s", tname(b->type)); + if(b->type == DBfile && b->mf != nil) + fmtprint(fmt, " '%s'", b->mf->name); + if(b->frozen) + fmtprint(fmt, " FZ"); + if(b->dirty) + fmtprint(fmt, " DT"); + if(DBDIR(b)) + fmtprint(fmt, " DIR"); + fmtprint(fmt, " m%#p d%#010ullx", b, EP(b->addr)); + fmtprint(fmt, " r=%d", b->ref); + switch(b->type){ + case DBfree: + fmtprint(fmt, "\n"); + break; + case DBdata: + case DBattr: + fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); + break; + case DBref: + fmtprint(fmt, " next m%#p", b->lnext); + for(i = n = 0; i < Drefperblk; i++) + if(b->d.ref[i]){ + if(n++%3 == 0){ + fmtprint(fmt, "\n"); + fmttab(fmt, mbtab, 0); + } + fmtprint(fmt, " "); + fmtprint(fmt, "[%02d]d%#010ullx=%#ullx", + i, addrofref(b->addr, i), b->d.ref[i]); + } + if(n == 0 || --n%4 != 0) + fmtprint(fmt, "\n"); + break; + case DBfile: + fmtprint(fmt, " dr=%ulld", dbgetref(b->addr)); + if(b->mf == nil){ + fmtprint(fmt, " no mfile\n"); + break; + } + fmtprint(fmt, " nr%d nw%d\n", b->mf->readers, b->mf->writer); + if(0) + fmtprint(fmt, " asz %#ullx aptr %#ullx", + b->d.asize, b->d.aptr); + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " %M '%s' len %ulld ndents %ulld melted m%#p\n", + (ulong)b->d.mode, usrname(b->d.uid), + b->d.length, b->d.ndents, b->mf->melted); + if(0){ + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " id %#ullx mode %M mt %#ullx" + " '%s'\n", + EP(b->d.id), (ulong)b->d.mode, + EP(b->d.mtime), b->mf->uid); + } + mbtab++; + if(DBDIR(b)) + dumpdirdata(fmt, b); + for(i = 0; i < nelem(b->d.dptr); i++) + fmtptr(fmt, DBdata, b->d.dptr[i], "d", i); + for(i = 0; i < nelem(b->d.iptr); i++) + fmtptr(fmt, DBptr0+i, b->d.iptr[i], "i", i); + mbtab--; + break; + case DBsuper: + fmtprint(fmt, "\n"); + fmttab(fmt, mbtab, 0); + fmtprint(fmt, 
" free d%#010ullx eaddr d%#010ullx root d%#010ullx %s refs\n", + b->d.free, b->d.eaddr, b->d.root, + b->d.oddrefs?"odd":"even"); + break; + default: + if(b->type < DBptr0 || b->type >= DBptr0+Niptr){ + fmtprint(fmt, "", b->type); + break; + } + fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); + mbtab++; + if(fullfiledumps) + for(i = 0; i < Dptrperblk; i++) + fmtptr(fmt, b->type-1, b->d.ptr[i], "p", i); + mbtab--; + break; + } + debug(); + return 0; +} + +/* + * Blocks are kept in a hash while loaded, to look them up. + * When in the hash, they fall into exactly one of this cases: + * - a super block or a fake mem block (e.g., cons, /), unlinked + * - a ref block, linked in the fs->refs list + * - a clean block, linked in the fs clean list + * - a dirty block, linked in the fs dirty list. + * + * The policy function (eg fslru) keeps the lock on the list while + * releasing blocks from the hash. This implies locking in the wrong order. + * The "ispolicy" argument in some functions here indicates that the + * call is from the policy function. 
+ */ + +void +ismelted(Memblk *b) +{ + if(b->frozen) + fatal("frozen at pc %#p", getcallerpc(&b)); +} + +void +munlink(List *l, Memblk *b, int ispolicy) +{ + if(!ispolicy) + xqlock(l); + if(b->lprev != nil) + b->lprev->lnext = b->lnext; + else + l->hd = b->lnext; + if(b->lnext != nil) + b->lnext->lprev = b->lprev; + else + l->tl = b->lprev; + b->lnext = nil; + b->lprev = nil; + l->n--; + if(!ispolicy) + xqunlock(l); + b->unlinkpc = getcallerpc(&l); +} + +static void +mlink(List *l, Memblk *b) +{ + assert(b->lnext == nil && b->lprev == nil); + xqlock(l); + b->lnext = l->hd; + if(l->hd != nil) + l->hd->lprev = b; + else + l->tl = b; + l->hd = b; + l->n++; + xqunlock(l); +} + +static void +mlinklast(List *l, Memblk *b) +{ + xqlock(l); + b->lprev = l->tl; + if(l->tl != nil) + l->tl->lnext = b; + else + l->hd = b; + l->tl = b; + l->n++; + xqunlock(l); +} + +List +mfilter(List *bl, int(*f)(Memblk*)) +{ + Memblk *b, *bnext; + List wl; + + memset(&wl, 0, sizeof wl); + xqlock(bl); + for(b = bl->hd; b != nil; b = bnext){ + bnext = b->lnext; + if(f(b)){ + munlink(bl, b, 1); + mlinklast(&wl, b); + } + } + xqunlock(bl); + return wl; +} + +void +mlistdump(char *tag, List *l) +{ + Memblk *b; + int i; + + fprint(2, "%s:", tag); + i = 0; + for(b = l->hd; b != nil; b = b->lnext){ + if(i++ % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "d%#010ullx ", EP(b->addr)); + } + fprint(2, "\n"); +} + +static void +mbused(Memblk *b) +{ + if(b->dirty != 0 || (b->addr&Fakeaddr) != 0) + return; + switch(b->type){ + case DBref: + case DBsuper: + break; + default: + munlink(&fs->clean, b, 0); + mlink(&fs->clean, b); + } +} + +void +mbunused(Memblk *b) +{ + if(b->dirty || (b->addr&Fakeaddr) != 0) /* not on the clean list */ + return; + if(b->type == DBsuper || b->type == DBref) /* idem */ + return; + munlink(&fs->clean, b, 0); + mlinklast(&fs->clean, b); +} + +void +changed(Memblk *b) +{ + if(b->type != DBsuper) + ismelted(b); + if(b->dirty || (b->addr&Fakeaddr) != 0) + return; + lock(&b->dirtylk); + 
if(b->dirty){ + unlock(&b->dirtylk); + return; + } + switch(b->type){ + case DBsuper: + case DBref: + b->dirty = 1; + break; + default: + assert(b->dirty == 0); + munlink(&fs->clean, b, 0); + b->dirty = 1; + mlink(&fs->dirty, b); + } + unlock(&b->dirtylk); +} + +void +written(Memblk *b) +{ + lock(&b->dirtylk); + assert(b->dirty != 0); + switch(b->type){ + case DBsuper: + case DBref: + b->dirty = 0; + unlock(&b->dirtylk); + break; + default: + /* + * data blocks are removed from the dirty list, + * then written. They are not on the list while + * being written. + */ + assert(b->lprev == nil && b->lnext == nil); + b->dirty = 0; + unlock(&b->dirtylk); + + /* + * heuristic: frozen files that have a melted version + * are usually no longer useful. + */ + if(b->type == DBfile && b->mf->melted != nil) + mlinklast(&fs->clean, b); + else + mlink(&fs->clean, b); + } +} + +static void +linkblock(Memblk *b) +{ + if((b->addr&Fakeaddr) != 0 || b->type == DBsuper) + return; + if(b->type == DBref) + mlink(&fs->refs, b); + else{ + assert(b->dirty == 0); + mlink(&fs->clean, b); + } +} + +static void +unlinkblock(Memblk *b, int ispolicy) +{ + if((b->addr&Fakeaddr) != 0) + return; + switch(b->type){ + case DBref: + fatal("unlinkblock: DBref"); + case DBsuper: + fatal("unlinkblock: DBsuper"); + } + + if(b->dirty){ + assert(!ispolicy); + munlink(&fs->dirty, b, 0); + }else + munlink(&fs->clean, b, ispolicy); + b->unlinkpc = getcallerpc(&b); +} + +/* + * hashing a block also implies placing it in the refs/clean/dirty lists. + * mbget has also the guts of mbhash, for new blocks. 
+ */ +Memblk* +mbhash(Memblk *b) +{ + Memblk **h; + uint hv; + + hv = b->addr%nelem(fs->fhash); + xqlock(&fs->fhash[hv]); + for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) + if((*h)->addr == b->addr){ + warn("mbhash: dup blocks:"); + warn("b=> %H*h=> %H", b, *h); + fatal("mbhash: dup"); + } + *h = b; + if(b->next != nil) + fatal("mbhash: next"); + incref(b); + linkblock(b); + xqunlock(&fs->fhash[hv]); + return b; +} + +/* + * unhashing a block also implies removing it from the refs/clean/dirty lists. + * + */ +int +mbunhash(Memblk *b, int ispolicy) +{ + Memblk **h; + uint hv; + + if(b->type == DBref) + fatal("mbunhash: DBref"); + + hv = b->addr%nelem(fs->fhash); + if(ispolicy){ + if(!xcanqlock(&fs->fhash[hv])) + return 0; + }else + xqlock(&fs->fhash[hv]); + for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) + if((*h)->addr == b->addr){ + if(*h != b) + fatal("mbunhash: dup"); + *h = b->next; + b->next = nil; + unlinkblock(b, ispolicy); + b->unlinkpc = getcallerpc(&b); + xqunlock(&fs->fhash[hv]); + mbput(b); + return 1; + } + fatal("mbunhash: not found"); + return 0; +} + +static void +mbfree(Memblk *b) +{ + Mfile *mf; + + if(b == nil) + return; + dNprint("mbfree m%#p d%#010ullx\n", b, b->addr); + if(b->ref > 0) + fatal("mbfree: d%#010ullx has %d refs\n", b->addr, b->ref); + if(b->type == DBfree) + fatal("mbfree: d%#010ullx double free:\n", b->addr); + if(b->next != nil) + fatal("mbfree: d%#010ullx has next\n", b->addr); + if(b->lnext != nil || b->lprev != nil) + fatal("mbfree: d%#010ullx has lnext/lprev\n", b->addr); + + /* this could panic, but errors reading a block might cause it */ + if(b->type == DBref) + warn("free of DBref. 
i/o errors?"); + + if(b->mf != nil){ + mf = b->mf; + b->mf = nil; + mbput(mf->melted); + assert(mf->writer == 0 && mf->readers == 0); + afree(&mfalloc, mf); + } + + xqlock(fs); + fs->nmused--; + fs->nmfree++; + b->next = fs->free; + fs->free = b; + xqunlock(fs); +} + +Memblk* +mballocz(daddrt addr, int zeroit) +{ + Memblk *b; + static int nwait; + + for(;;){ + xqlock(fs); + if(fs->free != nil){ + b = fs->free; + fs->free = b->next; + fs->nmfree--; + b->next = nil; + break; + } + if(fs->nblk < fs->nablk){ + b = &fs->blk[fs->nblk++]; + break; + } + xqunlock(fs); + if((nwait++ % 60) == 0) + warn("out of memory blocks. waiting"); + sleep(1000); + } + fs->nmused++; + xqunlock(fs); + + if(zeroit) + memset(b, 0, sizeof *b); + else + memset(&b->Meminfo, 0, sizeof b->Meminfo); + + b->addr = addr; + b->ref = 1; + dNprint("mballocz %H", b); + return b; +} + +int +mbhashed(daddrt addr) +{ + Memblk *b; + uint hv; + + hv = addr%nelem(fs->fhash); + xqlock(&fs->fhash[hv]); + for(b = fs->fhash[hv].b; b != nil; b = b->next) + if(b->addr == addr) + break; + xqunlock(&fs->fhash[hv]); + return b != nil; +} + +Memblk* +mbget(int type, daddrt addr, int mkit) +{ + Memblk *b; + uint hv; + + if(catcherror()) + fatal("mbget: %r"); + hv = addr%nelem(fs->fhash); + xqlock(&fs->fhash[hv]); + for(b = fs->fhash[hv].b; b != nil; b = b->next) + if(b->addr == addr){ + checktag(b->d.tag, type, addr); + incref(b); + break; + } + if(mkit) + if(b == nil){ + b = mballocz(addr, 0); + b->loading = 1; + b->type = type; + b->d.tag = TAG(type, 0, addr); + /* mbhash() it, without releasing the locks */ + b->next = fs->fhash[hv].b; + fs->fhash[hv].b = b; + incref(b); + linkblock(b); + xqlock(&b->newlk); /* make others wait for it */ + }else if(b->loading){ + xqunlock(&fs->fhash[hv]); + xqlock(&b->newlk); /* wait for it */ + xqunlock(&b->newlk); + if(b->loading){ + mbput(b); + dprint("mbget %#ullx -> i/o error\n", addr); + return nil; /* i/o error reading it */ + } + dMprint("mbget %#010ullx -> waited for 
m%#p\n", addr, b); + noerror(); + return b; + } + xqunlock(&fs->fhash[hv]); + if(b != nil) + mbused(b); + dMprint("mbget %#010ullx -> m%#p\n", addr, b); + noerror(); + return b; +} + +void +mbput(Memblk *b) +{ + if(b == nil) + return; + dMprint("mbput m%#p d%#010ullx pc=%#p\n", b, b->addr, getcallerpc(&b)); + if(decref(b) == 0) + mbfree(b); +} + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,51 @@ +fns.h + --- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012 @@ -0,0 +1,114 @@ +typedef struct Fid Fid; +typedef struct Rpc Rpc; +typedef struct Shortrpc Shortrpc; +typedef struct Largerpc Largerpc; +typedef struct Cli Cli; + +enum +{ + Maxmdata = 8*KiB, + Minmdata = 128, + + QTCACHE = 0x02, /* experiment */ +}; + +/* + * One reference kept because of existence and another per req using it. + */ +struct Fid +{ + Fid *next; /* in alloc or free list */ + Fid *prev; /* in alloc list */ + Fid *hnext; /* in hash */ + Ref; + QLock; + Cli* cli; /* no is local to a client */ + int no; + Path* p; + int omode; /* -1 if closed */ + int rclose; + int archived; + int cflags; /* OCERR|OCEND */ + int consopen; /* for flush. has /cons open? */ + int uid; + + uvlong loff; /* last offset, for dir reads */ + int lidx; /* next dir entry index to read */ + char* buf; /* for statsread() */ + + int afd; /* for afids */ + int authok; /* for afids */ + AuthRpc *rpc; /* for afids */ +}; + +struct Rpc +{ + Rpc *next; /* in client or free list */ + Cli *cli; + Fid *fid; + union{ + Fcall t; + IXcall xt; + }; + union{ + Fcall r; + IXcall xr; + }; + + /* these are for ix */ + Rpc* rpc0; /* where to get fid, c, closed, flushed */ + u16int chan; /* channel # (ix) */ + Channel* c; /* to worker (ix) */ + int closed; /* got last rpc in chan */ + + vlong t0; + int flushed; + uchar data[1]; +}; + +/* + * 9p, and ix if carrying data. 
/*
 * Per-client state.
 */
struct Cli
{
	Cli *next;		/* in list of clients or free list*/
	Ref;
	int fd;
	int cfd;
	char *addr;
	int uid;
	ulong msize;		/* set by rversion; at most Maxmdata */

	QLock wlk;		/* lock for writing replies to the client */
	uchar wdata[IOHDRSZ+Maxmdata];

	QLock rpclk;		/* rflush holds it while it walks rpcs */
	ulong nrpcs;		/* should we limit the max # per client? */
	Rpc *rpcs;		/* linked through Rpc.next */
};
+ *
+ * This permits the archival process to operate with multiple concurrent
+ * processes archiving files in parallel (and computing hashes in parallel).
+ * It's likely that's going to outperform fossil+venti.
+ *
+ * XXX: The owner must be always in allow mode for the active file tree.
+ *
+ * XXX: Change worm mode so that file->id value is the disk address.
+ */
+
+static void
+usage(void)	/* print the synopsis on fd 2 and exit with status "usage" */
+{
+	fprint(2, "usage: %s [-DFLAGS] [-a] [-A addr] [-S srv] disk\n", argv0);
+	exits("usage");
+}
+
+int mainstacksize = Stack;
+
+void
+threadmain(int argc, char *argv[])	/* parse flags, open the file system on disk, start the services */
+{
+	char *addr, *dev, *srv;
+
+	addr = "tcp!*!dump";	/* default, replaced by -A */
+	srv = "dump";	/* default, replaced by -S */
+	ARGBEGIN{
+	case 'A':
+		addr = EARGF(usage());
+		break;
+	case 'S':
+		srv = EARGF(usage());
+		break;
+	case 'a':
+		noauth = 1;
+		break;
+	default:	/* -X with X in A-Z or 9 sets dbg[X] and makes fatal() abort */
+		if((ARGC() >= 'A' && ARGC() <= 'Z') || ARGC() == '9'){
+			dbg[ARGC()] = 1;
+			fatalaborts = 1;
+		}else
+			usage();
+	}ARGEND;
+	if(argc != 1)	/* exactly one argument must remain: the disk */
+		usage();
+	dev = argv[0];
+	if(dbg['d'])	/* NOTE(review): the flags above only set dbg['A'..'Z'] and dbg['9']; confirm who sets 'd' */
+		dbg['Z'] = 1;
+
+	outofmemoryexits(1);
+	workerthreadcreate = proccreate;
+	fmtinstall('H', mbfmt);
+	fmtinstall('M', dirmodefmt);
+	fmtinstall('F', fcallfmt);
+	fmtinstall('G', ixcallfmt);
+	fmtinstall('X', fidfmt);
+	fmtinstall('R', rpcfmt);
+	fmtinstall('A', usrfmt);
+	fmtinstall('P', pathfmt);
+
+	errinit(Errstack);
+	if(catcherror())	/* presumably taken if error() is raised before noerror() below; confirm */
+		fatal("uncatched error: %r");
+	rfork(RFNAMEG|RFNOTEG);
+	rwusers(nil);
+	fsopen(dev, Worm, Wr);
+	if(srv != nil)	/* srv has a default, so this is expected to hold */
+		srv9pix(srv, cliworker9p);
+	if(addr != nil)	/* addr has a default, so this is expected to hold */
+		listen9pix(addr, cliworker9p);
+	consinit();
+	proccreate(timeproc, nil, Stack);
+	noerror();
+	threadexits(nil);
+}
+
--- /sys/src/cmd/creepy Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/creepy Fri Apr 13 11:47:45 2012
@@ -0,0 +1,458 @@
+#include "all.h"
+
+/*
+ * Misc tools.
+ */
+
+static Lock lstatslk;	/* guards the lstats table and the wtime counters */
+static Lstat none;	/* handed out by getlstat() when the table is full */
+static Lstat *lstats;	/* Nlstats slots, allocated by lockstats() */
+static int lstatson;	/* last value given to lockstats() */
+int fatalaborts;	/* when set, fatal() calls abort() */
+
+Alloc pathalloc =
+{
+	.elsz = sizeof(Path),
+	.zeroing = 0,	/* anew() does not clear recycled Paths */
+};
+
+/*
+ * Print a formatted message on fd 2, prefixed by argv0 (and by the
+ * device name when fs and fs->dev are set), then terminate: abort()
+ * if fatalaborts is set, threadexitsall("fatal") otherwise.
+ * The message is formatted once and printed once.
+ */
+void
+fatal(char *fmt, ...)
+{
+	va_list arg;
+	char *s;
+
+	va_start(arg, fmt);
+	s = vsmprint(fmt, arg);
+	va_end(arg);
+	if(fs != nil && fs->dev != nil)
+		fprint(2, "%s: %s: fatal: %s\n", argv0, fs->dev, s);
+	else
+		fprint(2, "%s: fatal: %s\n", argv0, s);
+	free(s);
+	if(fatalaborts)
+		abort();
+	threadexitsall("fatal");
+}
+
+/*
+ * Print a formatted message on fd 2 with the same prefix fatal()
+ * uses, less the "fatal" tag.  Does not terminate.
+ */
+void
+warn(char *fmt, ...)
+{
+	va_list arg;
+	char *s;
+
+	va_start(arg, fmt);
+	s = vsmprint(fmt, arg);
+	va_end(arg);
+	if(fs != nil && fs->dev != nil)
+		fprint(2, "%s: %s: %s\n", argv0, fs->dev, s);
+	else
+		fprint(2, "%s: %s\n", argv0, s);
+	free(s);
+}
+
+/*
+ * Print the message as warn() does and then raise it with error().
+ * The text is truncated to what fits in err.  It is given to error()
+ * as the argument of "%s" and not as the format, so that a '%' in
+ * the formatted text is not interpreted a second time (error() is
+ * presumed to take a print-style format; confirm in its declaration).
+ */
+void
+warnerror(char *fmt, ...)
+{
+	va_list arg;
+	char err[128];
+
+	va_start(arg, fmt);
+	vseprint(err, err+sizeof err, fmt, arg);
+	va_end(arg);
+	if(fs != nil && fs->dev != nil)
+		fprint(2, "%s: %s: %s\n", argv0, fs->dev, err);
+	else
+		fprint(2, "%s: %s\n", argv0, err);
+	error("%s", err);
+}
+
+/*
+ * Turn lock statistics on or off.  The table is allocated the first
+ * time on is non-zero and is never released.
+ */
+void
+lockstats(int on)
+{
+	if(lstats == nil && on)
+		lstats = mallocz(sizeof lstats[0] * Nlstats, 1);
+	lstatson = on;
+}
+
+/*
+ * Print on fd 2 a header and then one line per used slot of the lock
+ * statistics table: a "src -n -s" command for the caller pc, the lock
+ * type, ntimes, ncant, wtime and wtime per call (the "mtime" column).
+ * Only the header is printed if the table was never allocated.
+ */
+void
+dumplockstats(void)
+{
+	static char *tname[] = {"qlock", "rwlock", "lock"};
+	int lon, i;
+	Lstat *lst;
+
+	lon = lstatson;
+	lstatson = 0;
+	fprint(2, "locks\tpc\tntimes\tncant\twtime\tmtime\n");
+	if(lstats != nil)
+		for(i = 0; i < Nlstats; i++){
+			lst = &lstats[i];
+			if(lst->ntimes != 0)
+				fprint(2, "src -n -s %#ullx %s\t# %s\t%d\t%d\t%ulld\t%ulld\t\n",
+					(uvlong)lst->pc, argv0, tname[lst->type], lst->ntimes,
+					lst->ncant, lst->wtime, lst->wtime/lst->ntimes);
+		}
+	lstatson = lon;
+}
+
+/*
+ * Return the statistics slot for the lock call made from pc, claiming
+ * a free slot (one with pc == 0) if there is none yet.  Slots are
+ * probed linearly starting at pc%Nlstats.  When every slot belongs to
+ * another pc, &none is returned so that callers always have a slot
+ * to update.  lstats must have been allocated.
+ */
+static Lstat*
+getlstat(uintptr pc, int type)
+{
+	Lstat *lst;
+	int i, h;
+
+	h = pc%Nlstats;
+	lock(&lstatslk);
+	for(i = 0; i < Nlstats; i++){
+		lst = &lstats[(h+i)%Nlstats];
+		if(lst->pc == 0){
+			lst->type = type;
+			lst->pc = pc;
+		}
+		if(lst->pc == pc){
+			unlock(&lstatslk);
+			return lst;
+		}
+	}
+	unlock(&lstatslk);
+	return &none;
+}
+
+/*
+ * qlock(q), accounted in the lock statistics when the table exists:
+ * every call counts in ntimes; a call that cannot take the lock right
+ * away counts in ncant and adds the time spent in qlock() to wtime.
+ */
+void
+xqlock(QLock *q)
+{
+	vlong t;
+	Lstat *lst;
+
+	lst = nil;
+	t = 0;
+	if(lstats != nil){
+		lst = getlstat(getcallerpc(&q), Tqlock);
+		ainc(&lst->ntimes);
+		if(canqlock(q))
+			return;
+		ainc(&lst->ncant);
+		t = nsec();
+	}
+	qlock(q);
+	/*
+	 * Test lst and not lstats: the table may have been allocated
+	 * while we were blocked, and then there is no slot (and no
+	 * start time) for this call.
+	 */
+	if(lst != nil){
+		t = nsec() - t;
+		lock(&lstatslk);
+		lst->wtime += t;
+		unlock(&lstatslk);
+	}
+}
+
+/*
+ * Release a lock taken with xqlock() or xcanqlock().
+ */
+void
+xqunlock(QLock *q)
+{
+	qunlock(q);
+}
+
+/*
+ * canqlock(q) with the ntimes/ncant accounting of xqlock().
+ * Returns non-zero if the lock was taken.
+ */
+int
+xcanqlock(QLock *q)
+{
+	Lstat *lst;
+
+	if(lstats == nil)
+		return canqlock(q);
+	lst = getlstat(getcallerpc(&q), Tqlock);
+	ainc(&lst->ntimes);
+	if(canqlock(q))
+		return 1;
+	ainc(&lst->ncant);
+	return 0;
+}
+
+/*
+ * wlock(rw) if iswr is set, rlock(rw) otherwise, with the accounting
+ * described for xqlock().
+ */
+void
+xrwlock(RWLock *rw, int iswr)
+{
+	vlong t;
+	Lstat *lst;
+
+	lst = nil;
+	t = 0;
+	if(lstats != nil){
+		lst = getlstat(getcallerpc(&rw), Trwlock);
+		ainc(&lst->ntimes);
+		if(iswr){
+			if(canwlock(rw))
+				return;
+		}else
+			if(canrlock(rw))
+				return;
+		ainc(&lst->ncant);
+		t = nsec();
+	}
+	if(iswr)
+		wlock(rw);
+	else
+		rlock(rw);
+	/* lst is nil if there was no table when this call started */
+	if(lst != nil){
+		t = nsec() - t;
+		lock(&lstatslk);
+		lst->wtime += t;
+		unlock(&lstatslk);
+	}
+}
+
+/*
+ * Release a lock taken with xrwlock(rw, iswr).
+ */
+void
+xrwunlock(RWLock *rw, int iswr)
+{
+	if(iswr)
+		wunlock(rw);
+	else
+		runlock(rw);
+}
+
+/*
+ * Allocate an element of a->elsz bytes, taken from a's free list when
+ * there is one.  With a->zeroing set the element is cleared on every
+ * allocation; without it only elements fresh from mallocz are clear,
+ * and a recycled one keeps what it held when given to afree() (other
+ * than the free list link written there).
+ */
+void*
+anew(Alloc *a)
+{
+	Next *n;
+
+	assert(a->elsz > 0);
+	xqlock(a);
+	n = a->free;
+	if(n != nil){
+		a->free = n->next;
+		a->nfree--;
+	}else{
+		a->nalloc++;
+		n = mallocz(a->elsz, !a->zeroing);
+	}
+	xqunlock(a);
+	if(a->zeroing)
+		memset(n, 0, a->elsz);
+	return n;
+}
+
+/*
+ * Put nd, an element of a, on a's free list.  A nil nd is ignored.
+ */
+void
+afree(Alloc *a, void *nd)
+{
+	Next *n;
+
+	if(nd == nil)
+		return;
+	n = nd;
+	xqlock(a);
+	n->next = a->free;
+	a->free = n;
+	a->nfree++;
+	xqunlock(a);
+}
+
+/*
+ * Append f to p, growing p->f by Incr slots when it is full, and add
+ * a reference to f.  p->ref is not looked at: addelem() is the entry
+ * point that copies a shared path first.
+ */
+static void
+xaddelem(Path *p, Memblk *f)
+{
+	if(p->nf == p->naf){
+		p->naf += Incr;
+		p->f = realloc(p->f, p->naf*sizeof p->f[0]);
+	}
+	p->f[p->nf++] = f;
+	incref(f);
+}
+
+/*
+ * Return a new path, with one reference, holding the elements of p.
+ * p->f[0] is used without looking at p->nf.
+ * NOTE(review): nroot of the copy is the one newpath() sets and not
+ * p->nroot; confirm that this is intended.
+ */
+static Path*
+duppath(Path *p)
+{
+	Path *np;
+	int i;
+
+	np = newpath(p->f[0]);
+	for(i = 1; i < p->nf; i++)
+		xaddelem(np, p->f[i]);
+	return np;
+}
+
+/*
+ * Make *pp private to the caller: if the path has more than one
+ * reference, *pp is replaced with a copy and the reference to the
+ * original is dropped.
+ */
+void
+ownpath(Path **pp)
+{
+	Path *p;
+
+	p = *pp;
+	if(p->ref > 1){
+		*pp = duppath(p);
+		putpath(p);
+	}
+}
+
+/*
+ * Append f to the path in *pp, copying the path first if it is shared.
+ * Returns the resulting path, which is also left in *pp.
+ */
+Path*
+addelem(Path **pp, Memblk *f)
+{
+	Path *p;
+
+	ownpath(pp);
+	p = *pp;
+	xaddelem(p, f);
+	return p;
+}
+
+/*
+ * Remove the last element, if any, of the path in *pp and release it
+ * with mbput().  The path is copied first if it is shared.
+ * Returns the resulting path, which is also left in *pp.
+ */
+Path*
+dropelem(Path **pp)
+{
+	Path *p;
+
+	ownpath(pp);
+	p = *pp;
+	if(p->nf > 0)
+		mbput(p->f[--p->nf]);
+	return p;
+}
+
+/*
+ * Allocate a path with one reference and root as its element.
+ * nroot is set to the number of elements the path has at that point.
+ */
+Path*
+newpath(Memblk *root)
+{
+	Path *p;
+
+	p = anew(&pathalloc);
+	p->ref = 1;
+	xaddelem(p, root);
+	p->nroot = p->nf;
+	return p;
+}
+
+/*
+ * Drop a reference to p; a nil p is ignored.  When the last reference
+ * goes, every element is released with mbput(), nf is reset and p is
+ * given back to pathalloc.  p->f and p->naf are left as they are.
+ */
+void
+putpath(Path *p)
+{
+	int i;
+
+	if(p == nil || decref(p) > 0)
+		return;
+	for(i = 0; i < p->nf; i++)
+		mbput(p->f[i]);
+	p->nf = 0;
+	afree(&pathalloc, p);
+}
+
+/*
+ * Share p: add a reference to it and return it.
+ */
+Path*
+clonepath(Path *p)
+{
+	incref(p);
+	return p;
+}
+
+/*
+ * Format routine for a Path* argument (installed for 'P' by
+ * threadmain): prints "/" for a nil path and "p[i] = %H" for each
+ * element otherwise, in which case it returns 0.
+ */
+int
+pathfmt(Fmt *fmt)
+{
+	Path *p;
+	int i;
+
+	p = va_arg(fmt->args, Path*);
+	if(p == nil)
+		return fmtprint(fmt, "/");
+	for(i = 0; i < p->nf; i++)
+		fmtprint(fmt, "p[%d] = %H", i, p->f[i]);
+	return 0;
+}