# HG changeset patch # User Francisco J Ballesteros # Date 1331134887 0 # Node ID 6fe89e1c4d071b7c1d975d1cbf4d94ba7c217b70 # Parent 33a08947f013d09d8d75bcf765857e8b3b4909d4 creepy: update R=nixiedev, nemo CC=nix-dev http://codereview.appspot.com/5694087 diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/9p.c --- a/sys/src/cmd/creepy/9p.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/9p.c Wed Mar 07 15:41:27 2012 +0000 @@ -42,196 +42,29 @@ [Twstat] rwstat, }; -static RWLock fidhashlk; -static Fid *fidhash[Fidhashsz]; -static uint fidgen; +void +ninestats(int clr) +{ + int i; -static Alloc fidalloc = -{ - .elsz = sizeof(Fid), - .zeroing = 1, -}; -static Alloc rpcalloc = -{ - .elsz = sizeof(Largerpc), - .zeroing = 0, -}; -Alloc clialloc = -{ - .elsz = sizeof(Cli), - .zeroing = 1, -}; - -static QLock clientslk; -static Cli *clients; - -int -fidfmt(Fmt *fmt) -{ - Fid *fid; - - fid = va_arg(fmt->args, Fid*); - if(fid == nil) - return fmtprint(fmt, ""); - return fmtprint(fmt, "fid %#p no %d r%d, omode %d arch %d", - fid, fid->no, fid->ref, fid->omode, fid->archived); + fprint(2, "fids:\t%4uld alloc %4uld free (%4uld bytes)\n", + fidalloc.nalloc, fidalloc.nfree, fidalloc.elsz); + fprint(2, "rpcs:\t%4uld alloc %4uld free (%4uld bytes)\n", + rpcalloc.nalloc, rpcalloc.nfree, rpcalloc.elsz); + fprint(2, "clis:\t%4uld alloc %4uld free (%4uld bytes)\n", + clialloc.nalloc, clialloc.nfree, clialloc.elsz); + for(i = 0; i < nelem(fcalls); i++) + if(fcalls[i] != nil && ncalls[i] > 0){ + fprint(2, "%-8s\t%5uld calls\t%11ulld µs\n", + callname[i], ncalls[i], + (calltime[i]/ncalls[i])/1000); + if(clr){ + ncalls[i] = 0; + calltime[i] = 0; + } + } } -void -ninestats(void) -{ - print("fids:\t%4uld alloc %4uld free (%4uld bytes)\n", - fidalloc.nalloc, fidalloc.nfree, fidalloc.elsz); - print("rpcs:\t%4uld alloc %4uld free (%4uld bytes)\n", - rpcalloc.nalloc, rpcalloc.nfree, rpcalloc.elsz); - print("clis:\t%4uld alloc %4uld free (%4uld bytes)\n", - clialloc.nalloc, 
clialloc.nfree, clialloc.elsz); - -} - -Rpc* -newrpc(void) -{ - Rpc *rpc; - - rpc = anew(&rpcalloc); - rpc->next = nil; - rpc->cli = nil; - rpc->fid = nil; - rpc->flushed = 0; - rpc->closed = 0; - rpc->chan = ~0; - rpc->rpc0 = nil; - /* ouch! union. */ - if(sizeof(Fcall) > sizeof(IXcall)){ - memset(&rpc->t, 0, sizeof rpc->t); - memset(&rpc->r, 0, sizeof rpc->r); - }else{ - memset(&rpc->xt, 0, sizeof rpc->xt); - memset(&rpc->xr, 0, sizeof rpc->xr); - } - return rpc; -} - -void -freerpc(Rpc *rpc) -{ - afree(&rpcalloc, rpc); -} - -Fid* -newfid(void* clino, int no) -{ - Fid *fid, **fidp; - - wlock(&fidhashlk); - if(catcherror()){ - wunlock(&fidhashlk); - error(nil); - } - if(no < 0) - no = fidgen++; - for(fidp = &fidhash[no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->next) - if((*fidp)->clino == clino && (*fidp)->no == no) - error("fid in use"); - fid = anew(&fidalloc); - *fidp = fid; - fid->omode = -1; - fid->no = no; - fid->clino = clino; - fid->ref = 2; /* one for the caller; another because it's kept */ - noerror(); - wunlock(&fidhashlk); - d9print("new fid %X\n", fid); - return fid; -} - -Fid* -getfid(void* clino, int no) -{ - Fid *fid; - - rlock(&fidhashlk); - if(catcherror()){ - runlock(&fidhashlk); - error(nil); - } - for(fid = fidhash[no%Fidhashsz]; fid != nil; fid = fid->next) - if(fid->clino == clino && fid->no == no){ - incref(fid); - noerror(); - runlock(&fidhashlk); - return fid; - } - error("fid not found"); - return fid; -} - -void -putfid(Fid *fid) -{ - Fid **fidp; - - if(fid == nil || decref(fid) > 0) - return; - d9print("clunk fid %X\n", fid); - putpath(fid->p); - free(fid->uid); - wlock(&fidhashlk); - if(catcherror()){ - wunlock(&fidhashlk); - error(nil); - } - for(fidp = &fidhash[fid->no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->next) - if(*fidp == fid){ - *fidp = fid->next; - noerror(); - wunlock(&fidhashlk); - afree(&fidalloc, fid); - return; - } - fatal("putfid: fid not found"); -} - -/* keeps addr, does not copy it */ -Cli* -newcli(char *addr, 
int fd, int cfd) -{ - Cli *cli; - - cli = anew(&clialloc); - cli->fd = fd; - cli->cfd = cfd; - cli->addr = addr; - cli->ref = 1; - - qlock(&clientslk); - cli->next = clients; - clients = cli; - qunlock(&clientslk); - return cli; -} - -void -putcli(Cli *cli) -{ - Cli **cp; - - if(decref(cli) == 0){ - qlock(&clientslk); - for(cp = &clients; *cp != nil; cp = &(*cp)->next) - if(*cp == cli) - break; - if(*cp == nil) - fatal("client not found"); - *cp = cli->next; - qunlock(&clientslk); - close(cli->fd); - close(cli->cfd); - free(cli->addr); - afree(&clialloc, cli); - } -} static Qid mkqid(Memblk *f) @@ -276,8 +109,8 @@ Rpc *r; cli = rpc->cli; - qlock(&cli->wlk); /* nobody replies now */ - qlock(&rpc->cli->rpclk); + xqlock(&cli->wlk); /* nobody replies now */ + xqlock(&rpc->cli->rpclk); for(r = rpc->cli->rpcs; r != nil; r = r->next) if(r->t.tag == rpc->t.oldtag) break; @@ -286,8 +119,8 @@ if(r->t.type == Tread && r->fid->consopen) consprint(""); /* in case it's waiting... */ } - qunlock(&rpc->cli->rpclk); - qunlock(&cli->wlk); + xqunlock(&rpc->cli->rpclk); + xqunlock(&cli->wlk); } static void @@ -297,25 +130,6 @@ error("no auth required"); } -void -attach(Fid *fid, char *aname, char *uname) -{ - Path *p; - - fid->uid = strdup(uname); - p = newpath(fs->root); - fid->p = p; - if(strcmp(aname, "active") == 0 || strcmp(aname, "main/active") == 0){ - addelem(&p, fs->active); - return; - } - fid->archived = 1; - if(strcmp(aname, "archive") == 0 || strcmp(aname, "main/archive") == 0) - addelem(&p, fs->archive); - else if(strcmp(aname, "main") != 0 && strcmp(aname, "") != 0) - error("unknown tree"); -} - static void rattach(Rpc *rpc) { @@ -325,7 +139,7 @@ fid = newfid(rpc->cli, rpc->t.fid); rpc->fid = fid; - attach(fid, rpc->t.aname, rpc->t.uname); + fidattach(fid, rpc->t.aname, rpc->t.uname); p = fid->p; f = p->f[p->nf-1]; rwlock(f, Rd); @@ -333,52 +147,6 @@ rwunlock(f, Rd); } -Fid* -clone(Cli *cli, Fid *fid, int no) -{ - Fid *nfid; - - nfid = newfid(cli, no); - nfid->p = 
clonepath(fid->p); - nfid->uid = strdup(fid->uid); - nfid->archived = fid->archived; - nfid->consopen = fid->consopen; - return nfid; -} - -void -walk(Fid *fid, char *wname) -{ - Path *p; - Memblk *f, *nf; - - p = fid->p; - if(strcmp(wname, ".") == 0) - goto done; - if(strcmp(wname, "..") == 0){ - if(p->nf > 1) - p = dropelem(&fid->p); - goto done; - } - f = p->f[p->nf-1]; - rwlock(f, Rd); - if(catcherror()){ - rwunlock(f, Rd); - error(nil); - } - dfaccessok(f, fid->uid, AEXEC); - nf = dfwalk(f, wname, 0); - rwunlock(f, Rd); - p = addelem(&fid->p, nf); - decref(nf); -done: - f = p->f[p->nf-1]; - if(isro(f)) - fid->archived = f != fs->cons; - else if(f == fs->active) - fid->archived = 0; -} - static void rwalk(Rpc *rpc) { @@ -393,7 +161,7 @@ error("can't walk like a clone without one"); nfid = nil; if(rpc->t.fid != rpc->t.newfid) - nfid = clone(rpc->cli, rpc->fid, rpc->t.newfid); + nfid = fidclone(rpc->cli, rpc->fid, rpc->t.newfid); if(catcherror()){ putfid(nfid); putfid(nfid); /* clunk */ @@ -406,7 +174,8 @@ error(nil); break; } - walk(nfid, rpc->t.wname[i]); + fidwalk(nfid, rpc->t.wname[i]); + noerror(); p = nfid->p; nf = p->f[p->nf-1]; rwlock(nf, Rd); @@ -425,72 +194,6 @@ noerror(); } -void -fidopen(Fid *fid, int mode) -{ - int fmode, amode; - Memblk *f; - Path *p; - uvlong z; - - if(fid->omode != -1) - error("fid already open"); - - /* check this before we try to melt it */ - p = fid->p; - f = p->f[p->nf-1]; - if(mode != OREAD) - if(f == fs->root || f == fs->archive || fid->archived) - error("can't write archived or built-in files"); - amode = 0; - if((mode&3) != OREAD || (mode&OTRUNC) != 0) - amode |= AWRITE; - if((mode&3) != OWRITE) - amode |= AREAD; - if(amode != AREAD) - if(f == fs->cons) - rwlock(f, Wr); - else{ - p = dfmelt(&fid->p, fid->p->nf); - f = p->f[p->nf-1]; - } - else - rwlock(f, Rd); - if(catcherror()){ - rwunlock(f, (amode!=AREAD)?Wr:Rd); - error(nil); - } - fmode = f->mf->mode; - if(mode != OREAD){ - if(f != fs->root && 
p->f[p->nf-2]->mf->mode&DMAPPEND) - error("directory is append only"); - if((fmode&DMDIR) != 0) - error("wrong open mode for a directory"); - } - dfaccessok(f, fid->uid, amode); - if(mode&ORCLOSE){ - if(f == fs->active || f == fs->cons || fid->archived) - error("can't remove an archived or built-in file"); - dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); - } - if(mode&ORCLOSE) - fid->rclose++; - if((fmode&DMEXCL) != 0 && f->mf->open) - if(f != fs->cons || amode != AWRITE) /* ok to write cons */ - error("exclusive use file already open"); - if((mode&OTRUNC) && f != fs->cons){ - z = 0; - dfwattr(f, "length", &z, sizeof z); - } - f->mf->open++; - fid->omode = mode&3; - fid->loff = 0; - fid->lidx = 0; - fid->consopen = f == fs->cons; - noerror(); - rwunlock(f, (amode!=AREAD)?Wr:Rd); -} - static void ropen(Rpc *rpc) { @@ -508,50 +211,6 @@ rwunlock(f, Rd); } -void -fidcreate(Fid *fid, char *name, int mode, ulong perm) -{ - Path *p; - Memblk *f, *nf; - - if(fid->omode != -1) - error("fid already open"); - if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) - error("that file name scares me"); - if(utfrune(name, '/') != nil) - error("that file name is too creepy"); - if((perm&DMDIR) != 0 && mode != OREAD) - error("wrong open mode for a directory"); - p = fid->p; - f = p->f[p->nf-1]; - if(fid->archived) - error("can't create in archived or built-in files"); - if((f->mf->mode&DMDIR) == 0) - error("not a directory"); - p = dfmelt(&fid->p, fid->p->nf); - f = p->f[p->nf-1]; - if(catcherror()){ - rwunlock(f, Wr); - error(nil); - } - dfaccessok(f, fid->uid, AWRITE); - if(!catcherror()){ - mbput(dfwalk(f, name, 0)); - error("file already exists"); - } - nf = dfcreate(f, name, fid->uid, perm); - addelem(&fid->p, nf); - decref(nf); - nf->mf->open++; - noerror(); - rwunlock(f, Wr); - fid->omode = mode&3; - fid->loff = 0; - fid->lidx = 0; - if(mode&ORCLOSE) - fid->rclose++; -} - static void rcreate(Rpc *rpc) { @@ -572,7 +231,7 @@ } static ulong -packmeta(Memblk *f, uchar *buf, int 
nbuf) +pack9dir(Memblk *f, uchar *buf, int nbuf) { Dir d; @@ -581,6 +240,8 @@ d.qid = mkqid(f); d.mode = f->mf->mode; d.length = f->mf->length; + if(d.mode&DMDIR) + d.length = 0; d.uid = f->mf->uid; d.gid = f->mf->gid; d.muid = f->mf->muid; @@ -589,100 +250,22 @@ return convD2M(&d, buf, nbuf); } -static ulong -readdir(Fid *fid, uchar *data, ulong ndata, uvlong) -{ - Memblk *d, *f; - ulong tot, nr; - - d = fid->p->f[fid->p->nf-1]; - for(tot = 0; tot+2 < ndata; tot += nr){ - - f = dfchild(d, fid->lidx); - if(f == nil) - break; - nr = packmeta(f, data+tot, ndata-tot); - mbput(f); - if(nr <= 2) - break; - fid->lidx++; - } - return tot; -} - -long -fidread(Fid *fid, void *data, ulong count, vlong offset) -{ - Memblk *f; - Path *p; - - if(fid->omode == -1) - error("fid not open"); - if(fid->omode == OWRITE) - error("fid not open for reading"); - if(offset < 0) - error("negative offset"); - p = fid->p; - f = p->f[p->nf-1]; - if(f == fs->cons) - return consread(data, count); - rwlock(f, Rd); - if(catcherror()){ - rwunlock(f, Rd); - error(nil); - } - if(f->mf->mode&DMDIR){ - if(fid->loff != offset) - error("non-sequential dir read not supported"); - count = readdir(fid, data, count, offset); - fid->loff += count; - }else - count = dfpread(f, data, count, offset); - noerror(); - rwunlock(f, Rd); - return count; -} - static void rread(Rpc *rpc) { Fid *fid; + vlong off; fid = getfid(rpc->cli, rpc->t.fid); rpc->fid = fid; if(rpc->t.count > rpc->cli->msize-IOHDRSZ) rpc->r.count = rpc->cli->msize-IOHDRSZ; rpc->r.data = (char*)rpc->data; - rpc->r.count = fidread(fid, rpc->r.data, rpc->t.count, rpc->t.offset); + off = rpc->t.offset; + rpc->r.count = fidread(fid, rpc->r.data, rpc->t.count, off, pack9dir); } -long -fidwrite(Fid *fid, void *data, ulong count, uvlong *offset) -{ - Memblk *f; - Path *p; - - if(fid->omode == -1) - error("fid not open"); - if(fid->omode == OREAD) - error("fid not open for writing"); - p = fid->p; - f = p->f[p->nf-1]; - if(f == fs->cons) - return 
conswrite(data, count); - p = dfmelt(&fid->p, fid->p->nf); - f = p->f[p->nf-1]; - if(catcherror()){ - rwunlock(f, Wr); - error(nil); - } - count = dfpwrite(f, data, count, offset); - rwunlock(f, Wr); - noerror(); - return count; -} - static void rwrite(Rpc *rpc) { @@ -697,37 +280,6 @@ rpc->r.count = fidwrite(fid, rpc->t.data, rpc->t.count, &off); } -void -fidclose(Fid *fid) -{ - Memblk *f, *fp; - Path *p; - - p = fid->p; - f = p->f[p->nf-1]; - rwlock(f, Wr); - f->mf->open--; - rwunlock(f, Wr); - fid->omode = -1; - if(fid->rclose){ - p = dfmelt(&fid->p, fid->p->nf-1); - fp = p->f[p->nf-2]; - rwlock(f, Wr); - if(catcherror()){ - rwunlock(f, Wr); - mbput(f); - }else{ - dfremove(fp, f); - fid->p->nf--; - noerror(); - } - rwunlock(fp, Wr); - } - putpath(fid->p); - fid->p = nil; - fid->consopen = 0; -} - static void rclunk(Rpc *rpc) { @@ -737,43 +289,11 @@ rpc->fid = fid; if(fid->omode != -1) fidclose(fid); - d9print("clunking %X\n\n", fid); putfid(fid); putfid(fid); rpc->fid = nil; } -void -fidremove(Fid *fid) -{ - Memblk *f, *fp; - Path *p; - - p = fid->p; - f = p->f[p->nf-1]; - if(fid->archived || f == fs->cons || f == fs->active) - error("can't remove archived or built-in files"); - p = dfmelt(&fid->p, fid->p->nf-1); - fp = p->f[p->nf-2]; - f = p->f[p->nf-1]; - rwlock(f, Wr); - if(catcherror()){ - rwunlock(f, Wr); - rwunlock(fp, Wr); - error(nil); - } - if(fp->mf->mode&DMAPPEND) - error("directory is append only"); - dfaccessok(fp, fid->uid, AWRITE); - fid->omode = -1; - dfremove(fp, f); - fid->p->nf--; - noerror(); - rwunlock(fp, Wr); - putpath(fid->p); - fid->p = nil; -} - static void rremove(Rpc *rpc) { @@ -788,6 +308,7 @@ rpc->fid = nil; error(nil); } + fidremove(fid); noerror(); d9print("clunking %X\n\n", fid); @@ -805,15 +326,22 @@ fid = getfid(rpc->cli, rpc->t.fid); rpc->fid = fid; + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } p = fid->p; f = p->f[p->nf-1]; rwlock(f, Rd); + noerror(); + xqunlock(fid); if(catcherror()){ rwunlock(f, Rd); 
error(nil); } rpc->r.stat = rpc->data; - rpc->r.nstat = packmeta(f, rpc->data, rpc->cli->msize-IOHDRSZ); + rpc->r.nstat = pack9dir(f, rpc->data, rpc->cli->msize-IOHDRSZ); if(rpc->r.nstat <= 2) fatal("rstat: convD2M"); noerror(); @@ -838,109 +366,89 @@ Fid *fid; Memblk *f; Path *p; - Dir d, *sd; + Dir sd; u64int n; + n = convM2D(rpc->t.stat, rpc->t.nstat, &sd, (char*)rpc->t.stat); + if(n != rpc->t.nstat) + error("convM2D: bad stat"); fid = getfid(rpc->cli, rpc->t.fid); rpc->fid = fid; + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } p = fid->p; f = p->f[p->nf-1]; if(fid->archived || f == fs->cons) error("can't wstat archived or built-in files"); p = dfmelt(&fid->p, fid->p->nf); f = p->f[p->nf-1]; - n = convM2D(rpc->t.stat, rpc->t.nstat, &d, nil); - sd = malloc(n); + noerror(); + xqunlock(fid); if(catcherror()){ rwunlock(f, Wr); - free(sd); error(nil); } - n = convM2D(rpc->t.stat, rpc->t.nstat, sd, (char*)&sd[1]); - if(n <= BIT16SZ){ - free(sd); - error("wstat: convM2D"); - } - if(sd->length != ~0 && sd->length != f->mf->length){ + if(sd.length != ~0 && sd.length != f->mf->length){ if(f->mf->mode&DMDIR) error("can't resize a directory"); dfaccessok(f, fid->uid, AWRITE); }else - sd->length = ~0; + sd.length = ~0; - if(sd->name[0] && strcmp(f->mf->name, sd->name) != 0){ + if(sd.name[0] && strcmp(f->mf->name, sd.name) != 0){ if(isro(f) || f == fs->active) error("can't rename built-in files"); dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); if(!catcherror()){ - mbput(dfwalk(f, sd->name, 0)); + mbput(dfwalk(f, sd.name, 0)); error("file already exists"); } }else - sd->name[0] = 0; + sd.name[0] = 0; - if(sd->uid[0] != 0 && strcmp(sd->uid, f->mf->uid) != 0){ + if(sd.uid[0] != 0 && strcmp(sd.uid, f->mf->uid) != 0){ if(!fs->config && strcmp(fid->uid, f->mf->uid) != 0) error("only the owner may donate a file"); - if(!fs->config && !member(sd->uid, fid->uid) != 0) + if(!fs->config && !member(sd.uid, fid->uid) != 0) error("you are not in that group"); }else - 
sd->uid[0] = 0; - if(sd->gid[0] != 0 && strcmp(sd->gid, f->mf->gid) != 0){ + sd.uid[0] = 0; + if(sd.gid[0] != 0 && strcmp(sd.gid, f->mf->gid) != 0){ if(!fs->config && strcmp(fid->uid, f->mf->uid) != 0) error("only the onwer may change group"); - if(!fs->config && !member(sd->gid, fid->uid) != 0) + if(!fs->config && !member(sd.gid, fid->uid) != 0) error("you are not in that group"); }else - sd->gid[0] = 0; - if(sd->mode != ~0 && f->mf->mode != sd->mode){ + sd.gid[0] = 0; + if(sd.mode != ~0 && f->mf->mode != sd.mode){ if(!fs->config && strcmp(fid->uid, f->mf->uid) != 0 && !member(f->mf->gid, fid->uid) != 0) error("only the onwer or members may change mode"); }else - sd->mode = ~0; + sd.mode = ~0; - if(sd->length != ~0) - wstatint(f, "length", sd->length); - if(sd->name[0]) - wstatstr(f, "name", sd->name); - if(sd->uid[0]) - wstatstr(f, "name", sd->name); - if(sd->gid[0]) - wstatstr(f, "name", sd->name); - if(sd->mode != ~0) - wstatint(f, "mode", sd->mode); - if(fs->config && sd->atime != ~0) - wstatint(f, "atime", sd->atime); - if(fs->config && sd->mtime != ~0) - wstatint(f, "mtime", sd->mtime); - if(fs->config && sd->muid[0] != 0 && strcmp(sd->muid, f->mf->muid) != 0) - wstatint(f, "mtime", sd->mtime); + if(sd.length != ~0) + wstatint(f, "length", sd.length); + if(sd.name[0]) + wstatstr(f, "name", sd.name); + if(sd.uid[0]) + wstatstr(f, "name", sd.name); + if(sd.gid[0]) + wstatstr(f, "name", sd.name); + if(sd.mode != ~0) + wstatint(f, "mode", sd.mode); + if(fs->config && sd.atime != ~0) + wstatint(f, "atime", sd.atime); + if(fs->config && sd.mtime != ~0) + wstatint(f, "mtime", sd.mtime); + if(fs->config && sd.muid[0] != 0 && strcmp(sd.muid, f->mf->muid) != 0) + wstatint(f, "mtime", sd.mtime); noerror(); rwunlock(f, Wr); - free(sd); - -} - -void -replied(Rpc *rpc) -{ - Rpc **rl; - - qlock(&rpc->cli->rpclk); - for(rl = &rpc->cli->rpcs; (*rl != nil); rl = &(*rl)->next) - if(*rl == rpc){ - *rl = rpc->next; - break; - } - rpc->cli->nrpcs--; - qunlock(&rpc->cli->rpclk); - 
rpc->next = nil; - putfid(rpc->fid); - rpc->fid = nil; - putcli(rpc->cli); - rpc->cli = nil; } static char* @@ -948,8 +456,10 @@ { Rpc *rpc; Cli *cli; + Fid *fid; char err[128]; long n; + int nerr; rpc = v; cli = rpc->cli; @@ -960,6 +470,8 @@ errinit(Errstack); *aux = v; /* make it not nil */ } + nerr = nerrors(); + rpc->r.tag = rpc->t.tag; rpc->r.type = rpc->t.type + 1; @@ -968,14 +480,26 @@ rpc->r.type = Rerror; rpc->r.ename = err; rerrstr(err, sizeof err); - goto out; + }else{ + fcalls[rpc->t.type](rpc); + noerror(); } - fcalls[rpc->t.type](rpc); - noerror(); + fid = nil; + if(rpc->fid != nil && rpc->fid->ref > 1){ + /* The fid is not clunked by this rpc; ok to read/walk ahead */ + fid = rpc->fid; + incref(fid); + } + if(catcherror()){ + if(fid != nil) + putfid(fid); + } -out: - qlock(&cli->wlk); + xqlock(&cli->wlk); + putfid(rpc->fid); /* release rpc fid before replying */ + rpc->fid = nil; + if(rpc->flushed == 0){ d9print("-> %F\n", &rpc->r); n = convS2M(&rpc->r, cli->wdata, sizeof cli->wdata); @@ -984,12 +508,32 @@ if(write(cli->fd, cli->wdata, n) != n) d9print("%s: %r\n", cli->addr); }else - d9print("flushed: %F\n", &rpc->r); - qunlock(&cli->wlk); + dprint("flushed: %F\n", &rpc->r); + calltime[rpc->t.type] += nsec() - rpc->t0; + ncalls[rpc->t.type]++; + xqunlock(&cli->wlk); + + if(fid != nil){ + switch(rpc->t.type){ + case Tread: /* read ahead? */ + if(rpc->r.type == Rread && rpc->r.count == rpc->t.count) + fidrahead(fid, rpc->t.offset + rpc->t.count); + break; + case Twalk: /* walk ahead? 
*/ + if(rpc->r.type == Rwalk) + fidwahead(fid); + break; + } + putfid(fid); + } + noerror(); replied(rpc); freerpc(rpc); dPprint("%s exiting\n", threadgetname()); + + if(nerrors() != nerr) + fatal("%s: unbalanced error stack", threadgetname()); return nil; } @@ -1013,6 +557,8 @@ rpc = nil; for(;;){ + if(dbg['E']) + dumpfids(); if(rpc == nil) rpc = newrpc(); n = read9pmsg(cli->fd, rpc->data, Maxmdata+IOHDRSZ); @@ -1022,6 +568,7 @@ } if(n == 0) continue; + rpc->t0 = nsec(); if(convM2S(rpc->data, n, &rpc->t) == 0){ d9print("%s: convM2S failed\n", cli->addr); continue; @@ -1034,11 +581,11 @@ rpc->cli = cli; incref(cli); - qlock(&cli->rpclk); + xqlock(&cli->rpclk); rpc->next = cli->rpcs; cli->rpcs = rpc; cli->nrpcs++; - qunlock(&cli->rpclk); + xqunlock(&cli->rpclk); fspolicy(); if(rpc->t.type == Tflush || diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/9pix.c --- a/sys/src/cmd/creepy/9pix.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/9pix.c Wed Mar 07 15:41:27 2012 +0000 @@ -13,6 +13,768 @@ #include "net.h" #include "fns.h" +static RWLock fidhashlk; +static Fid *fidshd, *fidstl; +static Fid *fidhash[Fidhashsz]; +static uint fidgen; + +Alloc fidalloc = +{ + .elsz = sizeof(Fid), + .zeroing = 1, +}; +Alloc rpcalloc = +{ + .elsz = sizeof(Largerpc), + .zeroing = 0, +}; + +Alloc clialloc = +{ + .elsz = sizeof(Cli), + .zeroing = 1, +}; + +static QLock clientslk; +static Cli *clients; + +static void +fidlink(Fid *fid) +{ + fid->next = fidshd; + fid->prev = nil; + if(fidshd != nil) + fidshd->prev = fid; + else + fidstl = fid; + fidshd = fid; +} + +static void +fidunlink(Fid *fid) +{ + if(fid->prev != nil) + fid->prev->next = fid->next; + else + fidshd = fid->next; + if(fid->next != nil) + fid->next->prev = fid->prev; + else + fidstl = fid->prev; + fid->next = nil; + fid->prev = nil; +} + +int +fidfmt(Fmt *fmt) +{ + Fid *fid; + Path *p; + int i; + + fid = va_arg(fmt->args, Fid*); + if(fid == nil) + return fmtprint(fmt, ""); + fmtprint(fmt, "fid %#p no %d r%d, 
omode %d arch %d", + fid, fid->no, fid->ref, fid->omode, fid->archived); + p = fid->p; + if(p == nil) + return 0; + fmtprint(fmt, " path"); + for(i = 0; i < p->nf; i++) + fmtprint(fmt, " d%#ullx", p->f[i]->addr); + return fmtprint(fmt, "\n=>%H", p->f[p->nf-1]); +} + +void +dumpfids(void) +{ + Fid *fid; + int n; + + xrwlock(&fidhashlk, Rd); + fprint(2, "fids:\n"); + n = 0; + for(fid = fidshd; fid != nil; fid = fid->next) + fprint(2, "[%d] = %X\n", n++, fid); + xrwunlock(&fidhashlk, Rd); +} + +static int +meltpath(Path *p) +{ + int i, n; + Memblk *f; + + n = 0; + for(i = 0; i < p->nf; i++) + while((f = p->f[i]->mf->melted) != nil){ + n++; + incref(f); + mbput(p->f[i]); + p->f[i] = f; + } + return n; +} + +void +meltfids(void) +{ + Fid *fid; + int n; + + xrwlock(&fidhashlk, Rd); + n = 0; + for(fid = fidshd; fid != nil; fid = fid->next) + if(canqlock(fid)){ + if(!fid->archived) + n += meltpath(fid->p); + qunlock(fid); + } + xrwunlock(&fidhashlk, Rd); + dprint("meltfids: %d fids advanced\n", n); +} + +Rpc* +newrpc(void) +{ + Rpc *rpc; + + rpc = anew(&rpcalloc); + rpc->next = nil; + rpc->cli = nil; + rpc->fid = nil; + rpc->flushed = 0; + rpc->closed = 0; + rpc->chan = ~0; + rpc->rpc0 = nil; + /* ouch! union. 
*/ + if(sizeof(Fcall) > sizeof(IXcall)){ + memset(&rpc->t, 0, sizeof rpc->t); + memset(&rpc->r, 0, sizeof rpc->r); + }else{ + memset(&rpc->xt, 0, sizeof rpc->xt); + memset(&rpc->xr, 0, sizeof rpc->xr); + } + return rpc; +} + +void +freerpc(Rpc *rpc) +{ + afree(&rpcalloc, rpc); +} + +Fid* +newfid(void* clino, int no) +{ + Fid *fid, **fidp; + + xrwlock(&fidhashlk, Wr); + if(catcherror()){ + xrwunlock(&fidhashlk, Wr); + error(nil); + } + if(no < 0) + no = fidgen++; + for(fidp = &fidhash[no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->hnext) + if((*fidp)->clino == clino && (*fidp)->no == no) + error("fid in use"); + fid = anew(&fidalloc); + *fidp = fid; + fid->hnext = nil; + fid->omode = -1; + fid->no = no; + fid->clino = clino; + fid->ref = 2; /* one for the caller; another because it's kept */ + fidlink(fid); + noerror(); + xrwunlock(&fidhashlk, Wr); + dEprint("new fid %X\n", fid); + return fid; +} + +Fid* +getfid(void* clino, int no) +{ + Fid *fid; + + xrwlock(&fidhashlk, Rd); + if(catcherror()){ + xrwunlock(&fidhashlk, Rd); + error(nil); + } + for(fid = fidhash[no%Fidhashsz]; fid != nil; fid = fid->hnext) + if(fid->clino == clino && fid->no == no){ + incref(fid); + noerror(); + dEprint("getfid %d -> %X\n", no, fid); + xrwunlock(&fidhashlk, Rd); + return fid; + } + error("fid not found"); + return fid; +} + +void +putfid(Fid *fid) +{ + Fid **fidp; + + if(fid == nil || decref(fid) > 0) + return; + dEprint("clunk fid %X\n", fid); + putpath(fid->p); + fid->p = nil; + free(fid->uid); + fid->uid = nil; + xrwlock(&fidhashlk, Wr); + if(catcherror()){ + xrwunlock(&fidhashlk, Wr); + fprint(2, "putfid: %r"); + error(nil); + } + for(fidp = &fidhash[fid->no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->hnext) + if(*fidp == fid){ + *fidp = fid->hnext; + fidunlink(fid); + noerror(); + xrwunlock(&fidhashlk, Wr); + afree(&fidalloc, fid); + return; + } + fatal("putfid: fid not found"); +} + +/* keeps addr, does not copy it */ +Cli* +newcli(char *addr, int fd, int cfd) +{ + Cli *cli; + + 
cli = anew(&clialloc); + cli->fd = fd; + cli->cfd = cfd; + cli->addr = addr; + cli->ref = 1; + + xqlock(&clientslk); + cli->next = clients; + clients = cli; + xqunlock(&clientslk); + return cli; +} + +void +putcli(Cli *cli) +{ + Cli **cp; + + if(decref(cli) == 0){ + xqlock(&clientslk); + for(cp = &clients; *cp != nil; cp = &(*cp)->next) + if(*cp == cli) + break; + if(*cp == nil) + fatal("client not found"); + *cp = cli->next; + xqunlock(&clientslk); + close(cli->fd); + close(cli->cfd); + free(cli->addr); + afree(&clialloc, cli); + } +} + +void +fidattach(Fid *fid, char *aname, char *uname) +{ + Path *p; + + fid->uid = strdup(uname); + p = newpath(fs->root); + fid->p = p; + if(strcmp(aname, "active") == 0 || strcmp(aname, "main/active") == 0){ + addelem(&p, fs->active); + return; + } + fid->archived = 1; + if(strcmp(aname, "archive") == 0 || strcmp(aname, "main/archive") == 0) + addelem(&p, fs->archive); + else if(strcmp(aname, "main") != 0 && strcmp(aname, "") != 0) + error("unknown tree"); +} + +Fid* +fidclone(Cli *cli, Fid *fid, int no) +{ + Fid *nfid; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + nfid = newfid(cli, no); + nfid->p = clonepath(fid->p); + nfid->uid = strdup(fid->uid); + nfid->archived = fid->archived; + nfid->consopen = fid->consopen; + noerror(); + xqunlock(fid); + return nfid; +} + +void +fidwalk(Fid *fid, char *wname) +{ + Path *p; + Memblk *f, *nf; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + if(strcmp(wname, ".") == 0) + goto done; + if(strcmp(wname, "..") == 0){ + if(p->nf > 1) + p = dropelem(&fid->p); + goto done; + } + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + dfaccessok(f, fid->uid, AEXEC); + nf = dfwalk(f, wname, 0); + rwunlock(f, Rd); + noerror(); + p = addelem(&fid->p, nf); + decref(nf); +done: + f = p->f[p->nf-1]; + if(isro(f)) + fid->archived = f != fs->cons; + else if(f == fs->active) + fid->archived = 0; + 
noerror(); + xqunlock(fid); +} + +void +fidopen(Fid *fid, int mode) +{ + int fmode, amode; + Memblk *f; + Path *p; + uvlong z; + + if(fid->omode != -1) + error("fid already open"); + + /* check this before we try to melt it */ + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + if(mode != OREAD) + if(f == fs->root || f == fs->archive || fid->archived) + error("can't write archived or built-in files"); + amode = 0; + if((mode&3) != OREAD || (mode&OTRUNC) != 0) + amode |= AWRITE; + if((mode&3) != OWRITE) + amode |= AREAD; + if(amode != AREAD) + if(f == fs->cons) + rwlock(f, Wr); + else{ + p = dfmelt(&fid->p, fid->p->nf); + f = p->f[p->nf-1]; + } + else + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, (amode!=AREAD)?Wr:Rd); + error(nil); + } + fmode = f->mf->mode; + if(mode != OREAD){ + if(f != fs->root && p->f[p->nf-2]->mf->mode&DMAPPEND) + error("directory is append only"); + if((fmode&DMDIR) != 0) + error("wrong open mode for a directory"); + } + dfaccessok(f, fid->uid, amode); + if(mode&ORCLOSE){ + if(f == fs->active || f == fs->cons || fid->archived) + error("can't remove an archived or built-in file"); + dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); + } + if(mode&ORCLOSE) + fid->rclose++; + if((fmode&DMEXCL) != 0 && f->mf->open) + if(f != fs->cons || amode != AWRITE) /* ok to write cons */ + error("exclusive use file already open"); + if((mode&OTRUNC) && f != fs->cons){ + z = 0; + dfwattr(f, "length", &z, sizeof z); + } + f->mf->open++; + fid->omode = mode&3; + fid->loff = 0; + fid->lidx = 0; + fid->consopen = f == fs->cons; + noerror(); + rwunlock(f, (amode!=AREAD)?Wr:Rd); + if(mode&OTRUNC) + dfchanged(p); + noerror(); + xqunlock(fid); +} + +void +fidcreate(Fid *fid, char *name, int mode, ulong perm) +{ + Path *p; + Memblk *f, *nf; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fid->omode != -1) + error("fid already open"); + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 
0) + error("that file name scares me"); + if(utfrune(name, '/') != nil) + error("that file name is too creepy"); + if((perm&DMDIR) != 0 && mode != OREAD) + error("wrong open mode for a directory"); + p = fid->p; + f = p->f[p->nf-1]; + if(fid->archived) + error("can't create in archived or built-in files"); + if((f->mf->mode&DMDIR) == 0) + error("not a directory"); + p = dfmelt(&fid->p, fid->p->nf); + f = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + dfaccessok(f, fid->uid, AWRITE); + if(!catcherror()){ + mbput(dfwalk(f, name, 0)); + error("file already exists"); + } + nf = dfcreate(f, name, fid->uid, perm); + p = addelem(&fid->p, nf); + decref(nf); + nf->mf->open++; + noerror(); + rwunlock(f, Wr); + fid->omode = mode&3; + fid->loff = 0; + fid->lidx = 0; + if(mode&ORCLOSE) + fid->rclose++; + dfchanged(p); + noerror(); + xqunlock(fid); +} + +static ulong +readdir(Fid *fid, uchar *data, ulong ndata, uvlong, Packmeta pack) +{ + Memblk *d, *f; + ulong tot, nr; + + d = fid->p->f[fid->p->nf-1]; + for(tot = 0; tot+2 < ndata; tot += nr){ + f = dfchild(d, fid->lidx); + if(f == nil) + break; + nr = pack(f, data+tot, ndata-tot); + mbput(f); + if(nr <= 2) + break; + fid->lidx++; + } + return tot; +} + +long +fidread(Fid *fid, void *data, ulong count, vlong offset, Packmeta pack) +{ + Memblk *f; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fid->omode == -1) + error("fid not open"); + if(fid->omode == OWRITE) + error("fid not open for reading"); + if(offset < 0) + error("negative offset"); + p = fid->p; + f = p->f[p->nf-1]; + if(f == fs->cons){ + noerror(); + xqunlock(fid); + return consread(data, count); + } + rwlock(f, Rd); + noerror(); + xqunlock(fid); + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + if(f->mf->mode&DMDIR){ + if(fid->loff != offset) + error("non-sequential dir read not supported"); + count = readdir(fid, data, count, offset, pack); + fid->loff += count; + }else + count = dfpread(f, 
data, count, offset); + noerror(); + rwunlock(f, Rd); + return count; +} + +long +fidwrite(Fid *fid, void *data, ulong count, uvlong *offset) +{ + Memblk *f; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + if(fid->omode == -1) + error("fid not open"); + if(fid->omode == OREAD) + error("fid not open for writing"); + p = fid->p; + f = p->f[p->nf-1]; + if(f == fs->cons){ + xqunlock(fid); + noerror(); + return conswrite(data, count); + } + p = dfmelt(&fid->p, fid->p->nf); + f = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + count = dfpwrite(f, data, count, offset); + noerror(); + rwunlock(f, Wr); + + dfchanged(p); + noerror(); + xqunlock(fid); + return count; +} + +void +fidclose(Fid *fid) +{ + Memblk *f, *fp; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Wr); + f->mf->open--; + rwunlock(f, Wr); + fid->omode = -1; + if(fid->rclose){ + p = dfmelt(&fid->p, fid->p->nf-1); + fp = p->f[p->nf-2]; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + mbput(f); + }else{ + dfremove(fp, f); + fid->p->nf--; + noerror(); + } + rwunlock(fp, Wr); + dfchanged(p); + } + putpath(fid->p); + fid->p = nil; + fid->consopen = 0; + noerror(); + xqunlock(fid); +} + +void +fidremove(Fid *fid) +{ + Memblk *f, *fp; + Path *p; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + if(fid->archived || f == fs->cons || f == fs->active) + error("can't remove archived or built-in files"); + p = dfmelt(&fid->p, fid->p->nf-1); + fp = p->f[p->nf-2]; + f = p->f[p->nf-1]; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(fp, Wr); + error(nil); + } + if(fp->mf->mode&DMAPPEND) + error("directory is append only"); + dfaccessok(fp, fid->uid, AWRITE); + fid->omode = -1; + dfremove(fp, f); + fid->p->nf--; + noerror(); + rwunlock(fp, Wr); + dfchanged(fid->p); + putpath(fid->p); + fid->p = nil; 
+ noerror(); + xqunlock(fid); +} + +void +replied(Rpc *rpc) +{ + Rpc **rl; + + xqlock(&rpc->cli->rpclk); + for(rl = &rpc->cli->rpcs; (*rl != nil); rl = &(*rl)->next) + if(*rl == rpc){ + *rl = rpc->next; + break; + } + rpc->cli->nrpcs--; + xqunlock(&rpc->cli->rpclk); + rpc->next = nil; + assert(rpc->fid == nil); + putcli(rpc->cli); + rpc->cli = nil; + +} + +/* + * Read ahead policy: to be called after replying to an ok. read RPC. + * + * We try to keep at least Nahead more bytes in the file if it seems + * that's ok. + */ +void +fidrahead(Fid *fid, uvlong offset) +{ + Path *p; + Memblk *f; + Mfile *m; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Rd); + xqunlock(fid); + noerror(); + m = f->mf; + if(m->sequential == 0 || m->raoffset > offset + Nahead){ + rwunlock(f, Rd); + return; + } + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + m->raoffset = offset + Nahead; + d9print("rahead d%#ullx off %#ullx\n", f->addr, m->raoffset); + for(; offset < m->raoffset; offset += Maxmdata) + if(dfpread(f, nil, Maxmdata, offset) != Maxmdata) + break; + noerror(); + rwunlock(f, Rd); +} + +/* + * Walk ahead policy: to be called after replying to an ok. walk RPC. + * + * We try to keep the children of a directory we have walked to + * loaded in memory before further walks/reads. 
+ */ +void +fidwahead(Fid *fid) +{ + Path *p; + Memblk *f; + Mfile *m; + int i; + + xqlock(fid); + if(catcherror()){ + xqunlock(fid); + error(nil); + } + p = fid->p; + f = p->f[p->nf-1]; + rwlock(f, Rd); + noerror(); + xqunlock(fid); + + if(catcherror()){ + rwunlock(f, Rd); + error(nil); + } + m = f->mf; + if((m->mode&DMDIR) == 0 || m->wadone){ + noerror(); + rwunlock(f, Rd); + return; + } + m->wadone = 1; + for(i = 0; i < f->mf->length/sizeof(Dentry); i++) + mbput(dfchild(f, i)); + noerror(); + rwunlock(f, Rd); +} + static void postfd(char *name, int pfd) { @@ -117,11 +879,10 @@ break; default: if(ARGC() >= 'A' && ARGC() <= 'Z' || ARGC() == '9'){ - dbg['d'] = 1; + dbg['D'] = 1; dbg[ARGC()] = 1; }else usage(); - dbg['x'] = dbg['X']; }ARGEND; if(argc != 1) usage(); diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/attr.c --- a/sys/src/cmd/creepy/attr.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/attr.c Wed Mar 07 15:41:27 2012 +0000 @@ -34,6 +34,7 @@ long wname(Memblk*, void*, long); static long rname(Memblk*, void*, long); static long rid(Memblk*, void*, long); +long wid(Memblk*, void*, long); long watime(Memblk*, void*, long); static long ratime(Memblk*, void*, long); long wmtime(Memblk*, void*, long); @@ -152,10 +153,8 @@ ulong sz; sz = metasize(meta); - if(sz > nbuf){ - fatal("bug: allocate and use ablk"); + if(sz > nbuf) error("attributes are too long"); - } d = buf; bufp = buf; d->id = meta->id; @@ -391,6 +390,19 @@ return len; } +long +wid(Memblk *f, void *buf, long) +{ + u64int *p; + Dmeta *d; + + p = buf; + d = (Dmeta*)f->d.embed; + f->mf->id = *p; + d->id = *p; + return BIT64SZ; +} + static long rid(Memblk *f, void *buf, long) { diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/cfg.c --- a/sys/src/cmd/creepy/cfg.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/cfg.c Wed Mar 07 15:41:27 2012 +0000 @@ -285,9 +285,22 @@ } static void -cdump(int, char *argv[]) +cdump(int argc, char *argv[]) { - fsdump(strcmp(argv[0], "dumpall") 
== 0); + switch(argc){ + case 1: + fsdump(0, strcmp(argv[0], "dumpall") == 0); + break; + case 2: + if(strcmp(argv[1], "-l") == 0){ + fsdump(1, strcmp(argv[0], "dumpall") == 0); + break; + } + /*fall*/ + default: + consprint("usage: %s [-l]\n", argv[0]); + return; + } } static void @@ -329,15 +342,91 @@ } } +static void +cstats(int argc, char *argv[]) +{ + int clr; + + clr =0; + if(argc == 2 && strcmp(argv[1], "-c") == 0){ + clr = 1; + argc--; + } + if(argc != 1){ + consprint("usage: %s [-c]\n", argv[0]); + return; + } + fsstats(clr); + ninestats(clr); + ixstats(clr); +} + +static void +cdebug(int, char *argv[]) +{ + char *f; + char flags[50]; + int i; + + f = argv[1]; + if(strcmp(f, "on") == 0){ + dbg['D'] = 1; + return; + } + if(strcmp(f, "off") == 0){ + memset(dbg, 0, sizeof dbg); + return; + } + if(*f != '+' && *f != '-') + memset(dbg, 0, sizeof dbg); + else + f++; + for(; *f != 0; f++){ + dbg[(uchar)*f] = 1; /* char may be signed: cast so bytes >= 0x80 cannot index outside dbg[] */ + if(*argv[1] == '-') + dbg[(uchar)*f] = 0; + } + f = flags; + for(i = 0; i < nelem(dbg) && f < flags+nelem(flags)-1; i++) + if(dbg[i]) + *f++ = i; + *f = 0; + consprint("debug = '%s'\n", flags); + +} + +static void +clocks(int, char *argv[]) +{ + if(strcmp(argv[1], "on") == 0) + lockstats(1); + else if(strcmp(argv[1], "off") == 0) + lockstats(0); + else if(strcmp(argv[1], "dump") == 0) + dumplockstats(); + else + consprint("usage: %s [on|off|dump]\n", argv[0]); +} + +static void +cfids(int, char**) +{ + dumpfids(); +} + static void chelp(int, char**); static Cmd cmds[] = { - {"dump", cdump, 1, "dump"}, - {"dumpall", cdump, 1, "dumpall"}, + {"dump", cdump, 0, "dump [-l]"}, + {"dumpall", cdump, 0, "dumpall [-l]"}, + {"stats", cstats, 0, "stats [-c]"}, {"sync", csync, 1, "sync"}, {"halt", chalt, 1, "halt"}, {"users", cusers, 0, "users [-r|-w]"}, + {"debug", cdebug, 2, "debug [+-]FLAGS | on | off"}, + {"locks", clocks, 2, "locks [on|off|dump]"}, + {"fids", cfids, 1, "fids"}, {"?", chelp, 1, "?"}, }; diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/conf.h --- 
a/sys/src/cmd/creepy/conf.h Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/conf.h Wed Mar 07 15:41:27 2012 +0000 @@ -10,43 +10,27 @@ #ifdef TESTING Incr = 2, - Fsysmem = 1*GiB , /* size for in-memory block array */ - Dzerofree = 10, /* out of disk blocks */ + Fsysmem = 200*KiB, /* size for in-memory block array */ /* disk parameters; don't change */ Dblksz = 512UL, /* disk block size */ - Dblkhdrsz = 2*BIT64SZ, Ndptr = 2, /* # of direct data pointers */ Niptr = 2, /* # of indirect data pointers */ #else Incr = 16, Fsysmem = 2*GiB, /* size for in-memory block array */ - Dzerofree = 10, /* out of disk blocks */ /* disk parameters; don't change */ Dblksz = 16*KiB, /* disk block size */ - Dblkhdrsz = 2*BIT64SZ, Ndptr = 8, /* # of direct data pointers */ Niptr = 4, /* # of indirect data pointers */ #endif + Mminfree = 50, /* low on mem blocks */ Dminfree = 1000, /* low on disk blocks */ - Dmaxfree = 1000, /* high on disk blocks */ - Mminfree = 50, /* low on mem blocks */ - Mmaxfree = 500, /* high on mem blocks */ Dminattrsz = Dblksz/2, /* min size for attributes */ - /* - * The format of the disk is: - * blk 0: unused - * blk 1: super - * Nblkgrpsz blocks (1st is ref, Nblkgrpsz-1 are data) - * ... 
- * Nblkgrpsz blocks (1st is ref, Nblkgrpsz-1 are data) - * - */ - Nblkgrpsz = (Dblksz - Dblkhdrsz) / BIT64SZ, - Dblk0addr = 2*Dblksz, + Nahead = 10 * Dblksz, /* # of bytes to read ahead */ /* * Caution: Errstack also limits the max tree depth, @@ -58,7 +42,12 @@ Fidhashsz = 97, /* size of the fid hash size */ Uhashsz = 97, + Rpcspercli = 0, /* != 0 places a limit */ - Rpcspercli = 0, /* != 0 places a limit */ + Nlstats = 1009, /* # of lock profiling entries */ + + Mmaxfree = 2*Mminfree, /* high on mem blocks */ + Dmaxfree = 2*Dminfree, /* high on disk blocks */ + Dzerofree = 10, /* out of disk blocks */ }; diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/dbg.c --- a/sys/src/cmd/creepy/dbg.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/dbg.c Wed Mar 07 15:41:27 2012 +0000 @@ -1,14 +1,43 @@ #include #include +#include char dbg[256]; +static char sdbg[256]; +static Ref nodbg; + +void +nodebug(void) +{ + incref(&nodbg); + if(nodbg.ref == 1) + memmove(sdbg, dbg, sizeof dbg); + memset(dbg, 0, sizeof dbg); +} + +void +debug(void) +{ + if(decref(&nodbg) == 0) + memmove(dbg, sdbg, sizeof dbg); +} int -dbgclr(uchar flag) +setdebug(void) { - int x; + int r; - x = dbg[flag]; - dbg[flag] = 0; - return x; + r = nodbg.ref; + if(r > 0) + memmove(dbg, sdbg, sizeof dbg); + return r; } + +void +rlsedebug(int r) +{ + nodbg.ref = r; + if(r > 0) + memset(dbg, 0, sizeof dbg); +} + diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/dbg.h --- a/sys/src/cmd/creepy/dbg.h Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/dbg.h Wed Mar 07 15:41:27 2012 +0000 @@ -1,20 +1,28 @@ /* * '9': 9p + * 'A': mblk/dblk alloc/free chdentry, drefs * 'D': disk + * 'E': fids * 'F': slices, indirects, dirnth + * 'K': reclaim * 'M': mblk/dblk gets puts + * 'P': procs * 'R': block read * 'W': block write + * 'X': ix * 'd': general debug - * 'P': procs - * 'x': ix + * 'O': lru blocks out */ -#define d9print if(!dbg['9']){}else print -#define dDprint if(!dbg['D']){}else print -#define 
dFprint if(!dbg['F']){}else print -#define dMprint if(!dbg['M']){}else print -#define dRprint if(!dbg['R']){}else print -#define dWprint if(!dbg['W']){}else print -#define dxprint if(!dbg['x']){}else print -#define dPprint if(!dbg['P']){}else print -extern char dbg[]; +#define d9print(...) if(!dbg['9']){}else fprint(2, __VA_ARGS__) +#define dAprint(...) if(!dbg['A']){}else fprint(2, __VA_ARGS__) +#define dEprint(...) if(!dbg['E']){}else fprint(2, __VA_ARGS__) +#define dFprint(...) if(!dbg['F']){}else fprint(2, __VA_ARGS__) +#define dKprint(...) if(!dbg['K']){}else fprint(2, __VA_ARGS__) +#define dMprint(...) if(!dbg['M']){}else fprint(2, __VA_ARGS__) +#define dPprint(...) if(!dbg['P']){}else fprint(2, __VA_ARGS__) +#define dRprint(...) if(!dbg['R']){}else fprint(2, __VA_ARGS__) +#define dWprint(...) if(!dbg['W']){}else fprint(2, __VA_ARGS__) +#define dXprint(...) if(!dbg['X']){}else fprint(2, __VA_ARGS__) +#define dOprint(...) if(!dbg['O']){}else fprint(2, __VA_ARGS__) +#define dprint(...) 
if(!dbg['d']){}else fprint(2, __VA_ARGS__) +extern char dbg[256]; diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/dblk.c --- a/sys/src/cmd/creepy/dblk.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/dblk.c Wed Mar 07 15:41:27 2012 +0000 @@ -17,10 +17,12 @@ * see dk.h */ +int swreaderr, swwriteerr; + void checktag(u64int tag, uint type, u64int addr) { - if(tag != TAG(addr,type)){ + if(tag != TAG(type, addr)){ fprint(2, "%s: bad tag: %#ullx != %#ux d%#ullx pc = %#p\n", argv0, tag, type, addr, getcallerpc(&tag)); abort(); @@ -28,33 +30,47 @@ } } +void +okaddr(u64int addr) +{ + if((addr&Fakeaddr) == 0 && (addr < Dblksz || addr >= fs->limit)) + error("okaddr %#ullx", addr); +} void -dbclear(u64int addr, int type) +okdiskaddr(u64int addr) +{ + if((addr&Fakeaddr) != 0 || addr < Dblksz || addr >= fs->limit) + fatal("okdiskaddr %#ullx", addr); +} + +void +dbclear(u64int tag, int type, u64int addr) { static Diskblk d; static QLock lk; - dDprint("dbclear d%#ullx type %s\n", addr, tname(type)); - qlock(&lk); - d.tag = TAG(addr, type); - d.epoch = now(); + dprint("dbclear type %s d%#ullx\n", tname(type), addr); + xqlock(&lk); + d.tag = tag; if(pwrite(fs->fd, &d, sizeof d, addr) != Dblksz){ - qunlock(&lk); + xqunlock(&lk); fprint(2, "%s: dbclear: d%#ullx: %r\n", argv0, addr); error("dbclear: d%#ullx: %r", addr); } - qunlock(&lk); + xqunlock(&lk); } void meltedref(Memblk *rb) { - if(canqlock(&fs->rlk)) + if(canqlock(&fs->refs)) fatal("meltedref rlk"); - if(rb->frozen && rb->dirty) + if(rb->frozen){ + dprint("melted ref dirty=%d\n", rb->dirty); dbwrite(rb); - rb->frozen = rb->dirty = 0; + rb->frozen = 0; + } } /* @@ -65,6 +81,10 @@ * in the global list. * This would keep locality and make it less likely that a failure in the * middle of a sync destroyes the entire list. + * + * TODO: If there's a bad address in the free list, we fatal. + * we could throw away the entire free list and continue operation, after + * issuing a warning so the user knows. 
*/ u64int @@ -72,15 +92,39 @@ { u64int addr, naddr; - qlock(fs); + xqlock(fs); if(catcherror()){ - qunlock(fs); + xqunlock(fs); error(nil); } Again: if(fs->super == nil) addr = Dblksz; - else if(fs->super->d.eaddr < fs->limit){ + else if(fs->super->d.free != 0){ + addr = fs->super->d.free; + okdiskaddr(addr); + /* + * Caution: can't acquire new locks while holding the fs lock, + * but dbgetref may allocate blocks. + */ + xqunlock(fs); + if(catcherror()){ + xqlock(fs); /* restore the default in this fn. */ + error(nil); + } + naddr = dbgetref(addr); /* acquires locks */ + if(naddr != 0) + okdiskaddr(naddr); + noerror(); + xqlock(fs); + if(addr != fs->super->d.free){ + /* had a race */ + goto Again; + } + fs->super->d.free = naddr; + fs->super->d.ndfree--; + changed(fs->super); + }else if(fs->super->d.eaddr < fs->limit){ addr = fs->super->d.eaddr; fs->super->d.eaddr += Dblksz; changed(fs->super); @@ -91,33 +135,13 @@ * initialized. */ if(((addr-Dblk0addr)/Dblksz)%Nblkgrpsz == 0){ - dDprint("new ref blk addr = d%#ullx\n", addr); - dbclear(addr, DBref); /* fs initialization */ - addr += Dblksz; - fs->super->d.eaddr += Dblksz; + dprint("new ref blk addr = d%#ullx\n", addr); + /* on-demand fs initialization */ + dbclear(TAG(DBref, addr), DBref, addr); + dbclear(TAG(DBref, addr), DBref, addr+Dblksz); + addr += 2*Dblksz; + fs->super->d.eaddr += 2*Dblksz; } - }else if(fs->super->d.free != 0){ - addr = fs->super->d.free; - - /* - * Caution: can't acquire new locks while holding the fs lock, - * but dbgetref may allocate blocks. - */ - qunlock(fs); - if(catcherror()){ - qlock(fs); /* restore the default in this fn. 
*/ - error(nil); - } - naddr = dbgetref(addr); /* acquires locks */ - noerror(); - qlock(fs); - if(addr != fs->super->d.free){ - /* had a race */ - goto Again; - } - fs->super->d.free = naddr; - fs->super->d.ndfree--; - changed(fs->super); }else{ addr = 0; /* preserve backward compatibility with fossil */ @@ -125,22 +149,16 @@ } noerror(); - qunlock(fs); + xqunlock(fs); okaddr(addr); - dDprint("newblkaddr = d%#ullx\n", addr); + dAprint("newblkaddr = d%#ullx\n", addr); return addr; } u64int addrofref(u64int refaddr, int idx) { - u64int bno; - - bno = (refaddr - Dblk0addr)/Dblksz; - bno *= Nblkgrpsz; - bno += idx; - - return Dblk0addr + bno*Dblksz; + return refaddr + idx*Dblksz; } u64int @@ -152,7 +170,7 @@ bno = addr/Dblksz; *idx = bno%Nblkgrpsz; refaddr = Dblk0addr + bno/Nblkgrpsz * Nblkgrpsz * Dblksz; - dDprint("refaddr d%#ullx = d%#ullx[%d]\n", + if(0)dprint("refaddr d%#ullx = d%#ullx[%d]\n", Dblk0addr + addr, refaddr, *idx); return refaddr; } @@ -161,8 +179,8 @@ * db*ref() functions update the on-disk reference counters. * memory blocks use Memblk.Ref instead. Beware. 
*/ -static u64int -dbaddref(u64int addr, int delta, int set) +u64int +dbaddref(u64int addr, int delta, int set, Memblk **rbp, int *ip) { Memblk *rb; u64int raddr, ref; @@ -170,20 +188,16 @@ if(addr == 0) return 0; - if(addr == Noaddr) /* root doesn't count */ + if(addr&Fakeaddr) /* root and ctl files don't count */ return 0; - if(set != 0) - dDprint("dbsetref %#ullx = %d\n", addr, set); - else if(delta != 0) - dDprint("dbaddref %#ullx += %d\n", addr, delta); - nodebug(); raddr = refaddr(addr, &i); rb = dbget(DBref, raddr); - qlock(&fs->rlk); + + xqlock(&fs->refs); if(catcherror()){ mbput(rb); - qunlock(&fs->rlk); + xqunlock(&fs->refs); debug(); error(nil); } @@ -194,44 +208,144 @@ else rb->d.ref[i] += delta; rb->dirty = 1; - if(delta < 0 && rb->d.ref[i] == 0){ - qlock(fs); - rb->d.ref[i] = fs->super->d.free; - fs->super->d.free = addr; - fs->super->d.ndfree++; - qunlock(fs); - } } ref = rb->d.ref[i]; + if(set != 0) + dAprint("dbsetref %#ullx -> %d\n", addr, set); + else if(delta != 0) + dAprint("dbaddref %#ullx += %d -> %ulld\n", addr, delta, ref); noerror(); - qunlock(&fs->rlk); - mbput(rb); - debug(); + xqunlock(&fs->refs); + if(rbp == nil) + mbput(rb); + else + *rbp = rb; + if(ip != nil) + *ip = i; return ref; } u64int dbgetref(u64int addr) { - return dbaddref(addr, 0, 0); + return dbaddref(addr, 0, 0, nil, nil); } void dbsetref(u64int addr, int ref) { - dbaddref(addr, 0, ref); + dbaddref(addr, 0, ref, nil, nil); } u64int dbincref(u64int addr) { - return dbaddref(addr, +1, 0); + return dbaddref(addr, +1, 0, nil, nil); } +/* + * Drop a on-disk reference. + * When no references are left, the block is unlinked from the hash + * (and its hash ref released), and disk references to blocks pointed to by + * this blocks are also decremented (and perhaps such blocks released). + * + * More complex than needed, because we don't want to read a data block + * just to release a reference to it + * b may be nil if type and addr are given. 
+ */ u64int -dbdecref(u64int addr) +dbput(Memblk *b, int type, u64int addr) { - return dbaddref(addr, -1, 0); + u64int ref; + Memblk *mb, *rb; + int i, idx; + + if(b == nil && addr == 0) + return 0; + + okdiskaddr(addr); + ref = dbgetref(addr); + dKprint("dbput d%#010ullx dr %#ullx type %s\n", addr, ref, tname(type)); + if(ref > 2*Dblksz) + fatal("dbput: d%#010ullx: double free", addr); + + ref = dbaddref(addr, -1, 0, &rb, &idx); + if(ref != 0){ + mbput(rb); + return ref; + } + /* + * Gone from disk, be sure it's also gone from memory. + */ + if(catcherror()){ + mbput(rb); + error(nil); + } + mb = b; + if(mb == nil){ + if(type != DBdata) + mb = dbget(type, addr); + else + mb = mbget(type, addr, 0); + } + if(mb != nil) + assert(type == mb->type && addr == mb->addr); + dAprint("dbput: ref = 0 %H\n", mb); + + if(mb != nil) + mbunhash(mb, 0); + if(catcherror()){ + if(mb != b) + mbput(mb); + error(nil); + } + switch(type){ + case DBsuper: + case DBref: + fatal("dbput: super or ref"); + case DBdata: + case DBattr: + break; + case DBfile: + if(0)dbput(nil, DBattr, mb->d.aptr); + for(i = 0; i < nelem(mb->d.dptr); i++) + dbput(nil, DBdata, mb->d.dptr[i]); + for(i = 0; i < nelem(mb->d.iptr); i++) + dbput(nil, DBptr0+i, mb->d.iptr[i]); + break; + default: + if(type < DBptr0 || type >= DBptr0+Niptr) + fatal("dbput: type %d", type); + for(i = 0; i < Dptrperblk; i++) + dbput(nil, mb->type-1, mb->d.ptr[i]); + } + noerror(); + if(mb != b) + mbput(mb); + xqlock(fs); + xqlock(&fs->refs); + rb->d.ref[idx] = fs->super->d.free; + fs->super->d.free = addr; + fs->super->d.ndfree++; + xqunlock(&fs->refs); + xqunlock(fs); + noerror(); + mbput(rb); + + return ref; +} + +static u64int +newfakeaddr(void) +{ + static u64int addr = ~0; + u64int n; + + xqlock(fs); + addr -= Dblksz; + n = addr; + xqunlock(fs); + return n|Fakeaddr; } Memblk* @@ -239,32 +353,30 @@ { Memblk *b; u64int addr; - int root; + int ctl; - nodebug(); - - root = (type == Noaddr); - addr = Noaddr; - if(root) + ctl = type == 
DBctl; + if(ctl){ type = DBfile; - else + addr = newfakeaddr(); + }else addr = newblkaddr(); b = mballoc(addr); - b->d.tag = TAG(b->addr,type); + b->d.tag = TAG(type, b->addr); + b->type = type; if(catcherror()){ mbput(b); debug(); error(nil); } - changed(b); - if(addr != Noaddr && addr >= Dblk0addr) + if((addr&Fakeaddr) == 0 && addr >= Dblk0addr) dbsetref(addr, 1); if(type == DBfile) b->mf = anew(&mfalloc); b = mbhash(b); + changed(b); noerror(); - debug(); - dDprint("dballoc %s -> %H\n", tname(type), b); + dAprint("dballoc %s -> %H\n", tname(type), b); return b; } @@ -295,22 +407,44 @@ u.i = 0x1122334455667788ULL; if(u.m[0] != 0x88) fatal("fix hosttodisk/disktohost for big endian"); - checktag(b->d.tag, TAGTYPE(b->d.tag), b->addr); + checktag(b->d.tag, b->type, b->addr); } +/* + * Write the block a b->addr. + * DBrefs are written at even (b->addr) or odd (b->addr+DBlksz) + * reference blocks as indicated by the frozen super block to be written. + */ long dbwrite(Memblk *b) { Memblk *nb; + static int nw; + u64int addr; - dWprint("dbwrite %H\n",b); + if(b->addr&Fakeaddr) + fatal("dbwrite: fake addr %H", b); + if(b->dirty == 0) + return 0; + addr = b->addr; + if(b->type == DBref){ + assert(fs->fzsuper != nil); + if(fs->fzsuper->d.oddrefs) + addr += Dblksz; + } + dWprint("dbwrite at d%#010ullx %H\n",addr, b); nb = hosttodisk(b); - nb->d.epoch = now(); - if(pwrite(fs->fd, &nb->d, sizeof nb->d, nb->addr) != Dblksz){ + if(swwriteerr != 0 && ++nw % swwriteerr == 0){ + fprint(2, "%s: dbwrite: software fault injected\n", argv0); + mbput(nb); + error("dbwrite: sw fault"); + } + if(pwrite(fs->fd, &nb->d, sizeof nb->d, addr) != Dblksz){ mbput(nb); fprint(2, "%s: dbwrite: d%#ullx: %r\n", argv0, b->addr); error("dbwrite: %r"); } + written(b); mbput(nb); return Dblksz; @@ -319,26 +453,37 @@ long dbread(Memblk *b) { - long tot, nr; + static int nr; + long tot, n; uchar *p; + u64int addr; - + if(b->addr&Fakeaddr) + fatal("dbread: fake addr %H", b); p = b->d.ddata; - for(tot = 0; 
tot < Dblksz; tot += nr){ - nr = pread(fs->fd, p+tot, Dblksz-tot, b->addr + tot); - if(nr == 0) + addr = b->addr; + if(b->type == DBref && fs->super->d.oddrefs) + addr += Dblksz; + for(tot = 0; tot < Dblksz; tot += n){ + if(swreaderr != 0 && ++nr % swreaderr == 0){ /* injected fault: fail every swreaderr-th read attempt */ + fprint(2, "%s: dbread: software fault injected\n", argv0); + error("dbread: sw fault"); + } + n = pread(fs->fd, p+tot, Dblksz-tot, addr + tot); + if(n == 0) werrstr("eof on disk file"); - if(nr <= 0){ + if(n <= 0){ fprint(2, "%s: dbread: d%#ullx: %r\n", argv0, b->addr); error("dbread: %r"); } } - assert(tot == sizeof b->d); + assert(tot == sizeof b->d && tot == Dblksz); + dRprint("dbread from d%#010ullx tag %#ullx %H\n", addr, b->d.tag, b); disktohost(b); - if(TAGTYPE(b->d.tag) != DBref) + if(b->type != DBref) b->frozen = 1; - dRprint("dbread %H\n", b); + return tot; } @@ -349,33 +494,30 @@ dMprint("dbget %s d%#ullx\n", tname(type), addr); okaddr(addr); - b = mbget(addr, 1); + b = mbget(type, addr, 1); if(b == nil) error("i/o error"); - if(TAGTYPE(b->d.tag) != DBnew){ - if(TAGTYPE(b->d.tag) != type) - fatal("dbget: bug: type %d tag %#ullx", type, b->d.tag); + if(b->loading == 0) return b; - } /* the file is new, must read it */ if(catcherror()){ - b->d.tag = TAG(addr, DBnew); - qunlock(&b->newlk); /* awake those waiting for it */ - mbput(b); /* our ref and the hash ref */ + xqunlock(&b->newlk); /* awake those waiting for it */ + mbunhash(b, 0); /* put our ref and the hash ref */ mbput(b); error(nil); } dbread(b); checktag(b->d.tag, type, addr); + assert(b->type == type); if(type == DBfile){ assert(b->mf == nil); b->mf = anew(&mfalloc); gmeta(b->mf, b->d.embed, Embedsz); - b->written = 1; } + b->loading = 0; noerror(); - qunlock(&b->newlk); + xqunlock(&b->newlk); return b; } @@ -388,35 +530,34 @@ d = p; for(i = 0; i < n; i++) if(d[i].file != 0){ - dDprint("add ref on melt d%#ullx\n", d[i].file); + dprint("add ref on melt d%#ullx\n", d[i].file); dbincref(d[i].file); } } + /* * caller responsible for 
locking. - * On errors we leak disk blocks because of added references. + * On errors we may leak disk blocks because of added references. */ Memblk* dbdup(Memblk *b) { Memblk *nb; - uint type; int i; Mfile *nm; ulong doff; - type = TAGTYPE(b->d.tag); - nb = dballoc(type); + nb = dballoc(b->type); if(catcherror()){ mbput(nb); error(nil); } - switch(type){ + switch(b->type){ case DBfree: case DBref: case DBsuper: case DBattr: - fatal("dbdup: %s", tname(type)); + fatal("dbdup: %s", tname(b->type)); case DBdata: memmove(nb->d.data, b->d.data, Dblkdatasz); break; @@ -453,8 +594,8 @@ } break; default: - if(type < DBptr0 || type >= DBptr0 + Niptr) - fatal("dbdup: bad type %d", type); + if(b->type < DBptr0 || b->type >= DBptr0 + Niptr) + fatal("dbdup: bad type %d", b->type); for(i = 0; i < Dptrperblk; i++){ nb->d.ptr[i] = b->d.ptr[i]; if(nb->d.ptr[i] != 0) @@ -463,6 +604,14 @@ } changed(nb); noerror(); + + /* when b is a frozen block, it's likely we won't use it more, + * because we now have a melted one. + * pretend it's the lru one. + */ + if(b->frozen) + mbunused(b); + return nb; } diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/dk.h --- a/sys/src/cmd/creepy/dk.h Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/dk.h Wed Mar 07 15:41:27 2012 +0000 @@ -17,22 +17,9 @@ typedef struct Path Path; typedef struct Alloc Alloc; typedef struct Next Next; - -/* - * these are used by several functions that have flags to indicate - * mem-only, also on disk; and read-access/write-access. (eg. dfmap). - */ -enum{ - Mem=0, - Disk, - - Rd=0, - Wr, - No, -}; - -#define HOWMANY(x, y) (((x)+((y)-1))/(y)) -#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) +typedef struct Lstat Lstat; +typedef struct List List; +typedef struct Link Link; /* * Conventions: @@ -41,12 +28,26 @@ * - Ref is used for in-memory RCs. This has nothing to do with on-disk refs. * - Mem refs include the reference from the hash. That one keeps the file * loaded in memory while unused. 
- * - The hash ref also accounts for the lru list and list of DBref blocks. + * - The hash ref also accounts for refs from the lru/ref/dirty lists. * - Disk refs count only references within the tree on disk. + * - There are two copies of disk references, even, and odd. + * Only one of them is active. Every time the system is written, + * the inactive copy becomes active and vice-versa. Upon errors, + * the active copy on disk is always coherent because the super is + * written last. * - Children do not add refs to parents; parents do not add ref to children. * - 9p, fscmd, ix, and other top-level shells for the fs are expected to * keep Paths for files in use, so that each file in the path * is referenced once by the path + * - example, on debug fsdump()s: + * r=2 -> 1 (from hash) + 1 (while dumping the file info). + * (block is cached, in the hash, but unused otherwise). + * r=3 in /active: 1 (hash) + 1(fs->active) + 1(dump) + * r is greater: + * - some fid is referencing the block + * - it's a melt and the frozen f->mf->melted is a ref. + * - some rpc is using it (reading/writing/...) + * * Assumptions: * - /active is *never* found on disk, it's memory-only. * - b->addr is worm. @@ -79,11 +80,57 @@ * All the code assumes outofmemoryexits = 1. */ +/* + * these are used by several functions that have flags to indicate + * mem-only, also on disk; and read-access/write-access. (eg. dfmap). + */ +enum{ + Mem=0, + Disk, + + Rd=0, + Wr, + + Tqlock = 0, + Trwlock, + Tlock, +}; + + +struct Lstat +{ + int type; + uintptr pc; + int ntimes; + int ncant; + vlong wtime; +}; + + + +#define HOWMANY(x, y) (((x)+((y)-1))/(y)) +#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) + +/* + * ##### On disk structures. ##### + * + * All on-disk integer values are little endian. + * + * blk 0: unused + * blk 1: super + * even ref blk + odd ref blk + Nblkgrpsz-2 blocks + * ... + * even ref blk + odd ref blk + Nblkgrpsz-2 blocks + * + * The code assumes these structures are packed. 
+ * Be careful if they are changed to make things easy for the + * compiler and keep them naturally aligned. + */ + enum { /* block types */ DBfree = 0, - DBnew, /* never found on disk */ DBref, DBattr, DBfile, @@ -93,24 +140,14 @@ /* double */ /* triple */ /*...*/ + DBctl = ~0, /* DBfile, never on disk. arg for dballoc */ + + Dblkhdrsz = 2*BIT64SZ, + Nblkgrpsz = (Dblksz - Dblkhdrsz) / BIT64SZ, + Dblk0addr = 2*Dblksz, + }; -/* - * ##### On disk structures. ##### - * - * All on-disk integer values are little endian. - * - * blk 0: unused - * blk 1: super - * ref blk + Nblkgrpsz-1 blocks - * ... - * ref blk + Nblkgrpsz-1 blocks - * - * The code assumes these structures are packed. - * Be careful if they are changed to make things easy for the - * compiler and keep them naturally aligned. - */ - struct Ddatablk { uchar data[1]; /* raw memory */ @@ -200,6 +237,7 @@ u64int free; /* first free block on list */ u64int eaddr; /* end of the assigned disk portion */ u64int root; /* address of /archive in disk */ + u64int oddrefs; /* use odd ref blocks? or even ref blocks? */ u64int ndfree; /* # of blocks in free list */ u64int dblksz; /* only for checking */ u64int nblkgrpsz; /* only for checking */ @@ -215,10 +253,14 @@ enum { - Noaddr = ~0UL /* null address, for / */ + /* addresses for ctl files and / have this bit set, and are never + * found on disk. + */ + Fakeaddr = 0x8000000000000000ULL, + Noaddr = ~0ULL, }; -#define TAG(addr,type) ((addr)<<8|((type)&0x7F)) +#define TAG(type,addr) ((addr)<<8|((type)&0x7F)) #define TAGTYPE(t) ((t)&0x7F) #define TAGADDROK(t,addr) (((t)&~0xFF) == ((addr)<<8)) @@ -228,12 +270,10 @@ /* * header for all disk blocks. 
- * Those using on-disk references keep them at a DBref block */ struct Diskblkhdr { u64int tag; /* block tag */ - u64int epoch; /* block epoch */ }; union Diskblk @@ -296,8 +336,26 @@ Fmeta; Memblk* melted; /* next version for this one, if frozen */ - ulong lastbno; /* help for RA */ + ulong lastbno; /* last accessed block nb within this file */ + ulong sequential; /* access has been sequential */ + int open; /* for DMEXCL */ + uvlong raoffset; /* we did read ahead up to this offset */ + int wadone; /* we did walk ahead here */ +}; + +struct List +{ + QLock; + Memblk *hd; + Memblk *tl; + long n; +}; + +struct Link +{ + Memblk *lprev; + Memblk *lnext; }; /* @@ -309,19 +367,17 @@ u64int addr; /* block address */ Memblk *next; /* in hash or free list */ - union{ - Memblk *rnext; /* in list of DBref blocks */ - Mfile *mf; /* DBfile on memory info. */ - }; + Link; /* lru / dirty / ref lists */ + Mfile *mf; /* DBfile on-memory info. */ + + int type; int dirty; /* must be written */ int frozen; /* is frozen */ - int written; /* no need to scan this for dirties */ + int loading; /* block is being read */ + int changed; /* for freerefs/writerefs */ + QLock newlk; /* only to wait on DBnew blocks */ - Memblk *lnext; /* list from fs->mru -> fs->lru */ - Memblk *lprev; - - QLock newlk; /* only to wait on DBnew blocks */ Diskblk d; }; @@ -349,18 +405,15 @@ usize nablk; /* # of entries allocated */ usize nmused; /* blocks in use */ usize nmfree; /* free blocks */ - Memblk *free; /* free list of unused blocks in blk */ - QLock llk; - Memblk *lru; - Memblk *mru; + List lru; /* hd: mru; tl: lru */ + List mdirty; /* dirty blocks, not on lru */ + List refs; /* DBref blocks, not in lru nor dirty lists */ QLock mlk; Mfile *mfree; /* unused list */ - QLock rlk; - Memblk *refs; /* list of DBref blocks (also hashed) */ Memblk *super; /* locked by blklk */ Memblk *root; /* only in memory */ @@ -376,8 +429,14 @@ char *dev; /* name for disk */ int fd; /* of disk */ u64int limit; /* address 
for end of disk */ + usize ndblk; /* # of disk blocks in dev */ int config; /* config mode enabled */ + + int nindirs[Niptr]; /* stats */ + int nmelts; + + uchar *chk; /* for fscheck() */ }; /* @@ -425,10 +484,13 @@ #pragma varargck type "H" Memblk* +/* used in debug prints to print just part of huge values */ +#define EP(e) ((e)&0xFFFFFFFFUL) + typedef int(*Blkf)(Memblk*); extern Fsys*fs; extern uvlong maxfsz; extern char*defaultusers; -extern Alloc mfalloc; +extern Alloc mfalloc, pathalloc; diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/fblk.c --- a/sys/src/cmd/creepy/fblk.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/fblk.c Wed Mar 07 15:41:27 2012 +0000 @@ -21,39 +21,27 @@ void rwlock(Memblk *f, int iswr) { - if(iswr == No) - return; - if(iswr) - wlock(f->mf); - else - rlock(f->mf); + xrwlock(f->mf, iswr); } void rwunlock(Memblk *f, int iswr) { - if(iswr == No) - return; - if(iswr) - wunlock(f->mf); - else - runlock(f->mf); + xrwunlock(f->mf, iswr); } void isfile(Memblk *f) { - if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + if(f->type != DBfile || f->mf == nil) fatal("isfile: not a file at pc %#p", getcallerpc(&f)); } void isrwlocked(Memblk *f, int iswr) { - if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + if(f->type != DBfile || f->mf == nil) fatal("isrwlocked: not a file at pc %#p", getcallerpc(&f)); - if(iswr == No) - return; if((iswr && canrlock(f->mf)) || (!iswr && canwlock(f->mf))) fatal("is%clocked at pc %#p", iswr?'w':'r', getcallerpc(&f)); } @@ -61,7 +49,7 @@ void isdir(Memblk *f) { - if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + if(f->type != DBfile || f->mf == nil) fatal("isdir: not a file at pc %#p", getcallerpc(&f)); if((f->mf->mode&DMDIR) == 0) fatal("isdir: not a dir at pc %#p", getcallerpc(&f)); @@ -70,7 +58,7 @@ void isnotdir(Memblk *f) { - if(TAGTYPE(f->d.tag) != DBfile || f->mf == nil) + if(f->type != DBfile || f->mf == nil) fatal("isnotdir: not a file at pc %#p", getcallerpc(&f)); if((f->mf->mode&DMDIR) != 0) 
fatal("isnotdir: dir at pc %#p", getcallerpc(&f)); @@ -104,9 +92,10 @@ dupdentries(nb->d.data, Dblkdatasz/sizeof(Dentry)); USED(&nb); /* for error() */ *addrp = nb->addr; + dbput(b, b->type, b->addr); + noerror(); incref(nb); - dbdecref(b->addr); - noerror(); + mbput(b); return nb; } @@ -135,7 +124,11 @@ ismelted(f); isdir = (f->mf->mode&DMDIR); - f->mf->lastbno = bno; + if(bno != f->mf->lastbno){ + f->mf->sequential = (!mkit && bno == f->mf->lastbno + 1); + f->mf->lastbno = bno; + } + /* * bno: block # relative to the the block we are looking at. * prev: # of blocks before the current one. @@ -168,7 +161,7 @@ } if(i == nelem(f->d.iptr)) error("offset exceeds file capacity"); - + ainc(&fs->nindirs[i]); type = DBptr0+i; dFprint("dfblk: indirect %s nblks %uld (ppb %ud) bno %uld\n", tname(type), nblks, Dptrperblk, bno); @@ -234,7 +227,7 @@ ismelted(f); isnotdir(f); - dDprint("dfdropblks: could remove d%#ullx[%uld:%uld]\n", + dprint("dfdropblks: could remove d%#ullx[%uld:%uld]\n", f->addr, bno, bend); /* * Instead of releasing the references on the data blocks, @@ -352,7 +345,7 @@ f, off, len, iswr?'w':'r', sl.len); return sl; } - if(TAGTYPE(sl.b->d.tag) == DBfile) + if(sl.b->type == DBfile) dFprint("slice m%#p[%#ullx:+%#ulx]%c -> m%#p:e+%#uld[%#ulx]\n", f, off, len, iswr?'w':'r', sl.b, (uchar*)sl.data - sl.b->d.embed, sl.len); @@ -387,7 +380,7 @@ } noerror(); updatesize(d, lastoff); - changed(d); + changed(d); /*paranoia: caller of dfchdentry calls dfchanged*/ } /* @@ -405,7 +398,7 @@ uvlong off; int i; - dDprint("dfchdentry d%#ullx -> d%#ullx\nin %H\n", addr, naddr, d); + dAprint("dfchdentry d%#ullx -> d%#ullx\nin %H\n", addr, naddr, d); isrwlocked(d, iswr); isdir(d); @@ -414,11 +407,15 @@ sl = dfslice(d, Dblkdatasz, off, iswr); if(sl.len == 0) break; - if(sl.b == 0){ + if(sl.b == nil){ if(addr == 0 && !iswr) return off; continue; } + if(catcherror()){ + mbput(sl.b); + error(nil); + } de = sl.data; for(i = 0; i < sl.len/sizeof(Dentry); i++){ if(de[i].file == addr){ 
@@ -429,11 +426,13 @@ de[i].file = naddr; changed(sl.b); } + noerror(); mbput(sl.b); return off + i*sizeof(Dentry); } } off += sl.len; + noerror(); mbput(sl.b); } if(iswr) @@ -481,7 +480,7 @@ addr = dfdirnth(f, n); if(addr == 0) return nil; - b = mbget(addr, 0); + b = mbget(DBfile, addr, 0); if(b != nil || disktoo == 0) return b; return dbget(DBfile, addr); @@ -510,11 +509,10 @@ isdir(d); dfchdentry(d, 0, f->addr, Wr); - changed(d); } /* - * does not dbdecref(f) + * does not dbput(f) * caller locks both d and f */ void @@ -524,7 +522,6 @@ isdir(d); dfchdentry(d, f->addr, 0, Wr); - changed(d); } /* @@ -540,7 +537,6 @@ uvlong off; int i; - dDprint("dfwalk '%s' at %H\n", name, d); if(strcmp(name, "..") == 0) fatal("dfwalk: '..'"); isdir(d); @@ -556,6 +552,7 @@ if(sl.b == nil) continue; if(catcherror()){ + dprint("dfwalk d%#ullx '%s': %r\n", d->addr, name); mbput(sl.b); error(nil); } @@ -584,6 +581,7 @@ error("file not found"); done: + dprint("dfwalk d%#ullx '%s' -> d%#ullx\n", d->addr, name, f->addr); return f; } @@ -638,6 +636,7 @@ f = *fp; if(!f->frozen) return p; + ainc(&fs->nmelts); rwunlock(f, Wr); /* @@ -647,13 +646,23 @@ * /active is special, because it's only frozen temporarily while * creating a frozen version of the tree. Instead of melting it, * we should just wait for it. + * p[0] is / + * p[1] is /active */ - followmelted(&p->f[1]); - + for(;;){ + followmelted(&p->f[1]); + if(p->f[1]->frozen == 0) + break; + rwunlock(p->f[1], Wr); + yield(); + } + /* + * At loop header, parent is p->f[i-1], melted and wlocked. + * At the end of the loop, p->f[i] is melted and wlocked. 
+ */ for(i = 2; i < nth; i++){ followmelted(&p->f[i]); - f = p->f[i]; - if(!f->frozen){ + if(!p->f[i]->frozen){ rwunlock(p->f[i-1], Wr); continue; } @@ -662,21 +671,54 @@ rwunlock(p->f[i], Wr); error(nil); } - nf = dbdup(f); + + nf = dbdup(p->f[i]); rwlock(nf, Wr); + if(catcherror()){ rwunlock(nf, Wr); mbput(nf); error(nil); } - dfchdentry(p->f[i-1], f->addr, nf->addr, 1); + dfchdentry(p->f[i-1], p->f[i]->addr, nf->addr, 1); + noerror(); + noerror(); /* committed */ - rwunlock(f, Wr); - rwunlock(p->f[i-1], Wr); + rwunlock(p->f[i-1], Wr); /* parent */ + rwunlock(p->f[i], Wr); /* old frozen version */ + f = p->f[i]; + p->f[i] = nf; + assert(f->ref > 1); + mbput(f); /* ref from path */ if(!catcherror()){ - dbdecref(f->addr); + dbput(f, f->type, f->addr); /* p->f[i] ref from disk */ noerror(); } } return p; } + +/* + * Report that a file has been modified. + * Modification times propagate up to the root of the file tree. + */ +void +dfchanged(Path *p) +{ + Memblk *f; + u64int t; + int i; + + t = now(); + for(i = 0; i < p->nf; i++){ + f = p->f[i]; + rwlock(f, Wr); + if(f->frozen == 0) + if(!catcherror()){ + wmtime(f, &t, sizeof t); + watime(f, &t, sizeof t); + noerror(); + } + rwunlock(f, Wr); + } +} diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/file.c --- a/sys/src/cmd/creepy/file.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/file.c Wed Mar 07 15:41:27 2012 +0000 @@ -18,15 +18,6 @@ */ static void -dfchanged(Memblk *f) -{ - isfile(f); - changed(f); - wmtime(f, &f->d.epoch, sizeof f->d.epoch); - watime(f, &f->d.epoch, sizeof f->d.epoch); -} - -static void dfused(Memblk *f) { u64int t; @@ -36,58 +27,70 @@ wmtime(f, &t, sizeof t); } +/* + * May be called with null parent, for root and ctl files. + * The first call with a null parent is root, all others are ctl + * files linked at root. 
+ */ Memblk* dfcreate(Memblk *parent, char *name, char *uid, ulong mode) { - Memblk *b; + Memblk *nf; Mfile *m; + int isctl; if(fsfull()) error("file system full"); + isctl = parent == nil; + if(parent == nil) + parent = fs->root; if(parent != nil){ - dDprint("dfcreate '%s' %M at\n%H\n", name, mode, parent); + dprint("dfcreate '%s' %M at\n%H\n", name, mode, parent); isdir(parent); isrwlocked(parent, Wr); ismelted(parent); - b = dballoc(DBfile); - }else{ - dDprint("dfcreate '%s' %M", name, mode); - b = dballoc(Noaddr); /* root */ - } + }else + dprint("dfcreate '%s' %M", name, mode); + + if(isctl) + nf = dballoc(DBctl); + else + nf = dballoc(DBfile); if(catcherror()){ - mbput(b); + mbput(nf); if(parent != nil) rwunlock(parent, Wr); error(nil); } - m = b->mf; - m->id = b->d.epoch; + m = nf->mf; + m->id = now(); m->mode = mode; - m->mtime = b->d.epoch; + m->mtime = m->id; + m->atime = m->id; m->length = 0; m->uid = uid; m->gid = uid; m->muid = uid; m->name = name; - b->d.asize = pmeta(b->d.embed, Embedsz, m); - dfchanged(b); + nf->d.asize = pmeta(nf->d.embed, Embedsz, m); + changed(nf); if(parent != nil){ m->gid = parent->mf->uid; - dflink(parent, b); - dfchanged(parent); + dflink(parent, nf); } noerror(); - dDprint("dfcreate-> %H\n", b); - incref(b); /* initial ref for tree; this for caller */ - return b; + dprint("dfcreate-> %H\n", nf); + return nf; } void dfremove(Memblk *p, Memblk *f) { + vlong n; + /* funny as it seems, we may need extra blocks to melt */ if(fsfull()) error("file system full"); @@ -103,11 +106,12 @@ error(nil); } dfunlink(p, f); - /* can't fail now. it's unlinked */ + /* shouldn't fail now. 
it's unlinked */ noerror(); rwunlock(f, Wr); if(!catcherror()){ - dfreclaim(f); + n = dfreclaim(f); + dprint("dfreclaim d%#ullx: %lld blks\n", f->addr, n); noerror(); } mbput(f); @@ -161,17 +165,9 @@ changed(sl.b); mbput(sl.b); } - dfchanged(f); return tot; } -/* - * Called only by dfwattr(), for "length", to - * adjust the file data structure before actually - * updating the file length attribute. - * Should return the size in use. - */ - static int ptrmap(u64int addr, int nind, Blkf f, int isdisk) { @@ -184,7 +180,7 @@ if(isdisk) b = dbget(DBdata+nind, addr); else{ - b = mbget(addr, 0); + b = mbget(DBdata+nind, addr, 0); if(b == nil) return 0; /* on disk */ } @@ -193,61 +189,26 @@ error(nil); } tot = 0; - if(f(b) == 0){ + if(f == nil || f(b) == 0){ tot++; - if(nind > 0) + /* we might sweep an entire disk and run out of blocks */ + if(isdisk) + fslru(); + if(nind > 0){ for(i = 0; i < Dptrperblk; i++) tot += ptrmap(b->d.ptr[i], nind-1, f, isdisk); + } } noerror(); mbput(b); return tot; } -static int -fdumpf(Memblk *f) -{ - extern int mbtab; - - isfile(f); - mbtab++; - return 0; -} - -static int -bdumpf(Memblk*) -{ - return 0; -} - -static int -fdumpedf(Memblk *) -{ - extern int mbtab; - - mbtab--; - return 0; -} - /* - * XXX: We must get rid of dfmap. - * There are few uses and they are already too different. - * for example, for dfdump, we want to call fslowmem() now and then, - * so that if we read the entire disk to dump it, we have no problem. + * CAUTION: debug: no locks. 
*/ int -dfdump(Memblk *f, int disktoo) -{ - int n; - - incref(f); - n = dfmap(f, fdumpf, fdumpedf, bdumpf, disktoo, No); - decref(f); - return n; -} - -int -dfmap(Memblk *f, Blkf pre, Blkf post, Blkf bf, int isdisk, int lk) +dfdump(Memblk *f, int isdisk) { int i; Memblk *b; @@ -256,29 +217,15 @@ extern int mbtab; isfile(f); - rwlock(f, lk); - if(catcherror()){ - rwunlock(f, lk); - error(nil); - } - if(pre != nil && pre(f) < 0){ - noerror(); - rwunlock(f, lk); - return 0; - } tot = 1; - if(bf != nil){ - for(i = 0; i < nelem(f->d.dptr); i++) - tot += ptrmap(f->d.dptr[i], 0, bf, isdisk); - for(i = 0; i < nelem(f->d.iptr); i++) - tot += ptrmap(f->d.iptr[i], i+1, bf, isdisk); - } - if(pre == fdumpf){ /* kludge */ - mbtab--; - print("%H\n", f); + /* visit the blocks to fetch them if needed. */ + for(i = 0; i < nelem(f->d.dptr); i++) + tot += ptrmap(f->d.dptr[i], 0, nil, isdisk); + for(i = 0; i < nelem(f->d.iptr); i++) + tot += ptrmap(f->d.iptr[i], i+1, nil, isdisk); + fprint(2, "%H\n", f); + if((f->mf->mode&DMDIR) != 0){ mbtab++; - } - if((f->mf->mode&DMDIR) != 0){ child = dfchild; if(!isdisk) child = mfchild; @@ -287,21 +234,22 @@ if(b == nil) continue; if(!catcherror()){ - tot += dfmap(b, pre, post, bf, isdisk, lk); + tot += dfdump(b, isdisk); noerror(); } mbput(b); } + mbtab--; } - if(post != nil) - post(f); - noerror(); - rwunlock(f, lk); + + /* we might sweep an entire disk and run out of blocks */ + if(isdisk) + fslru(); return tot; } static int -bfreezef(Memblk *b) +bfreeze(Memblk *b) { if(b->frozen) return -1; @@ -309,118 +257,194 @@ return 0; } +int +dffreeze(Memblk *f) +{ + int i; + Memblk *b; + long tot; + + isfile(f); + if(f->frozen && f != fs->active && f != fs->archive) + return 0; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + f->frozen = 1; + tot = 1; + for(i = 0; i < nelem(f->d.dptr); i++) + tot += ptrmap(f->d.dptr[i], 0, bfreeze, Mem); + for(i = 0; i < nelem(f->d.iptr); i++) + tot += ptrmap(f->d.iptr[i], i+1, bfreeze, Mem); 
+ if((f->mf->mode&DMDIR) != 0){ + for(i = 0; i < f->mf->length/sizeof(Dentry); i++){ + b = mfchild(f, i); + if(b == nil) + continue; + if(!catcherror()){ + tot += dffreeze(b); + noerror(); + } + mbput(b); + } + } + noerror(); + rwunlock(f, Wr); + return tot; +} + static int -ffreezef(Memblk *f) +countref(u64int addr) { - /* see fsfreeze() */ - if(f->frozen && f != fs->active && f != fs->archive) - return -1; - f->frozen = 1; + ulong idx; + int old; + + idx = addr/Dblksz; + old = fs->chk[idx]; + if(fs->chk[idx] == 0xFE) + fprint(2, "fscheck: d%#010ullx: too many refs, ignoring some\n", + addr); + else + fs->chk[idx]++; + return old; +} + +static int +bcountrefs(Memblk *b) +{ + countref(b->addr); return 0; } -int -dffreeze(Memblk *f) +static void +countfree(u64int addr) { - return dfmap(f, ffreezef, nil, bfreezef, Mem, Wr); + long i; + + i = addr/Dblksz; + if(fs->chk[i] != 0 && fs->chk[i] <= 0xFE) + fprint(2, "fscheck: d%#010ullx: free block in use\n", addr); + else if(fs->chk[i] == 0xFF) + fprint(2, "fscheck: d%#010ullx: double free\n", addr); + else + fs->chk[i] = 0xFF; } -static int -bsyncf(Memblk *b) +void +dfcountfree(void) { - if(b->dirty) - dbwrite(b); - b->dirty = 0; - return 0; + u64int addr; + + dprint("list...\n"); + addr = fs->super->d.free; + while(addr != 0){ + if(addr >fs->limit){ + fprint(2, "fscheck: d%#010ullx: free overflow\n", addr); + break; + } + countfree(addr); + addr = dbgetref(addr); + } + /* heading unused part */ + dprint("hdr...\n"); + for(addr = 0; addr < Dblk0addr; addr += Dblksz) + countfree(addr); + /* DBref blocks */ + dprint("refs...\n"); + for(addr = Dblk0addr; addr < fs->super->d.eaddr; addr += Dblksz*Nblkgrpsz){ + countfree(addr); /* even DBref */ + countfree(addr+Dblksz); /* odd DBref */ + } } -static int -fsyncf(Memblk *f) +void +dfcountrefs(Memblk *f) { - if(f->written) - return -1; - return 0; -} -static int -fsyncedf(Memblk *f) -{ - if((f != fs->archive && !f->frozen) || f->written) - fatal("fsyncf: not frozen or 
written\n%H\n", f); - if(f->dirty) - dbwrite(f); - f->dirty = 0; - f->written = 1; /* but for errors! */ - return 0; -} + Memblk *b; + int i; -int -dfsync(Memblk *f) -{ - return dfmap(f, fsyncf, fsyncedf, bsyncf, Mem, Rd); -} - -static int -breclaimf(Memblk *b) -{ - if(catcherror()) - return -1; - if(dbdecref(b->addr) != 0){ - noerror(); - return -1; + isfile(f); + if((f->addr&Fakeaddr) == 0 && f->addr >= fs->limit){ + fprint(2, "fscheck: '%s' d%#010ullx: out of range\n", + f->mf->name, f->addr); + return; } - if(b->ref != 1) - fatal("breclaimf: ref is %d", b->ref); + if((f->addr&Fakeaddr) == 0) + if(countref(f->addr) != 0) /* already visited */ + return; /* skip children */ + rwlock(f, Rd); + if(catcherror()){ + fprint(2, "fscheck: '%s' d%#010ullx: data: %r\n", + f->mf->name, f->addr); + rwunlock(f, Rd); + return; + } + for(i = 0; i < nelem(f->d.dptr); i++) + ptrmap(f->d.dptr[i], 0, bcountrefs, Disk); + for(i = 0; i < nelem(f->d.iptr); i++) + ptrmap(f->d.iptr[i], i+1, bcountrefs, Disk); + if(f->mf->mode&DMDIR) + for(i = 0; i < f->mf->length/sizeof(Dentry); i++){ + b = dfchild(f, i); + if(b == nil) + continue; + if(catcherror()) + fprint(2, "fscheck: '%s' d%#010ullx:" + " child[%d]: %r\n", + f->mf->name, f->addr, i); + else{ + dfcountrefs(b); + noerror(); + } + mbput(b); + } noerror(); - return 0; -} - -static int -freclaimf(Memblk *f) -{ - if(dbdecref(f->addr) != 0) - return -1; - if(f->ref != 1) - print("freclaimf: ref is %d\n", f->ref); - return 0; + rwunlock(f, Rd); } /* - * While reclaiming, we drop disk references from the parent - * to the children, but, in memory, - * the parent is never released before releasing the children, - * so clients holding locks within the reclaimed tree should be safe. + * Drop one disk reference for f and reclaim its storage if it's gone. + * The given memory reference is not released. + * For directories, all files contained have their disk references adjusted, + * and they are also reclaimed if no further references exist. 
*/ int dfreclaim(Memblk *f) { - return dfmap(f, freclaimf, nil, breclaimf, Disk, Wr); + int i; + Memblk *b; + long tot; + + isfile(f); + dKprint("dfreclaim %H\n", f); + /* + * Remove children if it's the last disk ref before we drop data blocks. + * No new disk refs may be added, so there's no race here. + */ + tot = 0; + if(dbgetref(f->addr) == 1 && (f->mf->mode&DMDIR) != 0){ + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + for(i = 0; i < f->mf->length/sizeof(Dentry); i++){ + b = dfchild(f, i); + if(b == nil) + continue; + if(!catcherror()){ + tot += dfreclaim(b); + noerror(); + } + mbput(b); + } + noerror(); + rwunlock(f, Wr); + } + + if(dbput(f, f->type, f->addr) == 0) + tot++; + return tot; } - -/* - * DEBUG: no locks. - */ -void -dflist(Memblk *f, char *ppath) -{ - char *path; - Mfile *m; - int i; - Memblk *cf; - - m = f->mf; - if(ppath == nil){ - print("/"); - path = strdup(m->name); - }else - path = smprint("%s/%s", ppath, m->name); - print("%-30s\t%M\t%5ulld\t%s mr=%d dr=%ulld\n", - path, (ulong)m->mode, m->length, m->uid, f->ref, dbgetref(f->addr)); - if(m->mode&DMDIR) - for(i = 0; (cf = dfchild(f, i)) != nil; i++){ - dflist(cf, path); - mbput(cf); - } - free(path); - if(ppath == nil) - print("\n"); -} diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/fns.h --- a/sys/src/cmd/creepy/fns.h Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/fns.h Wed Mar 07 15:41:27 2012 +0000 @@ -2,26 +2,23 @@ extern u64int addrofref(u64int refaddr, int idx); extern void afree(Alloc *a, void *nd); extern void* anew(Alloc *a); -extern void attach(Fid *fid, char *aname, char *uname); extern void changed(Memblk *b); extern void checktag(u64int tag, uint type, u64int addr); -extern void clean(Memblk *b); extern void clearusers(void); extern char* cliworker9p(void *v, void**aux); extern char* cliworkerix(void *v, void**aux); -extern Fid* clone(Cli *cli, Fid *fid, int no); extern Path* clonepath(Path *p); extern void consinit(void); extern 
void consprint(char *fmt, ...); extern long consread(char *buf, long count); extern long conswrite(char *ubuf, long count); extern Memblk* dballoc(uint type); -extern void dbclear(u64int addr, int type); -extern u64int dbdecref(u64int addr); +extern void dbclear(u64int tag, int type, u64int addr); extern Memblk* dbdup(Memblk *b); extern Memblk* dbget(uint type, u64int addr); extern u64int dbgetref(u64int addr); extern u64int dbincref(u64int addr); +extern u64int dbput(Memblk *b, int type, u64int addr); extern long dbread(Memblk *b); extern void dbsetref(u64int addr, int ref); extern long dbwrite(Memblk *b); @@ -29,15 +26,16 @@ extern void dfaccessok(Memblk *f, char *uid, int bits); extern ulong dfbno(Memblk *f, uvlong off, ulong *boffp); extern void dfcattr(Memblk *f, int op, char *name, void *val, long count); +extern void dfchanged(Path *p); extern u64int dfchdentry(Memblk *d, u64int addr, u64int naddr, int iswr); extern Memblk* dfchild(Memblk *f, int n); +extern void dfcountfree(void); +extern void dfcountrefs(Memblk *f); extern Memblk* dfcreate(Memblk *parent, char *name, char *uid, ulong mode); extern void dfdropblks(Memblk *f, ulong bno, ulong bend); -extern int dfdump(Memblk *f, int disktoo); +extern int dfdump(Memblk *f, int isdisk); extern int dffreeze(Memblk *f); extern void dflink(Memblk *d, Memblk *f); -extern void dflist(Memblk *f, char *ppath); -extern int dfmap(Memblk *f, Blkf pre, Blkf post, Blkf bf, int isdisk, int lk); extern Path* dfmelt(Path **pp, int nth); extern ulong dfpread(Memblk *f, void *a, ulong count, uvlong off); extern ulong dfpwrite(Memblk *f, void *a, ulong count, uvlong *off); @@ -45,36 +43,43 @@ extern int dfreclaim(Memblk *f); extern void dfremove(Memblk *p, Memblk *f); extern Blksl dfslice(Memblk *f, ulong len, uvlong off, int iswr); -extern int dfsync(Memblk *f); extern void dfunlink(Memblk *d, Memblk *f); extern Memblk* dfwalk(Memblk *d, char *name, int iswr); extern long dfwattr(Memblk *f, char *name, void *val, long nval); 
extern void disktohost(Memblk *b); extern Path* dropelem(Path **pp); +extern void dumpfids(void); +extern void dumplockstats(void); extern void dupdentries(void *p, int n); extern ulong embedattrsz(Memblk *f); extern void fatal(char *fmt, ...); +extern void fidattach(Fid *fid, char *aname, char *uname); +extern Fid* fidclone(Cli *cli, Fid *fid, int no); extern void fidclose(Fid *fid); extern void fidcreate(Fid *fid, char *name, int mode, ulong perm); extern int fidfmt(Fmt *fmt); extern void fidopen(Fid *fid, int mode); -extern long fidread(Fid *fid, void *data, ulong count, vlong offset); +extern void fidrahead(Fid *fid, uvlong offset); +extern long fidread(Fid *fid, void *data, ulong count, vlong offset, Packmeta pack); extern void fidremove(Fid *fid); +extern void fidwahead(Fid *fid); +extern void fidwalk(Fid *fid, char *wname); extern long fidwrite(Fid *fid, void *data, ulong count, uvlong *offset); extern void freerpc(Rpc *rpc); -extern void fsdump(int disktoo); +extern void fscheck(void); +extern void fsdump(int full, int disktoo); extern void fsfmt(char *dev); extern Memblk* fsfreeze(void); extern int fsfull(void); -extern void fslist(void); -extern int fslowmem(void); +extern int fslru(void); extern uvlong fsmemfree(void); extern void fsopen(char *dev); extern void fspolicy(void); extern int fsreclaim(void); -extern void fsstats(void); +extern void fsstats(int); extern void fssync(void); extern Fid* getfid(void* clino, int no); +extern Lstat* getlstat(uintptr pc, int type); extern void gmeta(Fmeta *meta, void *buf, ulong nbuf); extern Memblk* hosttodisk(Memblk *b); extern void isdir(Memblk *f); @@ -86,28 +91,37 @@ extern int ixcallfmt(Fmt *fmt); extern uint ixpack(IXcall *f, uchar *ap, uint nap); extern uint ixpackedsize(IXcall *f); -extern void ixstats(void); +extern void ixstats(int clr); extern uint ixunpack(uchar *ap, uint nap, IXcall *f); extern void listen9pix(char *addr, char* (*cliworker)(void *arg, void **aux)); +extern void lockstats(int on); 
extern Memblk* mballoc(u64int addr); -extern Memblk* mbdup(Memblk *b); extern int mbfmt(Fmt *fmt); -extern Memblk* mbget(u64int addr, int mkit); +extern Memblk* mbget(int type, u64int addr, int mkit); extern Memblk* mbhash(Memblk *b); extern void mbput(Memblk *b); -extern void mbunhash(Memblk *b); +extern void mbunhash(Memblk *b, int isreclaim); +extern void mbunused(Memblk *b); extern void meltedref(Memblk *rb); +extern void meltfids(void); +extern void meltfids(void); +extern void meltfids(void); +extern int member(char *uid, char *member); +extern int member(char *uid, char *member); extern int member(char *uid, char *member); extern Memblk* mfchild(Memblk *f, int n); +extern void mlistdump(char *tag, List *l); +extern void munlink(List *l, Memblk *b, int isreclaim); extern u64int newblkaddr(void); extern Cli* newcli(char *addr, int fd, int cfd); extern Fid* newfid(void* clino, int no); extern Path* newpath(Memblk *root); extern Rpc* newrpc(void); -extern void ninestats(void); +extern void ninestats(int clr); extern void nodebug(void); extern uvlong now(void); extern void okaddr(u64int addr); +extern void okdiskaddr(u64int addr); extern void ownpath(Path **pp); extern void parseusers(char *u); extern ulong pmeta(void *buf, ulong nbuf, Fmeta *meta); @@ -116,12 +130,19 @@ extern void putpath(Path *p); extern u64int refaddr(u64int addr, int *idx); extern void replied(Rpc *rpc); +extern void rlsedebug(int r); extern int rpcfmt(Fmt *fmt); extern void rwlock(Memblk *f, int iswr); extern void rwunlock(Memblk *f, int iswr); +extern int setdebug(void); extern void srv9pix(char *srv, char* (*cliworker)(void *arg, void **aux)); extern char* tname(int t); -extern void walk(Fid *fid, char *wname); +extern void written(Memblk *b); +extern void xqlock(QLock *q); +extern void xqunlock(QLock *q); +extern void xrwlock(RWLock *rw, int iswr); +extern void xrwunlock(RWLock *rw, int iswr); extern long watime(Memblk *f, void *buf, long); +extern long wid(Memblk *f, void *buf, long); 
extern long wmtime(Memblk *f, void *buf, long); extern long wname(Memblk *f, void *buf, long len); diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/fscmd.c --- a/sys/src/cmd/creepy/fscmd.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/fscmd.c Wed Mar 07 15:41:27 2012 +0000 @@ -12,6 +12,16 @@ #include "net.h" #include "fns.h" +/* + * HUGE warning: + * these commands do not perform checks at all. + * that means you know what you are doing if you use them. + * e.g., you can create multiple files with the same name + * in the same directory. + * + * This tool is only an aid for testing and debugging. + */ + enum { Nels = 64 @@ -20,6 +30,17 @@ static char *fsdir; static int verb; +int +member(char *uid, char *member) +{ + return strcmp(uid, member); +} + +void +meltfids(void) +{ +} + /* * Walks elems starting at f. * Ok if nelems is 0. @@ -48,6 +69,7 @@ nf = dfwalk(f, elems[i], 0); rwunlock(f, Rd); addelem(&p, nf); + mbput(nf); f = nf; USED(&f); /* in case of error() */ noerror(); @@ -105,7 +127,7 @@ /* * This is unrealistic in that it keeps the file locked * during the entire put. This means that we can only give - * fslowmem() a chance before each put, and not before each + * fslru() a chance before each put, and not before each * write, because everything is going to be in use and dirty if * we run out of memory. 
*/ @@ -142,7 +164,6 @@ m = p->f[p->nf-1]; if(catcherror()){ rwunlock(m, Wr); - mbput(m); error(nil); } f = dfcreate(m, fn, d->uid, d->mode&(DMDIR|0777)); @@ -158,12 +179,12 @@ if((d->mode&DMDIR) == 0){ off = 0; for(;;){ - fslowmem(); + fslru(); nr = read(fd, buf, sizeof buf); if(nr <= 0) break; nw = dfpwrite(f, buf, nr, &off); - dDprint("wrote %ld of %ld bytes\n", nw, nr); + dWprint("wrote %ld of %ld bytes\n", nw, nr); off += nr; } } @@ -202,7 +223,7 @@ if((m->mode&DMDIR) == 0){ off = 0; for(;;){ - fslowmem(); + fslru(); nr = dfpread(f, buf, sizeof buf, off); if(nr <= 0) break; @@ -247,7 +268,7 @@ if((m->mode&DMDIR) == 0){ off = 0; for(;;){ - fslowmem(); + fslru(); nr = dfpread(f, buf, sizeof buf, off); if(nr <= 0) break; @@ -266,15 +287,6 @@ } static void -fsls(int, char**) -{ - if(verb) - fsdump(1); - else - fslist(); -} - -static void fssnap(int, char**) { fssync(); @@ -284,18 +296,13 @@ fsrcl(int, char**) { fsreclaim(); + fssync(); /* commit changes to disk */ } static void -fsdmp(int, char**) +fsdmp(int, char *argv[]) { - fsdump(0); -} - -static void -fsdmpall(int, char**) -{ - fsdump(1); + fsdump(*argv[0] == 'l', strstr(argv[0], "all") != 0); } static void @@ -307,7 +314,7 @@ static void fsout(int, char*[]) { - fslowmem(); + fslru(); } static void @@ -343,7 +350,13 @@ static void fsst(int, char**) { - fsstats(); + fsstats(0); +} + +static void +fschk(int, char**) +{ + fscheck(); } static void @@ -359,15 +372,18 @@ {"put", fsput, 3, "put!src!dst"}, {"get", fsget, 3, "get!dst!src"}, {"cat", fscat, 3, "cat!what"}, - {"ls", fsls, 1, "ls"}, {"dump", fsdmp, 1, "dump"}, - {"dumpall", fsdmpall, 1, "dumpall"}, + {"dumpall", fsdmp, 1, "dumpall"}, + {"ldump", fsdmp, 1, "ldump"}, + {"ldumpall", fsdmp, 1, "ldumpall"}, + {"sync", fssnap, 1, "sync"}, {"snap", fssnap, 1, "snap"}, {"rcl", fsrcl, 1, "rcl"}, {"dbg", fsdbg, 2, "dbg!n"}, {"out", fsout, 1, "out"}, {"rm", fsrm, 2, "rm!what"}, {"stats", fsst, 1, "stats"}, + {"check", fschk, 1, "check"}, }; void @@ -404,14 +420,14 @@ 
if(catcherror()) fatal("cmd %s: %r", argv[i]); if(verb>1) - fsdump(0); + fsdump(0, 0); print("%% %s\n", argv[i]); nargs = gettokens(argv[i], args, Nels, "!"); for(j = 0; j < nelem(cmds); j++){ if(strcmp(cmds[j].name, argv[i]) != 0) continue; if(cmds[j].nargs != 0 && cmds[j].nargs != nargs) - print("usage: %s\n", cmds[j].usage); + fprint(2, "usage: %s\n", cmds[j].usage); else cmds[j].f(nargs, args); fspolicy(); @@ -419,14 +435,14 @@ } noerror(); if(j == nelem(cmds)){ - print("no such command\n"); + fprint(2, "no such command\n"); for(j = 0; j < nelem(cmds); j++) - print("\t%s\n", cmds[j].usage); + fprint(2, "\t%s\n", cmds[j].usage); break; } } if(verb>1) - fsdump(0); + fsdump(0, 0); noerror(); exits(nil); } diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/fsfmt.c --- a/sys/src/cmd/creepy/fsfmt.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/fsfmt.c Wed Mar 07 15:41:27 2012 +0000 @@ -12,32 +12,40 @@ #include "net.h" #include "fns.h" +int +member(char *uid, char *member) +{ + return strcmp(uid, member); +} + +void +meltfids(void) +{ +} + static void usage(void) { - fprint(2, "usage: %s [-DFLAGS] [-dv]\n", argv0); + fprint(2, "usage: %s [-DFLAGS] [disk]\n", argv0); exits("usage"); } -static char xdbg[256]; -static char zdbg[256]; - void threadmain(int argc, char *argv[]) { + char *dev; int verb; - char *dev; dev = "disk"; verb = 0; ARGBEGIN{ case 'v': - verb++; + verb = 1; break; default: - if(ARGC() >= 'A' && ARGC() <= 'Z'){ - xdbg['d'] = 1; - xdbg[ARGC()] = 1; + if((ARGC() >= 'A' && ARGC() <= 'Z') || ARGC() == '9'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; }else usage(); }ARGEND; @@ -50,13 +58,9 @@ errinit(Errstack); if(catcherror()) fatal("error: %r"); - memmove(dbg, xdbg, sizeof xdbg); fsfmt(dev); - memmove(dbg, zdbg, sizeof zdbg); if(verb) - fsdump(0); - else - fslist(); + fsdump(0, 0); noerror(); exits(nil); } diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/fsys.c --- a/sys/src/cmd/creepy/fsys.c Tue Mar 06 16:17:52 2012 +0100 +++ 
b/sys/src/cmd/creepy/fsys.c Wed Mar 07 15:41:27 2012 +0000 @@ -16,35 +16,79 @@ * All the code assumes outofmemoryexits = 1. */ +enum +{ + Lru = 0, + Freeze, + Write, + Nfsops, +}; + Fsys *fs; -int fatalaborts = 1; uvlong maxfsz; -void -fatal(char *fmt, ...) +vlong fsoptime[Nfsops]; +ulong nfsopcalls[Nfsops]; + +static char* fsopname[] = { - va_list arg; +[Lru] "lru", +[Freeze] "freeze", +[Write] "write", +}; - va_start(arg, fmt); - vfprint(2, fmt, arg); - va_end(arg); - fprint(2, "\n"); - if(fatalaborts) - abort(); - threadexitsall("fatal"); -} +static uvlong +fsdiskfree(void) +{ + uvlong nfree; -uvlong -now(void) -{ - return nsec(); + xqlock(fs); + nfree = fs->super->d.ndfree; + nfree += (fs->limit - fs->super->d.eaddr)/Dblksz; + xqunlock(fs); + return nfree; } void -okaddr(u64int addr) +fsstats(int) { - if(addr < Dblksz || addr >= fs->limit) - error("okaddr %#ullx", addr); + int i; + + fprint(2, "mblks:\t%4uld nblk %4uld nablk %4uld mused %4uld mfree\n", + fs->nblk, fs->nablk, fs->nmused, fs->nmfree); + fprint(2, "lists:\t%4uld lru %#4uld dirty %#4uld refs %4uld total\n", + fs->lru.n, fs->mdirty.n, fs->refs.n, + fs->lru.n + fs->mdirty.n + fs->refs.n); + fprint(2, "dblks:\t %4ulld dtot %4ulld dfree (%ulld list + %ulld rem)\n", + fs->limit/Dblksz - 1, fsdiskfree(), fs->super->d.ndfree, + (fs->limit - fs->super->d.eaddr)/Dblksz); + fprint(2, "paths:\t%4uld alloc %4uld free (%4uld bytes)\n", + pathalloc.nalloc, pathalloc.nfree, pathalloc.elsz); + fprint(2, "mfs:\t%4uld alloc %4uld free (%4uld bytes)\n", + mfalloc.nalloc, mfalloc.nfree, mfalloc.elsz); + fprint(2, "nmelts:\t%d\n", fs->nmelts); + fprint(2, "nindirs:\t"); + for(i = 0; i < nelem(fs->nindirs); i++) + fprint(2, "%d ", fs->nindirs[i]); + fprint(2, "\n"); + fprint(2, "\n"); + fprint(2, "Fsysmem:\t%uld\n", Fsysmem); + fprint(2, "Dminfree:\t%d\n", Dminfree); + fprint(2, "Dblksz: \t%uld\n", Dblksz); + fprint(2, "Mblksz: \t%ud\n", sizeof(Memblk)); + fprint(2, "Dminattrsz:\t%uld\n", Dminattrsz); + fprint(2, 
"Nblkgrpsz:\t%uld\n", Nblkgrpsz); + fprint(2, "Dblkdatasz:\t%d\n", Dblkdatasz); + fprint(2, "Embedsz:\t%d\n", Embedsz); + fprint(2, "Dentryperblk:\t%d\n", Dblkdatasz/sizeof(Dentry)); + fprint(2, "Dptrperblk:\t%d\n\n", Dptrperblk); + + for(i = 0; i < nelem(nfsopcalls); i++) + if(nfsopcalls[i] == 0) + fprint(2, "%s:\t0 calls\t0 µs\n", fsopname[i]); + else + fprint(2, "%s:\t%uld calls\t%ulld µs\n", fsopname[i], + nfsopcalls[i], (fsoptime[i]/nfsopcalls[i])/1000); } int @@ -55,52 +99,106 @@ /* * NO LOCKS. debug only + * */ void -fsdump(int disktoo) +fsdump(int full, int disktoo) { - int i; + int i, n, x; Memblk *b; u64int a; + extern int fullfiledumps; + x = fullfiledumps; + fullfiledumps = full; nodebug(); if(fs != nil){ - print("\n\nfsys '%s' limit %#ullx super m%#p root m%#p:\n", + fprint(2, "\n\nfsys '%s' limit %#ullx super m%#p root m%#p:\n", fs->dev, fs->limit, fs->super, fs->root); - print("%H\n", fs->super); + fprint(2, "%H\n", fs->super); dfdump(fs->root, disktoo); - for(b = fs->refs; b != nil; b = b->next) - print("ref %H\n", b); - if(1) + mlistdump("refs", &fs->refs); + if(1){ + n = 0; + fprint(2, "hash:"); for(i = 0; i < nelem(fs->fhash); i++) - for(b = fs->fhash[i].b; b != nil; b = b->next) - print("h[%d] = d%#ullx\n", i, b->addr); - + for(b = fs->fhash[i].b; b != nil; b = b->next){ + if(n++ % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "d%#010ullx ", EP(b->addr)); + } + fprint(2, "\n"); + } } - b = fs->super; - if(b->d.free != 0){ - print("free:"); - for(a = b->d.free; a != 0; a = dbgetref(a)) - print(" d%#ullx", a); - print("\n"); + if(fs->super->d.free != 0){ + fprint(2, "free:"); + i = 0; + for(a = fs->super->d.free; a != 0; a = dbgetref(a)){ + if(i++ % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "d%#010ullx ", EP(a)); + } + fprint(2, "\n"); } - print("mru:"); - for(b = fs->mru; b != nil; b = b->lnext) - print(" d%#ullx", b->addr); - print("\n"); - fsstats(); + mlistdump("mru", &fs->lru); + mlistdump("dirty", &fs->mdirty); + fsstats(0); + fullfiledumps = x; 
debug(); } +/* + * NO LOCKS: + * The disk FS should be quiescent. + * + * Failed checks are reported but not fixed (but for leaked blocks). + * The user is expected to format the partition and restore contents from venti. + * We might easily remove the dir entries for corrupt files, and restore + */ void -fslist(void) +fscheck(void) { - nodebug(); - print("fsys '%s' blksz %ulld maxfsz %ulld:\n", - fs->dev, fs->super->d.dblksz, maxfsz); - dflist(fs->root, nil); - print("\n"); - debug(); + long i; + u64int n, addr; + + if(fs->chk == nil) + fs->chk = mallocz(fs->ndblk, 1); + else + memset(fs->chk, 0, fs->ndblk); + if(catcherror()){ + fprint(2, "fscheck: %r\n"); + return; + } + + fprint(2, "%s: checking %s...\n", argv0, fs->dev); + dfcountrefs(fs->root); + dprint("countfree...\n"); + dfcountfree(); + + dprint("checks...\n"); + for(addr = 0; addr < fs->super->d.eaddr; addr += Dblksz){ + i = addr/Dblksz; + if(fs->chk[i] == 0){ + fprint(2, "fscheck: d%#010ullx: leak\n", addr); + if(!catcherror()){ + dbsetref(addr, fs->super->d.free); + fs->super->d.free = addr; + noerror(); + } + continue; + } + if(fs->chk[i] == 0xFF) + continue; + n = dbgetref(addr); + if(fs->chk[i] == 0xFE && n < (u64int)0xFE) + fprint(2, "fscheck: d%#010ullx: found >%ud != ref %ulld\n", + addr, fs->chk[i], n); + if(fs->chk[i] < 0xFE && n != fs->chk[i]) + fprint(2, "fscheck: d%#010ullx: found %ud != ref %ulld\n", + addr, fs->chk[i], n); + } + noerror(); + fprint(2, "%s: %s check complete\n", argv0, fs->dev); } static usize @@ -117,26 +215,45 @@ return sz; } -static void -freezerefs(void) -{ - Memblk *rb; - - qlock(&fs->rlk); - for(rb = fs->refs; rb != nil; rb = rb->next) - rb->frozen = 1; - qunlock(&fs->rlk); -} +/* + * To preserve coherency, blocks written are always frozen. + * DBref blocks with RCs and the free block list require some care: + * + * On disk, the super block indicates that even (odd) DBref blocks are active. + * On memory, the super selects even (odd) refs (we read refs from there.) 
+ * To sync... + * 1. we make a frozen super to indicate that odd (even) DBrefs are active. + * 2. we write odd (even) DBref blocks. + * 3. the frozen super is written, indicating that odd (even) refs are in use. + * (The disk is coherent now, pretending to use odd (even) refs). + * 4. The memory super is updated to select odd (even) DBref blocks. + * (from now on, we are loading refs from odd (even) blocks). + * 5. we update even (odd) DBref blocks, so we can get back to 1. + * with even/odd swapped. + * + */ static void -writerefs(void) +freezesuperrefs(void) { - Memblk *rb; + Memblk *b, *rb; - qlock(&fs->rlk); - for(rb = fs->refs; rb != nil; rb = rb->next) - meltedref(rb); - qunlock(&fs->rlk); + b = mballoc(fs->super->addr); + xqlock(fs); + b->type = fs->super->type; + b->d = fs->super->d; + b->d.oddrefs = !fs->super->d.oddrefs; + assert(fs->fzsuper == nil); + fs->fzsuper = b; + b->frozen = 1; + b->dirty = 1; /* so it's written */ + xqlock(&fs->refs); + for(rb = fs->refs.hd; rb != nil; rb = rb->lnext){ + rb->frozen = 1; + rb->changed = rb->dirty; + } + xqunlock(&fs->refs); + xqunlock(fs); } static Memblk* @@ -172,54 +289,6 @@ return super; } -static void -freezesuper(void) -{ - Memblk *b; - - b = mbdup(fs->super); - qlock(fs); - b->d = fs->super->d; - assert(fs->fzsuper == nil); - fs->fzsuper = b; - fs->fzsuper->frozen = 1; - qunlock(fs); -} - -static void -writezsuper(void) -{ - if(canqlock(&fs->fzlk)) - fatal("writezsuper: lock"); - assert(fs->fzsuper != nil); - dbwrite(fs->fzsuper); - dDprint("writezsuper: %H\n", fs->fzsuper); - mbput(fs->fzsuper); - fs->fzsuper = nil; -} - -/* - * Write any dirty frozen state after a freeze. - * Only this function and initialization routines - * may write to the disk.
- */ -static void -fswrite(void) -{ - qlock(&fs->fzlk); - if(fs->fzsuper == nil) - fatal("can't fswrite if we didn't fsfreeze"); - if(catcherror()){ - qunlock(&fs->fzlk); - error(nil); - } - writerefs(); - dfsync(fs->archive); - writezsuper(); - noerror(); - qunlock(&fs->fzlk); -} - /* * Freeze the file tree, keeping active as a new melted file * that refers to frozen children now in the archive. @@ -235,8 +304,13 @@ { Memblk *na, *oa, *arch; char name[50]; + vlong t0; + u64int id; - qlock(&fs->fzlk); + dprint("freezing fs...\n"); + t0 = nsec(); + xqlock(&fs->fzlk); + nfsopcalls[Freeze]++; if(catcherror()){ /* * There was an error during freeze. @@ -247,6 +321,7 @@ fatal("freeze: %r"); } oa = fs->active; + arch = fs->archive; rwlock(fs->root, Wr); rwlock(oa, Wr); @@ -255,7 +330,7 @@ /* * move active into /archive/. */ - seprint(name, name+sizeof(name), "%ulld", oa->d.epoch); + seprint(name, name+sizeof(name), "%ulld", oa->mf->mtime); wname(oa, name, strlen(name)+1); dflink(arch, oa); @@ -268,24 +343,163 @@ /* 2. Freeze the on-disk reference counters * and the state of the super-block. */ - freezerefs(); - freezesuper(); + dprint("freezing refs...\n"); + freezesuperrefs(); - /* 3. Make a new archive and replace the old one. + /* 3. Make a new active and replace the old one. */ na = dbdup(oa); rwlock(na, Wr); + id = nsec(); + wid(na, &id, sizeof id); wname(na, "active", strlen("active")+1); + fs->active = na; + dfchdentry(fs->root, oa->addr, na->addr, 1); + assert(oa->ref > 1); /* release fs->active */ + mbput(oa); + rwunlock(na, Wr); rwunlock(fs->root, Wr); - qunlock(&fs->fzlk); + + /* 4. Try to advance fids within active to their + * most recent melted files, to release refs to old frozen files. 
+ */ + meltfids(); + + fsoptime[Freeze] += nsec() - t0; + xqunlock(&fs->fzlk); noerror(); return na; } +static long +writerefs(void) +{ + Memblk *rb; + long n; + + n = 0; + xqlock(&fs->refs); + for(rb = fs->refs.hd; rb != nil; rb = rb->lnext){ + if(rb->dirty && rb->frozen) + n++; + meltedref(rb); + } + xqunlock(&fs->refs); + return n; +} + +/* + * Written blocks become mru, perhaps we should + * consider keeping their location in the lru list, at the + * expense of visiting them while scanning for blocks to move out. + */ +static long +writedata(void) +{ + Memblk *b, *nb; + long nw; + + nw = 0; + qlock(&fs->mdirty); + b = fs->mdirty.hd; + fs->mdirty.hd = nil; + fs->mdirty.tl = nil; + fs->mdirty.n = 0; + qunlock(&fs->mdirty); + for(; b != nil; b = nb){ + nb = b->lnext; + b->lnext = nil; + b->lprev = nil; + assert(b->dirty); + if((b->addr&Fakeaddr) != 0) + fatal("write data on fake address"); + dbwrite(b); + nw++; + } + return nw; +} + +static void +writezsuper(void) +{ + if(canqlock(&fs->fzlk)) + fatal("writezsuper: lock"); + assert(fs->fzsuper != nil); + dbwrite(fs->fzsuper); + dprint("writezsuper: %H\n", fs->fzsuper); + mbput(fs->fzsuper); + fs->fzsuper = nil; +} + +static void +syncref(u64int addr) +{ + static Memblk b; + + b.addr = addr; + b.type = DBref; + dbread(&b); + if(fs->super->d.oddrefs == 0) /* then the old ones are odd */ + addr += Dblksz; + dWprint("syncref d%#010ullx at d%#010ullx\n", b.addr, addr); + if(pwrite(fs->fd, &b.d, sizeof b.d, addr) != sizeof b.d) + error("syncref: write: %r"); +} + +static void +syncrefs(void) +{ + Memblk *rb; + + fs->super->d.oddrefs = !fs->super->d.oddrefs; + xqlock(&fs->refs); + rb = fs->refs.hd; + xqunlock(&fs->refs); + for(; rb != nil; rb = rb->lnext){ + if(rb->changed) + syncref(rb->addr); + rb->changed = 0; + } +} + + +/* + * Write any dirty frozen state after a freeze. + * Only this function and initialization routines (i.e., super, refs) + * may lead to writes. 
+ */ +static void +fswrite(void) +{ + vlong t0; + long nr, nb; + + dprint("writing fs...\n"); + t0 = nsec(); + xqlock(&fs->fzlk); + nfsopcalls[Write]++; + if(fs->fzsuper == nil) + fatal("can't fswrite if we didn't fsfreeze"); + if(catcherror()){ + fsoptime[Write] += nsec() - t0; + xqunlock(&fs->fzlk); + error(nil); + } + nr = writerefs(); + nb = writedata(); + writezsuper(); + nb++; + syncrefs(); + noerror(); + fsoptime[Write] += nsec() - t0; + xqunlock(&fs->fzlk); + dprint("fs written (2*%ld refs %ld data)\n", nr, nb); +} + static void fsinit(char *dev, int nblk) { @@ -309,15 +523,28 @@ if(nblk > 0 && nblk < fs->nablk) fs->nablk = nblk; fs->limit = disksize(fs->fd); - fs->limit = fs->limit/Dblksz*Dblksz; + fs->ndblk = fs->limit/Dblksz; + fs->limit = fs->ndblk*Dblksz; if(fs->limit < 10*Dblksz) fatal("buy a larger disk"); - if(fs->nablk > fs->limit/Dblksz){ - fs->nablk = fs->limit/Dblksz; - print("%s: using only %uld blocks (small disk)\n", argv0, fs->nablk); + if(fs->nablk > fs->ndblk){ + fprint(2, "%s: using %uld blocks and not %uld (small disk)\n", + argv0, fs->ndblk, fs->nablk); + fs->nablk = fs->ndblk; } fs->blk = malloc(fs->nablk * sizeof fs->blk[0]); - dDprint("fsys '%s' init\n", fs->dev); + dprint("fsys '%s' init\n", fs->dev); +} + +void +fssync(void) +{ + /* + * TODO: If active has not changed and we are just going + * to dump a new archive for no change, do nothing. + */ + fsfreeze(); + fswrite(); } /* @@ -332,7 +559,7 @@ { Memblk *super; - fsinit(dev, 16); /* enough # of blocks for fmt */ + fsinit(dev, Mmaxfree); /* enough # of blocks for fmt */ if(catcherror()) fatal("fsfmt: error: %r"); @@ -355,25 +582,11 @@ fs->archive = dfcreate(fs->root, "archive", getuser(), DMDIR|0555); rwunlock(fs->root, Wr); super->d.root = fs->archive->addr; - fsfreeze(); - fswrite(); + fssync(); noerror(); } -void -fssync(void) -{ - /* - * TODO: If active has not changed and we are just going - * to dump a new archive for no change, do nothing. 
- */ - dDprint("syncing\n"); - fsfreeze(); - fswrite(); - dDprint("synced\n"); -} - /* * One process per file system, so consume all the memory * for the cache. @@ -383,8 +596,8 @@ void fsopen(char *dev) { - Memblk *arch; - Memblk *last, *c; + Memblk *arch, *last, *c; + u64int id; int i; if(catcherror()) @@ -393,7 +606,7 @@ fsinit(dev, 0); readsuper(); - qlock(&fs->fzlk); + xqlock(&fs->fzlk); fs->root = dfcreate(nil, "", getuser(), DMDIR|0555); arch = dbget(DBfile, fs->super->d.root); fs->archive = arch; @@ -401,7 +614,7 @@ rwlock(arch, Wr); last = nil; for(i = 0; (c = dfchild(arch, i)) != nil; i++){ - if(last == nil || last->d.epoch < c->d.epoch){ + if(last == nil || last->mf->mtime < c->mf->mtime){ mbput(last); last = c; incref(c); @@ -411,7 +624,11 @@ if(last != nil){ rwlock(last, Rd); fs->active = dbdup(last); + mbput(last->mf->melted); /* could keep it, but no need */ + last->mf->melted = nil; wname(fs->active, "active", strlen("active")+1); + id = nsec(); + wid(fs->active, &id, sizeof id); rwlock(fs->active, Wr); dflink(fs->root, fs->active); rwunlock(fs->active, Wr); @@ -421,10 +638,10 @@ fs->active = dfcreate(fs->root, "active", getuser(), DMDIR|0775); dflink(fs->root, arch); rwunlock(arch, Wr); - fs->cons = dfcreate(fs->root, "cons", getuser(), DMEXCL|600); + fs->cons = dfcreate(nil, "cons", getuser(), DMEXCL|600); fs->consc = chancreate(sizeof(char*), 256); rwunlock(fs->root, Wr); - qunlock(&fs->fzlk); + xqunlock(&fs->fzlk); noerror(); } @@ -433,10 +650,10 @@ { uvlong nfree; - qlock(fs); + xqlock(fs); nfree = fs->nablk - fs->nblk; nfree += fs->nmfree; - qunlock(fs); + xqunlock(fs); return nfree; } @@ -446,86 +663,80 @@ * keeping some files/blocks locked. 
*/ int -fslowmem(void) +fslru(void) { - int type; - ulong n, tot; Memblk *b, *bprev; + vlong t0; + int x; + long target, tot, n, ign; if(fsmemfree() > Mminfree) return 0; - dDprint("low on memory\n"); - tot = 0; + x = setdebug(); + dprint("fslru: low on memory %ulld free %d min\n", fsmemfree(), Mminfree); + tot = ign = 0; do{ - if(fsmemfree() > Mmaxfree) + target = Mmaxfree - fsmemfree(); + t0 = nsec(); + if(!canqlock(&fs->fzlk)) /* we'll get called later */ break; - qlock(&fs->fzlk); - qlock(&fs->llk); + xqlock(&fs->lru); + nfsopcalls[Lru]++; if(catcherror()){ - qunlock(&fs->llk); - qunlock(&fs->fzlk); - fprint(2, "%s: fslowmem: %r\n", argv0); + fsoptime[Lru] += t0 - nsec(); + xqunlock(&fs->lru); + xqunlock(&fs->fzlk); + fprint(2, "%s: fslru: %r\n", argv0); break; } - Again: n = 0; - for(b = fs->lru; b != nil && tot < Mmaxfree; b = bprev){ + for(b = fs->lru.tl; b != nil && target > 0; b = bprev){ bprev = b->lprev; - type = TAGTYPE(b->d.tag); - switch(type){ + if(b->dirty) + fatal("fslru: dirty block on lru\n"); + switch(b->type){ case DBfree: - goto Again; + /* can happen. but, does it? */ + fatal("fslru: DBfree on lru\n", argv0); case DBsuper: case DBref: - dDprint("out: ignored: m%#p\n", b); - continue; + fatal("fslru: type %d found on lru\n", b->type); case DBfile: if(b == fs->root || b == fs->active || b == fs->archive){ - dDprint("out: ignored: m%#p\n", b); + ign++; continue; } break; } - if(b->dirty || b->ref > 1){ - dDprint("out: ignored: m%#p\n", b); + if(b->ref > 1){ + ign++; continue; } /* * Blocks here have one ref because of the hash table, * which means they are are not used. * We release the hash ref to let them go. - * bprev might move while we put b, but it would - * only go to another place in the lru list, or to - * the free list, but that's ok. + * bprev can't move while we put b. 
*/ - qunlock(&fs->llk); - dDprint("block out: m%#p d%#ullx\n", b, b->addr); - mbput(b); - qlock(&fs->llk); + dOprint("fslru: out: m%#p d%#010ullx\n", b, b->addr); + mbunhash(b, 1); n++; tot++; + target--; } noerror(); - qunlock(&fs->llk); - qunlock(&fs->fzlk); - }while(n > 0); - if(tot == 0) - fprint(2, "%s: low mem and everything in use or dirty.\n", argv0); - else - dDprint("out: %uld blocks\n", tot); - return 1; -} - -static uvlong -fsdiskfree(void) -{ - uvlong nfree; - - qlock(fs); - nfree = fs->super->d.ndfree; - nfree += (fs->limit - fs->super->d.eaddr)/Dblksz; - qunlock(fs); - return nfree; + fsoptime[Lru] += t0 - nsec(); + xqunlock(&fs->lru); + xqunlock(&fs->fzlk); + }while(n > 0 && target > 0); + if(tot == 0){ + fprint(2, "%s: low on mem (0 out; %uld ignored)\n", argv0, ign); + tot = -1; + }else + dprint("fslru: %uld out %uld ignored %ulld free %d min %d max\n", + tot, ign, fsmemfree(), Mminfree, Mmaxfree); + rlsedebug(x); + return tot; } /* @@ -541,15 +752,12 @@ if(1){ fprint(2, "file system full:\n"); - fsdump(0); + fsdump(0, 0); fatal("aborting"); } return 1; } -/* - * This should be called if fs->super->d.nfree < some number. 
- */ int fsreclaim(void) { @@ -558,32 +766,25 @@ u64int addr; Blksl sl; Dentry *de; - ulong n, tot; + long n, tot; - if(fsdiskfree() > Dminfree) - return 0; - - fprint(2, "%s: low on disk: reclaiming...\n", argv0); - qlock(&fs->fzlk); + fprint(2, "%s: %ulld free: reclaiming...\n", argv0, fsdiskfree()); + xqlock(&fs->fzlk); arch = fs->archive; rwlock(arch, Wr); if(catcherror()){ rwunlock(arch, Wr); - qunlock(&fs->fzlk); + xqunlock(&fs->fzlk); error(nil); } tot = 0; for(;;){ - if(fsdiskfree() > Dmaxfree){ - dDprint("fsreclaim: got >= %d free\n", Dmaxfree); - break; - } - dDprint("fsreclaim: reclaiming\n"); + dprint("fsreclaim: reclaiming\n"); victim = nil; for(i = 0; (c = dfchild(arch, i)) != nil; i++){ if(victim == nil) victim = c; - else if(victim->d.epoch > c->d.epoch){ + else if(victim->mf->mtime > c->mf->mtime){ mbput(victim); victim = c; }else @@ -592,11 +793,11 @@ } if(i < 2){ mbput(victim); - dDprint("nothing to reclaim\n"); + dprint("nothing to reclaim\n"); break; } fprint(2, "%s: reclaiming /archive/%s\n", argv0, victim->mf->name); - dDprint("victim is %H\n", victim); + dprint("victim is %H\n", victim); /* * Don't make a new archive. 
Edit in-place the one we have to @@ -612,44 +813,63 @@ } de = sl.data; de->file = 0; + changed(sl.b); + munlink(&fs->mdirty, sl.b, 0); dbwrite(sl.b); noerror(); mbput(sl.b); - n = dbgetref(victim->addr); - if(n != 1) - fatal("reclaim: victim disk ref is %d != 1", n); + n = dfreclaim(victim); + mbput(victim); fs->super->d.root = fs->archive->addr; - n = dfreclaim(victim); - mbput(victim); - dDprint("%uld block%s reclaimed\n", n, n?"s":""); + dprint("fsreclaim: %uld file%s reclaimed\n", n, n?"s":""); tot += n; - freezerefs(); - writerefs(); - freezesuper(); - writezsuper(); + if(fsdiskfree() > Dmaxfree){ + dprint("fsreclaim: %d free: done\n", Dmaxfree); + break; + } } - if(tot > 0) - fprint(2, "%s: %uld block%s reclaimed\n", argv0, tot, tot?"s":""); + if(tot == 0){ + fprint(2, "%s: low on disk: 0 files reclaimed %ulld blocks free\n", + argv0, fsdiskfree()); + tot = -1; + }else + fprint(2, "%s: %uld file%s reclaimed %ulld blocks free\n", + argv0, tot, tot?"s":"", fsdiskfree()); rwunlock(arch, Wr); - qunlock(&fs->fzlk); + xqunlock(&fs->fzlk); noerror(); - return 1; + return tot; } +/* + * Policy for memory and and disk block reclaiming. + * Should be called from time to time to guarantee that there are + * free blocks. + * + * If low on memory, move some blocks out. + * If we can't, sync to make some available the next time. + * Either way, reclaim old freezes if low on disk, but don't + * do that if we are low on memory, because that might + * require loading disk blocks. + */ void fspolicy(void) { - /* - * If low on memory, move some blocks out. - * Otherwise, reclaim old snapshots if low on disk. 
- */ - if(fslowmem()) + switch(fslru()){ + case -1: /* wanted blocks; lru had none */ fssync(); - else - fsreclaim(); + break; + case 0: /* did not want blocks */ + default: /* wanted blocks; lru had some */ + if(fsdiskfree() < Dminfree){ + if(fsreclaim() > 0) + fssync(); + } + break; + } } diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/ix.c --- a/sys/src/cmd/creepy/ix.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/ix.c Wed Mar 07 15:41:27 2012 +0000 @@ -76,10 +76,22 @@ static int ixrreadhdrsz; void -ixstats(void) +ixstats(int clr) { - print("srpcs:\t%4uld alloc %4uld free (%4uld bytes)\n", + int i; + + fprint(2, "srpcs:\t%4uld alloc %4uld free (%4uld bytes)\n", srpcalloc.nalloc, srpcalloc.nfree, srpcalloc.elsz); + for(i = 0; i < nelem(ixcalls); i++) + if(ixcalls[i] != nil && ncalls[i] > 0){ + fprint(2, "%-8s\t%5uld calls\t%11ulld µs\n", + callname[i], ncalls[i], + (calltime[i]/ncalls[i])/1000); + if(clr){ + ncalls[i] = 0; + calltime[i] = 0; + } + } } @@ -135,7 +147,7 @@ { putfid(rpc->fid); rpc->rpc0->fid = newfid(rpc->cli, -1); - attach(rpc->rpc0->fid, rpc->xt.aname, rpc->xt.uname); + fidattach(rpc->rpc0->fid, rpc->xt.aname, rpc->xt.uname); } static void @@ -153,7 +165,7 @@ if(rpc->rpc0->fid == nil) error("fid not set"); - nfid = clone(rpc->cli, rpc->rpc0->fid, -1); + nfid = fidclone(rpc->cli, rpc->rpc0->fid, -1); putfid(rpc->rpc0->fid); rpc->rpc0->fid = nfid; nfid->cflags = rpc->xt.cflags; @@ -164,7 +176,7 @@ { if(rpc->rpc0->fid == nil) error("fid not set"); - walk(rpc->rpc0->fid, rpc->xt.wname); + fidwalk(rpc->rpc0->fid, rpc->xt.wname); } static void @@ -191,11 +203,27 @@ rpc->rpc0->fid->cflags = cflags; } +static ulong +pixd(Memblk *f, uchar *buf, int nbuf) +{ + ulong n; + + if(nbuf < BIT32SZ) + return 0; + if(catcherror()) + return 0; + n = pmeta(buf+BIT32SZ, nbuf-BIT32SZ, f->mf); + noerror(); + PBIT32(buf, n); + return n+BIT32SZ; +} + static void rread(Rpc *rpc) { vlong off; Fid *fid; + int nmsg; fid = rpc->rpc0->fid; if(fid == nil) @@ 
-211,11 +239,12 @@ * As usual, the caller sends the last reply when we return. */ off = rpc->xt.offset; + nmsg = rpc->xt.nmsg; for(;;){ - rpc->xr.count = fidread(fid, rpc->xr.data, rpc->xt.count, off); + rpc->xr.count = fidread(fid, rpc->xr.data, rpc->xt.count, off, pixd); if(rpc->xr.count == 0) break; - if(rpc->xt.nmsg-- <= 0) + if(nmsg-- <= 0) break; if(reply(rpc) < 0) break; @@ -313,6 +342,7 @@ error(nil); } dfwattr(f, rpc->xt.attr, rpc->xt.value, rpc->xt.nvalue); + noerror(); rwunlock(f, Wr); } @@ -334,8 +364,8 @@ error(nil); } dfcattr(f, rpc->xt.op, rpc->xt.attr, rpc->xt.value, rpc->xt.nvalue); + noerror(); rwunlock(f, Rd); - noerror(); } static void @@ -361,7 +391,7 @@ nhdr = readn(fd, hdr, sizeof hdr); if(nhdr < 0){ - dxprint("readix: %r\n"); + dXprint("readix: %r\n"); return nil; } if(nhdr == 0){ @@ -400,6 +430,7 @@ freeixrpc(rpc); return nil; } + rpc->t0 = nsec(); if(ixunpack(rpc->data, sz-BIT16SZ, &rpc->xt) != sz-BIT16SZ){ freeixrpc(rpc); return nil; @@ -419,7 +450,7 @@ chan = rpc->chan&Tmask; if(rpc->xr.type == IXRerror || (rpc->chan&Tlast) != 0) chan |= Tlast; - qlock(&cli->wlk); + xqlock(&cli->wlk); if(largeix[rpc->xt.type]) buf = rpc->data; else @@ -436,18 +467,24 @@ p += sz; if(rpc->rpc0->flushed){ - qunlock(&cli->wlk); + xqunlock(&cli->wlk); werrstr("flushed"); - dxprint("write: flushed"); + dXprint("write: flushed"); return -1; } - dxprint("-> %G\n", &rpc->xr); + if(chan&Tlast){ + putfid(rpc->rpc0->fid); + rpc->rpc0->fid = nil; + } + dXprint("-> %G\n", &rpc->xr); if(write(cli->fd, buf, p-buf) != p-buf){ - qunlock(&cli->wlk); - dxprint("write: %r"); + xqunlock(&cli->wlk); + dXprint("write: %r"); return -1; } - qunlock(&cli->wlk); + calltime[rpc->xt.type] += nsec() - rpc->t0; + ncalls[rpc->xt.type]++; + xqunlock(&cli->wlk); return p-buf; } @@ -458,7 +495,9 @@ Cli *cli; Channel *c; char err[128]; - long nw; + long nw, count; + int nerr; + Fid *fid; c = v; if(*aux == nil){ @@ -473,6 +512,7 @@ threadsetname("rpcworkerix %s chan %d", cli->addr, 
rpc0->chan); dPprint("%s started\n", threadgetname()); do{ + nerr = nerrors(); rpc->xr.type = rpc->xt.type + 1; rpc->rpc0 = rpc0; if(catcherror()){ @@ -485,12 +525,41 @@ ixcalls[rpc->xt.type](rpc); noerror(); } + + fid = nil; + if(rpc->fid != nil && rpc0->fid->ref > 1){ + /* The fid is not clunked by this rpc; read/walk ahead ok*/ + fid = rpc0->fid; + incref(fid); + } + if(catcherror()){ + if(fid != nil) + putfid(fid); + } + nw = reply(rpc); + + if(fid != nil){ + switch(rpc->xt.type){ + case Tread: + count = rpc->xt.count; + if(rpc->xr.type == Rread) + if(rpc->xt.nmsg <= 1 && rpc->xr.count == count) + fidrahead(rpc0->fid, rpc->xt.offset+count); + break; + case Twalk: + if(rpc->xr.type == Rwalk) + fidwahead(rpc0->fid); + break; + } + putfid(fid); + } + if(rpc != rpc0) freeixrpc(rpc); + if(nerrors() != nerr) + fatal("%s: unbalanced error stack", threadgetname()); }while(!rpc0->closed && nw > 0 && err[0] == 0 && (rpc = recvp(c)) != nil); - if(rpc0->fid != nil && (rpc0->fid->cflags&OCEND) != 0) - putfid(rpc0->fid); /* clunk */ while((rpc = nbrecvp(c)) != nil) freeixrpc(rpc); replied(rpc0); @@ -521,15 +590,17 @@ ixinit(); for(;;){ + if(dbg['E']) + dumpfids(); loop: rpc = readix(cli->fd); if(rpc == nil){ - dxprint("%s: read: %r\n", cli->addr); + dXprint("%s: read: %r\n", cli->addr); break; } rpc->cli = cli; incref(cli); - qlock(&cli->rpclk); + xqlock(&cli->rpclk); for(r = cli->rpcs; r != nil; r = r->next) if((r->chan&Tmask) == (rpc->chan&Tmask)){ if(rpc->chan&Tlast) @@ -538,12 +609,12 @@ else r->closed = 1; sendp(r->c, rpc); - qunlock(&cli->rpclk); + xqunlock(&cli->rpclk); goto loop; } if((rpc->chan&Tfirst) == 0){ /* it's channel is gone */ freeixrpc(rpc); - qunlock(&cli->rpclk); + xqunlock(&cli->rpclk); goto loop; } @@ -553,7 +624,7 @@ if(rpc->c == nil) rpc->c = chancreate(sizeof(Rpc*), 64); cli->nrpcs++; - qunlock(&cli->rpclk); + xqunlock(&cli->rpclk); fspolicy(); diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/ix.h --- a/sys/src/cmd/creepy/ix.h Tue Mar 06 
16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/ix.h Wed Mar 07 15:41:27 2012 +0000 @@ -56,7 +56,8 @@ * There is no flush. Flushing is done by flushing the channel. */ -enum{ +enum +{ IXTversion = 50, IXRversion, IXTattach, diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/ixcall.c --- a/sys/src/cmd/creepy/ixcall.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/ixcall.c Wed Mar 07 15:41:27 2012 +0000 @@ -8,6 +8,86 @@ #include "ix.h" #include "net.h" +static char* cname[CMAX] = +{ + [CEQ] "==", + [CGE] ">=", + [CGT] "> ", + [CLT] "< ", + [CLE] "<=", + [CNE] "!=", +}; + +vlong calltime[Tmax]; +ulong ncalls[Tmax]; + +char* callname[] = +{ + /* ix requests */ + [IXTversion] "Tversion", + [IXRversion] "Rversion", + [IXTattach] "Tattach", + [IXRattach] "Rattach", + [IXTfid] "Tfid", + [IXRfid] "Rfid", + [__IXunused__] "__IXunused__", + [IXRerror] "Rerror", + [IXTclone] "Tclone", + [IXRclone] "Rclone", + [IXTwalk] "Twalk", + [IXRwalk] "Rwalk", + [IXTopen] "Topen", + [IXRopen] "Ropen", + [IXTcreate] "Tcreate", + [IXRcreate] "Rcreate", + [IXTread] "Tread", + [IXRread] "Rread", + [IXTwrite] "Twrite", + [IXRwrite] "Rwrite", + [IXTclunk] "Tclunk", + [IXRclunk] "Rclunk", + [IXTremove] "Tremove", + [IXRremove] "Rremove", + [IXTattr] "Tattr", + [IXRattr] "Rattr", + [IXTwattr] "Twattr", + [IXRwattr] "Rwattr", + [IXTcond] "Tcond", + [IXRcond] "Rcond", + [IXTmove] "Tmove", + [IXRmove] "Rmove", + + /* 9p requests */ + [Tversion] "Tversion", + [Rversion] "Rversion", + [Tauth] "Tauth", + [Rauth] "Rauth", + [Tattach] "Tattach", + [Rattach] "Rattach", + [Terror] "Terror", + [Rerror] "Rerror", + [Tflush] "Tflush", + [Rflush] "Rflush", + [Twalk] "Twalk", + [Rwalk] "Rwalk", + [Topen] "Topen", + [Ropen] "Ropen", + [Tcreate] "Tcreate", + [Rcreate] "Rcreate", + [Tread] "Tread", + [Rread] "Rread", + [Twrite] "Twrite", + [Rwrite] "Rwrite", + [Tclunk] "Tclunk", + [Rclunk] "Rclunk", + [Tremove] "Tremove", + [Rremove] "Rremove", + [Tstat] "Tstat", + [Rstat] "Rstat", + [Twstat] "Twstat", 
+ [Rwstat] "Rwstat", +}; + static uchar* pstring(uchar *p, char *s) { @@ -530,83 +610,6 @@ return p - ap; } -static char* cname[CMAX] = -{ - [CEQ] "==", - [CGE] ">=", - [CGT] "> ", - [CLT] "< ", - [CLE] "<=", - [CNE] "!=", -}; - -static char* tname[] = -{ - /* ix requests */ - [IXTversion] "Tversion", - [IXRversion] "Rversion", - [IXTattach] "Tattach", - [IXRattach] "Rattach", - [IXTfid] "Tfid", - [IXRfid] "Rfid", - [__IXunused__] "__IXunused__", - [IXRerror] "Rerror", - [IXTclone] "Tclone", - [IXRclone] "Rclone", - [IXTwalk] "Twalk", - [IXRwalk] "Rwalk", - [IXTopen] "Topen", - [IXRopen] "Ropen", - [IXTcreate] "Tcreate", - [IXRcreate] "Rcreate", - [IXTread] "Tread", - [IXRread] "Rread", - [IXTwrite] "Twrite", - [IXRwrite] "Rwrite", - [IXTclunk] "Tclunk", - [IXRclunk] "Rclunk", - [IXTremove] "Tremove", - [IXRremove] "Rremove", - [IXTattr] "Tattr", - [IXRattr] "Rattr", - [IXTwattr] "Twattr", - [IXRwattr] "Rwattr", - [IXTcond] "Tcond", - [IXRcond] "Rcond", - [IXTmove] "Tmove", - [IXRmove] "Rmove", - - /* 9p requests */ - [Tversion] "Tversion", - [Rversion] "Rversion", - [Tauth] "Tauth", - [Rauth] "Rauth", - [Tattach] "Tattach", - [Rattach] "Rattach", - [Terror] "Terror", - [Rerror] "Rerror", - [Tflush] "Tflush", - [Rflush] "Rflush", - [Twalk] "Twalk", - [Rwalk] "Rwalk", - [Topen] "Topen", - [Ropen] "Ropen", - [Tcreate] "Tcreate", - [Rcreate] "Rcreate", - [Tread] "Tread", - [Rread] "Rread", - [Twrite] "Twrite", - [Rwrite] "Rwrite", - [Tclunk] "Tclunk", - [Rclunk] "Rclunk", - [Tremove] "Tremove", - [Rremove] "Rremove", - [Tstat] "Tstat", - [Rstat] "Rstat", - [Twstat] "Twstat", - [Rwstat] "Rwstat", -}; - int rpcfmt(Fmt *fmt) { @@ -617,8 +620,8 @@ return fmtprint(fmt, ""); if(rpc->t.type == 0) return fmtprint(fmt, "Tnull"); - if(rpc->t.type < nelem(tname) && tname[rpc->t.type]) - return fmtprint(fmt, "%s tag %ud", tname[rpc->t.type], rpc->t.tag); + if(rpc->t.type < nelem(callname) && callname[rpc->t.type]) + return fmtprint(fmt, "%s tag %ud", callname[rpc->t.type], 
rpc->t.tag); return fmtprint(fmt, "type=%d??? tag %ud", rpc->t.type, rpc->t.tag); } @@ -689,7 +692,7 @@ type = f->type; if(type < IXTversion || type >= IXTmax) return fmtprint(fmt, "", type); - s = seprint(buf, e, "%s", tname[type]); + s = seprint(buf, e, "%s", callname[type]); switch(type){ case IXTversion: case IXRversion: diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/mblk.c --- a/sys/src/cmd/creepy/mblk.c Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/mblk.c Wed Mar 07 15:41:27 2012 +0000 @@ -33,7 +33,6 @@ { static char*nms[] = { [DBfree] "DBfree", - [DBnew] "DBnew", [DBsuper] "DBsuper", [DBref] "DBref", [DBdata] "DBdata", @@ -53,37 +52,36 @@ return nms[t]; } -#define EP(e) ((e)&0xFFFFFFFFUL) +int fullfiledumps = 0; + /* * NO LOCKS. debug only */ static void -fmttab(Fmt *fmt, int t) +fmttab(Fmt *fmt, int t, int c) { - if(t-- > 0) - fmtprint(fmt, "\t"); while(t-- > 0) - fmtprint(fmt, " "); + fmtprint(fmt, "%c ", c?'.':' '); } int mbtab; static void -fmtptr(Fmt *fmt, u64int addr, char *tag, int n) +fmtptr(Fmt *fmt, int type, u64int addr, char *tag, int n) { Memblk *b; if(addr == 0) return; - b = mbget(addr, 0); + b = mbget(type, addr, 0); if(b == nil){ - fmttab(fmt, mbtab); - fmtprint(fmt, " %s[%d] = d%#ullx \n", tag, n, addr); + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " %s[%d] = d%#010ullx \n", tag, n, addr); }else{ fmtprint(fmt, "%H", b); mbput(b); } } static void -dumpsomedata(Fmt *fmt, Memblk *b) +dumpdirdata(Fmt *fmt, Memblk *b) { long doff; u64int *p; @@ -93,10 +91,10 @@ return; doff = embedattrsz(b); if(doff < Embedsz){ - fmttab(fmt, mbtab); + fmttab(fmt, mbtab, 0); p = (u64int*)(b->d.embed+doff); for(i = 0; i < 5 && (uchar*)p < b->d.embed+Embedsz - BIT64SZ; i++) - fmtprint(fmt, "%s%#ullx", i?" ":" data: ", *p++); + fmtprint(fmt, "%sd%#010ullx", i?" 
":"data: ", EP(*p++)); fmtprint(fmt, "\n"); } } @@ -105,25 +103,24 @@ mbfmt(Fmt *fmt) { Memblk *b; - int type, i, n; + int i, n; b = va_arg(fmt->args, Memblk*); if(b == nil) return fmtprint(fmt, "\n"); nodebug(); - type = TAGTYPE(b->d.tag); - fmttab(fmt, mbtab); - fmtprint(fmt, "m%#p d%#ullx", b, b->addr); + fmttab(fmt, mbtab, b->type == DBfile); + + fmtprint(fmt, "%s", tname(b->type)); + if(b->type == DBfile && b->mf != nil) + fmtprint(fmt, " '%s'", b->mf->name); if(b->frozen) fmtprint(fmt, " FZ"); if(b->dirty) fmtprint(fmt, " DT"); - if(b->written) - fmtprint(fmt, " WR"); - fmtprint(fmt, " %s r%d", tname(type), b->ref); - fmtprint(fmt, " tag %#ullx", EP(b->d.tag)); - if(0)fmtprint(fmt, " epoch %#ullx", EP(b->d.epoch)); - switch(type){ + fmtprint(fmt, " m%#p d%#010ullx", b, EP(b->addr)); + fmtprint(fmt, " r=%d", b->ref); + switch(b->type){ case DBfree: fmtprint(fmt, "\n"); break; @@ -132,56 +129,66 @@ fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); break; case DBref: - fmtprint(fmt, " rnext m%#p", b->rnext); + fmtprint(fmt, " next m%#p", b->lnext); for(i = n = 0; i < Drefperblk; i++) if(b->d.ref[i]){ - if(n++%4 == 0){ + if(n++%3 == 0){ fmtprint(fmt, "\n"); - fmttab(fmt, mbtab); + fmttab(fmt, mbtab, 0); } fmtprint(fmt, " "); - fmtprint(fmt, "[%d]d%#ullx=%#ullx", + fmtprint(fmt, "[%02d]d%#010ullx=%#ullx", i, addrofref(b->addr, i), b->d.ref[i]); } if(n == 0 || --n%4 != 0) fmtprint(fmt, "\n"); break; case DBfile: - fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); + fmtprint(fmt, " dr=%ulld", dbgetref(b->addr)); if(b->mf == nil){ fmtprint(fmt, " no mfile\n"); break; } - fmttab(fmt, mbtab); - fmtprint(fmt, " '%s' asz %#ullx aptr %#ullx melted m%#p\n", - b->mf->name, b->d.asize,b->d.aptr, b->mf->melted); - fmttab(fmt, mbtab); - fmtprint(fmt, " id %#ullx mode %M mt %#ullx sz %#ullx '%s'\n", - EP(b->mf->id), (ulong)b->mf->mode, EP(b->mf->mtime), - b->mf->length, b->mf->uid); - fmttab(fmt, mbtab); fmtprint(fmt, " nr%d nw%d\n", b->mf->readers, b->mf->writer); - 
dumpsomedata(fmt, b); + if(0) + fmtprint(fmt, " asz %#ullx aptr %#ullx", + b->d.asize, b->d.aptr); + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " %M melted m%#p\n", + (ulong)b->mf->mode, b->mf->melted); + if(0){ + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " id %#ullx mode %M mt %#ullx" + " sz %#ullx '%s'\n", + EP(b->mf->id), (ulong)b->mf->mode, + EP(b->mf->mtime), b->mf->length, b->mf->uid); + } mbtab++; + if(b->mf->mode&DMDIR) + dumpdirdata(fmt, b); for(i = 0; i < nelem(b->d.dptr); i++) - fmtptr(fmt, b->d.dptr[i], "d", i); + fmtptr(fmt, DBdata, b->d.dptr[i], "d", i); for(i = 0; i < nelem(b->d.iptr); i++) - fmtptr(fmt, b->d.iptr[i], "i", i); + fmtptr(fmt, DBptr0+i, b->d.iptr[i], "i", i); mbtab--; break; case DBsuper: fmtprint(fmt, "\n"); - fmttab(fmt, mbtab); - fmtprint(fmt, " free d%#ullx eaddr d%#ullx root d%#ullx\n", - b->d.free, b->d.eaddr, b->d.root); + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " free d%#ullx eaddr d%#ullx root d%#ullx %s refs\n", + b->d.free, b->d.eaddr, b->d.root, + b->d.oddrefs?"odd":"even"); break; default: - if(type < DBptr0 || type >= DBptr0+Niptr) - fatal("", type); + if(b->type < DBptr0 || b->type >= DBptr0+Niptr){ + fmtprint(fmt, "", b->type); + break; + } fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); mbtab++; - for(i = 0; i < Dptrperblk; i++) - fmtptr(fmt, b->d.ptr[i], "p", i); + if(fullfiledumps) + for(i = 0; i < Dptrperblk; i++) + fmtptr(fmt, b->type-1, b->d.ptr[i], "p", i); mbtab--; break; } @@ -189,11 +196,14 @@ return 0; } -void -clean(Memblk *b) -{ - b->dirty = 0; -} +/* + * Blocks are kept in a hash while loaded, to look them up. + * When in the hash, they fall into exactly one of this cases: + * - a super block or a fake mem block (e.g., cons, /), unlinked + * - a ref block, linked in the fs->refs list + * - a clean block, linked in the fs mru/lru list + * - a dirty block, linked in the fs dirty list. 
+ */ void ismelted(Memblk *b) @@ -203,110 +213,228 @@ } void -changed(Memblk *b) +munlink(List *l, Memblk *b, int isreclaim) { - if(TAGTYPE(b->d.tag) != DBsuper) - ismelted(b); - b->d.epoch = now(); - b->dirty = 1; - b->written = 0; + if(!isreclaim) + xqlock(l); + if(b->lprev != nil) + b->lprev->lnext = b->lnext; + else + l->hd = b->lnext; + if(b->lnext != nil) + b->lnext->lprev = b->lprev; + else + l->tl = b->lprev; + b->lnext = nil; + b->lprev = nil; + l->n--; + if(!isreclaim) + xqunlock(l); } static void -lruunlink(Memblk *b) +mlink(List *l, Memblk *b) { - if(b->lprev != nil) - b->lprev->lnext = b->lnext; + assert(b->lnext == nil && b->lprev == nil); + xqlock(l); + b->lnext = l->hd; + if(l->hd != nil) + l->hd->lprev = b; else - fs->mru = b->lnext; - if(b->lnext != nil) - b->lnext->lprev = b->lprev; - else - fs->lru = b->lprev; - b->lnext = nil; - b->lprev = nil; + l->tl = b; + l->hd = b; + l->n++; + xqunlock(l); } +static void +mlinklast(List *l, Memblk *b) +{ + xqlock(l); + b->lprev = l->tl; + if(l->tl != nil) + l->tl->lnext = b; + else + l->hd = b; + l->tl = b; + l->n++; + xqunlock(l); +} -static void -lrulink(Memblk *b) +void +mlistdump(char *tag, List *l) { - b->lnext = fs->mru; - b->lprev = nil; - if(fs->mru) - fs->mru->lprev = b; - else - fs->lru = b; - fs->mru = b; + Memblk *b; + int i; + + fprint(2, "%s:", tag); + i = 0; + for(b = l->hd; b != nil; b = b->lnext){ + if(i++ % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "d%#010ullx ", EP(b->addr)); + } + fprint(2, "\n"); } static void mbused(Memblk *b) { - qlock(&fs->llk); - lruunlink(b); - lrulink(b); - qunlock(&fs->llk); + if(b->dirty != 0 || (b->addr&Fakeaddr) != 0) + return; + switch(b->type){ + case DBref: + case DBsuper: + break; + default: + munlink(&fs->lru, b, 0); + mlink(&fs->lru, b); + } +} + +void +mbunused(Memblk *b) +{ + if(b->dirty || (b->addr&Fakeaddr) != 0) /* not on the lru list */ + return; + if(b->type == DBsuper || b->type == DBref) /* idem */ + return; + munlink(&fs->lru, b, 0); + 
mlinklast(&fs->lru, b); +} + +void +changed(Memblk *b) +{ + if(b->type != DBsuper) + ismelted(b); + if(b->dirty || (b->addr&Fakeaddr) != 0) + return; + switch(b->type){ + case DBsuper: + case DBref: + b->dirty = 1; + break; + default: + assert(b->dirty == 0); + munlink(&fs->lru, b, 0); + b->dirty = 1; + mlink(&fs->mdirty, b); + } +} + +void +written(Memblk *b) +{ + assert(b->dirty != 0); + switch(b->type){ + case DBsuper: + case DBref: + b->dirty = 0; + break; + default: + /* + * data blocks are removed from the dirty list, + * then written. They are not on the list while + * being written. + */ + assert(b->lprev == nil && b->lnext == nil); + b->dirty = 0; + + + /* + * heuristic: frozen files that have a melted version + * are usually no longer useful. + */ + if(b->type == DBfile && b->mf->melted != nil) + mlinklast(&fs->lru, b); + else + mlink(&fs->lru, b); + } } static void linkblock(Memblk *b) { - if(TAGTYPE(b->d.tag) == DBref){ - qlock(fs); - b->rnext = fs->refs; - fs->refs = b; - qunlock(fs); + if((b->addr&Fakeaddr) != 0 || b->type == DBsuper) + return; + if(b->type == DBref) + mlink(&fs->refs, b); + else{ + assert(b->dirty == 0); + mlink(&fs->lru, b); } - qlock(&fs->llk); - lrulink(b); - qunlock(&fs->llk); } +static void +unlinkblock(Memblk *b, int isreclaim) +{ + if((b->addr&Fakeaddr) != 0) + return; + switch(b->type){ + case DBref: + fatal("unlinkblock: DBref"); + case DBsuper: + fatal("unlinkblock: DBsuper"); + } + + if(b->dirty){ + assert(!isreclaim); + munlink(&fs->mdirty, b, 0); + }else + munlink(&fs->lru, b, isreclaim); +} + +/* + * hashing a block also implies placing it in the refs/lru/dirty lists. + * mbget has also the guts of mbhash, for new blocks. 
+ */ Memblk* mbhash(Memblk *b) { - Memblk **h, *ob; + Memblk **h; uint hv; hv = b->addr%nelem(fs->fhash); - qlock(&fs->fhash[hv]); - ob = nil; + xqlock(&fs->fhash[hv]); for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) - if((*h)->addr == b->addr) + if((*h)->addr == b->addr){ + fprint(2, "mbhash: dup blocks\n"); + fprint(2, "b=> %H\n*h=> %H\n", b, *h); fatal("mbhash: dup"); + } *h = b; if(b->next != nil) fatal("mbhash: next"); incref(b); linkblock(b); - - qunlock(&fs->fhash[hv]); - mbput(ob); + xqunlock(&fs->fhash[hv]); return b; } +/* + * unhashing a block also implies removing it from the refs/lru/dirty lists. + */ void -mbunhash(Memblk *b) +mbunhash(Memblk *b, int isreclaim) { Memblk **h; uint hv; - if(TAGTYPE(b->d.tag) == DBref) + if(b->type == DBref) fatal("mbunhash: DBref"); hv = b->addr%nelem(fs->fhash); - qlock(&fs->fhash[hv]); + xqlock(&fs->fhash[hv]); for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) if((*h)->addr == b->addr){ if(*h != b) fatal("mbunhash: dup"); *h = b->next; b->next = nil; - qlock(&fs->llk); - lruunlink(b); - qunlock(&fs->llk); - qunlock(&fs->fhash[hv]); + unlinkblock(b, isreclaim); + xqunlock(&fs->fhash[hv]); + mbput(b); return; } fatal("mbunhash: not found"); @@ -319,35 +447,38 @@ if(b == nil) return; - dDprint("mbfree %H\n", b); + dAprint("mbfree %H\n", b); if(b->ref > 0) - fatal("mbfree: has %d refs", b->ref); + fatal("mbfree: has %d refs\n%H", b->ref, b); + if(b->type == DBfree) + fatal("mbfree: double free:\n%H", b); if(b->next != nil) - fatal("mbfree: has next"); + fatal("mbfree: has next\n%H", b); + if(b->lnext != nil || b->lprev != nil) + fatal("mbfree: has lnext/lprev\n%H", b); - if(TAGTYPE(b->d.tag) != DBsuper) - mbunhash(b); /* this could panic, but errors reading a block might cause it */ - if(TAGTYPE(b->d.tag) == DBref) + if(b->type == DBref) fprint(2, "%s: free of DBref. 
i/o errors?\n", argv0); - if(TAGTYPE(b->d.tag) == DBfile && b->mf != nil){ + if(b->mf != nil){ mf = b->mf; b->mf = nil; mbput(mf->melted); assert(mf->writer == 0 && mf->readers == 0); afree(&mfalloc, mf); } + b->type = DBfree; b->d.tag = DBfree; - b->frozen = b->written = b->dirty = 0; + b->frozen = b->dirty = 0; b->addr = 0; - qlock(fs); + xqlock(fs); fs->nmused--; fs->nmfree++; b->next = fs->free; fs->free = b; - qunlock(fs); + xqunlock(fs); } Memblk* @@ -356,61 +487,65 @@ Memblk *b; b = nil; - qlock(fs); - if(fs->nblk < fs->nablk) - b = &fs->blk[fs->nblk++]; - else if(fs->free != nil){ + xqlock(fs); + if(fs->free != nil){ b = fs->free; fs->free = b->next; fs->nmfree--; - }else{ - qunlock(fs); + }else if(fs->nblk < fs->nablk) + b = &fs->blk[fs->nblk++]; + else{ + xqunlock(fs); fatal("mballoc: no free blocks"); } fs->nmused++; - qunlock(fs); + xqunlock(fs); memset(b, 0, sizeof *b); b->addr = addr; b->ref = 1; - dDprint("mballoc %#ullx -> %H", addr, b); + dAprint("mballoc %#ullx -> %H", addr, b); return b; } Memblk* -mbget(u64int addr, int mkit) +mbget(int type, u64int addr, int mkit) { Memblk *b; uint hv; hv = addr%nelem(fs->fhash); - qlock(&fs->fhash[hv]); + xqlock(&fs->fhash[hv]); for(b = fs->fhash[hv].b; b != nil; b = b->next) if(b->addr == addr){ + checktag(b->d.tag, type, addr); incref(b); break; } if(mkit) if(b == nil){ b = mballoc(addr); - b->d.tag = TAG(addr, DBnew); + b->loading = 1; + b->type = type; + b->d.tag = TAG(type, addr); + /* mbhash() it, without releasing the locks */ b->next = fs->fhash[hv].b; fs->fhash[hv].b = b; incref(b); linkblock(b); - qlock(&b->newlk); /* make others wait for it */ - }else if(TAGTYPE(b->d.tag) == DBnew){ - qunlock(&fs->fhash[hv]); - qlock(&b->newlk); /* wait for it */ - qunlock(&b->newlk); - if(TAGTYPE(b->d.tag) == DBnew){ + xqlock(&b->newlk); /* make others wait for it */ + }else if(b->loading){ + xqunlock(&fs->fhash[hv]); + xqlock(&b->newlk); /* wait for it */ + xqunlock(&b->newlk); + if(b->loading){ mbput(b); - 
dDprint("mbget %#ullx -> i/o error\n", addr); + dprint("mbget %#ullx -> i/o error\n", addr); return nil; /* i/o error reading it */ } dMprint("mbget %#ullx -> waited for m%#p\n", addr, b); return b; } - qunlock(&fs->fhash[hv]); + xqunlock(&fs->fhash[hv]); if(b != nil) mbused(b); dMprint("mbget %#ullx -> m%#p\n", addr, b); @@ -422,18 +557,8 @@ { if(b == nil) return; - dMprint("mbput m%#p pc=%#p\n", b, getcallerpc(&b)); + dAprint("mbput m%#p pc=%#p\n", b, getcallerpc(&b)); if(decref(b) == 0) mbfree(b); } -Memblk* -mbdup(Memblk *b) -{ - Memblk *nb; - - nb = mballoc(b->addr); - memmove(&nb->d, &b->d, sizeof b->d); - return nb; -} - diff -r 33a08947f013 -r 6fe89e1c4d07 sys/src/cmd/creepy/mkfile --- a/sys/src/cmd/creepy/mkfile Tue Mar 06 16:17:52 2012 +0100 +++ b/sys/src/cmd/creepy/mkfile Wed Mar 07 15:41:27 2012 +0000 @@ -15,13 +15,13 @@ attr.$O\ fsys.$O\ file.$O\ - cfg.$O\ tools.$O\ IXOFILES=\ ixcall.$O\ ix.$O\ 9p.$O\ + cfg.$O\ HFILES=\ dbg.h\ @@ -34,6 +34,8 @@ # not ready for install BIN=. 
+default:V:all + nblk, fs->nablk, fs->nmused, fs->nmfree, fs->super->d.ndfree); - print("paths:\t%4uld alloc %4uld free (%4uld bytes)\n", - pathalloc.nalloc, pathalloc.nfree, pathalloc.elsz); - print("mfs:\t%4uld alloc %4uld free (%4uld bytes)\n", - mfalloc.nalloc, mfalloc.nfree, mfalloc.elsz); - print("\n"); - print("Fsysmem:\t%uld\n", Fsysmem); - print("Dminfree:\t%d\n", Dminfree); - print("Dblksz: \t%uld\n", Dblksz); - print("Mblksz: \t%ud\n", sizeof(Memblk)); - print("Dminattrsz:\t%uld\n", Dminattrsz); - print("Nblkgrpsz:\t%uld\n", Nblkgrpsz); - print("Dblkdatasz:\t%d\n", Dblkdatasz); - print("Embedsz:\t%d\n", Embedsz); - print("Dentryperblk:\t%d\n", Dblkdatasz/sizeof(Dentry)); - print("Dptrperblk:\t%d\n\n", Dptrperblk); + va_list arg; + + va_start(arg, fmt); + vfprint(2, fmt, arg); + va_end(arg); + fprint(2, "\n"); + if(fatalaborts) + abort(); + threadexitsall("fatal"); +} + +uvlong +now(void) +{ + return nsec(); +} + +void +lockstats(int on) +{ + if(lstats == nil && on) + lstats = mallocz(sizeof lstats[0] * Nlstats, 1); + lstatson = on; +} + +void +dumplockstats(void) +{ + static char *tname[] = {"qlock", "rwlock", "lock"}; + int lon, i; + Lstat *lst; + + lon = lstatson; + lstatson = 0; + fprint(2, "locks\tpc\tntimes\tncant\twtime\tmtime\n"); + for(i = 0; i < Nlstats; i++){ + lst = &lstats[i]; + if(lst->ntimes != 0) + fprint(2, "src -n -s %#ulx %s\t# %s\t%d\t%d\t%ulld\t%ulld\t\n", + lst->pc, argv0, tname[lst->type], lst->ntimes, + lst->ncant, lst->wtime, lst->wtime/lst->ntimes); + } + lstatson = lon; +} + +Lstat* +getlstat(uintptr pc, int type) +{ + Lstat *lst; + int i, h; + + h = pc%Nlstats; + lock(&lstatslk); + for(i = 0; i < Nlstats; i++){ + lst = &lstats[(h+i)%Nlstats]; + if(lst->pc == 0){ + lst->type = type; + lst->pc = pc; + } + if(lst->pc == pc){ + unlock(&lstatslk); + return lst; + } + } + unlock(&lstatslk); + return &none; +} + +void +xqlock(QLock *q) +{ + vlong t; + Lstat *lst; + + lst = nil; + if(lstats != nil){ + lst = getlstat(getcallerpc(&q), 
Tqlock); + ainc(&lst->ntimes); + if(canqlock(q)) + return; + ainc(&lst->ncant); + t = nsec(); + } + qlock(q); + if(lstats != nil){ + t = nsec() - t; + lock(&lstatslk); + lst->wtime += t; + unlock(&lstatslk); + } +} + +void +xqunlock(QLock *q) +{ + qunlock(q); +} + +void +xrwlock(RWLock *rw, int iswr) +{ + vlong t; + Lstat *lst; + + lst = nil; + if(lstats != nil){ + lst = getlstat(getcallerpc(&rw), Trwlock); + ainc(&lst->ntimes); + if(iswr){ + if(canwlock(rw)) + return; + }else + if(canrlock(rw)) + return; + ainc(&lst->ncant); + t = nsec(); + } + if(iswr) + wlock(rw); + else + rlock(rw); + if(lstats != nil){ + t = nsec() - t; + lock(&lstatslk); + lst->wtime += t; + unlock(&lstatslk); + } +} + +void +xrwunlock(RWLock *rw, int iswr) +{ + if(iswr) + wunlock(rw); + else + runlock(rw); } void* @@ -51,7 +175,7 @@ Next *n; assert(a->elsz > 0); - qlock(a); + xqlock(a); n = a->free; if(n != nil){ a->free = n->next; @@ -60,7 +184,7 @@ a->nalloc++; n = mallocz(a->elsz, !a->zeroing); } - qunlock(a); + xqunlock(a); if(a->zeroing) memset(n, 0, a->elsz); return n; @@ -75,11 +199,11 @@ if(nd == nil) return; n = nd; - qlock(a); + xqlock(a); n->next = a->free; a->free = n; a->nfree++; - qunlock(a); + xqunlock(a); } static void