This is the severe cut for creepy after some testing, with the replacement for venti included. After testing it for some time, I found yet another bug in the code that had to do with temporary snapshots, which I was not using. Plus, the code I had in place to double-check that the "production code" did its work was simpler and did the same work. So, in short, I removed support for temporary snaps, and removed ref counters on disk. The disk is now a log that grows on and on. When low on disk, the tree is traversed (most of it should be in RAM anyway) and reachable blocks are marked; everything else is considered free. System activity may continue in the meantime, of course. Note that it still supports archival, because there's support to freeze the epoch (not to be confused with fossil epochs) that determines whether a block is reachable or not. So, once per day, an archival process may ask creepy/9pix (the fs) not to collect blocks newer than the current version, then read from disk and archive everything it wants, and then release the "free epoch". As an aid, mtimes are propagated up to the root, so you can quickly locate which subtree has been changed since the last archival. The rewrite (the cut, actually) was severe and thus creepy is now back in testing. I'll be sending more updates as I find bugs. But the aim is to fix things by simplifying the code further, if possible, so it ends up so simple that it could be rock solid once testing is finished.
Reference: /n/patches.lsub.org/patch/creepcut Date: Fri May 11 16:52:12 CES 2012 Signed-off-by: nemo@lsub.org # rm /sys/src/cmd/creepy/archer.c # rm /sys/src/cmd/creepy/check.c --- /sys/src/cmd/creepy/9p.c Wed Apr 25 11:11:01 2012 +++ /sys/src/cmd/creepy/9p.c Fri May 11 15:50:44 2012 @@ -128,9 +128,11 @@ Fid *fid; static char spec[] = "proto=p9any role=server"; - if(noauth) + if(rpc->cli->noauth) error("no auth required"); + if(rpc->t.afid == ~0) + error("no auth fid"); fid = newfid(rpc->cli, rpc->t.afid); rpc->fid = fid; @@ -190,8 +192,11 @@ fid = newfid(rpc->cli, rpc->t.fid); rpc->fid = fid; afid = nil; - if(!noauth){ + if(!rpc->cli->noauth){ + if(catcherror()) + error("no auth fid: %r"); afid = getfid(rpc->cli, rpc->t.afid); + noerror(); if(catcherror()){ putfid(afid); error(nil); @@ -202,7 +207,7 @@ xauthread(afid, 0, buf); } fidattach(fid, rpc->t.aname, rpc->t.uname); - if(!noauth){ + if(!rpc->cli->noauth){ if(fid->uid != afid->uid) error("auth uid mismatch"); noerror(); @@ -210,9 +215,9 @@ } p = fid->p; f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); rpc->r.qid = mkqid(f); - rwunlock(f, Rd); + xrunlock(f->mf); if(rpc->cli->uid == -1) rpc->cli->uid = rpc->fid->uid; @@ -249,9 +254,9 @@ noerror(); p = nfid->p; nf = p->f[p->nf-1]; - rwlock(nf, Rd); + xrlock(nf->mf); rpc->r.wqid[i] = mkqid(nf); - rwunlock(nf, Rd); + xrunlock(nf->mf); rpc->r.nwqid++; USED(rpc->r.nwqid); /* damn error()s */ } @@ -280,9 +285,9 @@ rpc->r.iounit = rpc->cli->msize - IOHDRSZ; fidopen(rpc->fid, rpc->t.mode); f = fid->p->f[fid->p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); rpc->r.qid = mkqid(f); - rwunlock(f, Rd); + xrunlock(f->mf); } static void @@ -300,9 +305,9 @@ fidcreate(fid, rpc->t.name, rpc->t.mode, rpc->t.perm); p = fid->p; f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); rpc->r.qid = mkqid(f); - rwunlock(f, Rd); + xrunlock(f->mf); rpc->r.iounit = rpc->cli->msize-IOHDRSZ; } @@ -455,13 +460,13 @@ xqunlock(fid); error(nil); } - p = lastpath(&fid->p, fid->p->nf); + p = fid->p; 
f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); noerror(); xqunlock(fid); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); error(nil); } rpc->r.stat = rpc->data; @@ -469,7 +474,7 @@ if(rpc->r.nstat <= 2) fatal("rstat: convD2M"); noerror(); - rwunlock(f, Rd); + xrunlock(f->mf); } static void @@ -493,20 +498,16 @@ xqunlock(fid); error(nil); } - if(fs->worm) - error("read only file system"); - if(writedenied(fid->uid)) - error("user can't write"); + fidcanwrite(fid); p = fid->p; f = p->f[p->nf-1]; - if(fid->archived || isro(f)) - error("can't wstat archived or built-in files"); - p = meltedpath(&fid->p, fid->p->nf, 1); - f = p->f[p->nf-1]; + if(builtin(f)) + error("can't wstat built-in files"); + f = prenew(fid->p, fid->p->nf); noerror(); xqunlock(fid); if(catcherror()){ - rwunlock(f, Wr); + xwunlock(f->mf); error(nil); } @@ -517,14 +518,14 @@ error("can't resize a directory"); if(sd.length != 0) error("can't truncate to non-zero length"); - dfaccessok(f, fid->uid, AWRITE); + fidaccessok(fid, f, AWRITE); }else sd.length = ~0; if(sd.name[0] && strcmp(f->mf->name, sd.name) != 0){ - if(isro(f) || f == fs->active) - error("can't rename built-in files"); - dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); + if(builtin(f)) + error("can't rename builtin files"); + fidaccessok(fid, p->f[p->nf-2], AWRITE); if(!catcherror()){ mbput(dfwalk(p->f[p->nf-2], sd.name)); error("file already exists"); @@ -533,12 +534,13 @@ sd.name[0] = 0; if(sd.uid[0] != 0 && strcmp(sd.uid, f->mf->uid) != 0){ - if(!allowed(f->d.uid)){ + if(!allowed(fid->uid)){ if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) error("not the owner or group leader"); if(!member(usrid(sd.uid), fid->uid) != 0) error("you are not a member"); } + chkusr(sd.uid); }else sd.uid[0] = 0; @@ -547,12 +549,13 @@ * Not std. in 9: leader must be member of the new gid, not * leader of the new gid. 
*/ - if(!allowed(f->d.uid)){ + if(!allowed(fid->uid)){ if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) error("not the owner or group leader"); if(!member(usrid(sd.gid), fid->uid) != 0) error("you are not a member"); } + chkusr(sd.gid); }else sd.gid[0] = 0; @@ -561,15 +564,16 @@ * ignored otherwise. */ if(sd.muid[0] != 0 && strcmp(sd.muid, f->mf->muid) != 0){ - if(!allowed(f->d.uid)) + if(!allowed(fid->uid)) sd.muid[0] = 0; + chkusr(sd.muid); }else sd.muid[0] = 0; if(sd.mode != ~0 && f->d.mode != sd.mode){ if((sd.mode&DMBITS) != sd.mode) error("unknown bit set in mode"); - if(!allowed(f->d.uid)) + if(!allowed(fid->uid)) if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) error("not the owner or group leader"); if((sd.mode&DMDIR) ^ (f->d.mode&DMDIR)) @@ -580,14 +584,14 @@ /* * Not std. in 9: allowed users can also set atime. */ - if(sd.atime != ~0 && f->d.atime != sd.atime){ - if(!allowed(f->d.uid)) + if(sd.atime != ~0 && f->d.atime/NSPERSEC != sd.atime){ + if(!allowed(fid->uid)) sd.atime = ~0; /* ignore it */ }else sd.atime = ~0; - if(sd.mtime != ~0 && f->d.mtime != sd.mtime){ - if(!allowed(f->d.uid)) + if(sd.mtime != ~0 && f->d.mtime/NSPERSEC != sd.mtime){ + if(!allowed(fid->uid)) if(fid->uid != f->d.uid && !leader(f->d.gid, fid->uid)) error("not the owner or group leader"); }else @@ -599,22 +603,22 @@ if(sd.length != ~0) wstatint(f, "length", sd.length); if(sd.name[0]) - dfwattr(f, "name", sd.name); - if(sd.uid[0]) - dfwattr(f, "uid", sd.uid); - if(sd.gid[0]) - dfwattr(f, "gid", sd.gid); - if(sd.muid[0]) - dfwattr(f, "muid", sd.muid); + dfwattr(f, "name", sd.name); /* BUG: RACE */ if(sd.mode != ~0) wstatint(f, "mode", sd.mode); if(sd.atime != ~0) - wstatint(f, "atime", sd.atime); + wstatint(f, "atime", sd.atime*NSPERSEC); if(sd.mtime != ~0) - wstatint(f, "mtime", sd.mtime); - + wstatint(f, "mtime", sd.mtime*NSPERSEC); + if(sd.gid[0]) + dfwattr(f, "gid", sd.gid); + if(sd.uid[0]) + dfwattr(f, "uid", sd.uid); + if(sd.muid[0]) + dfwattr(f, "muid", 
sd.muid); noerror(); - rwunlock(f, Wr); + xwunlock(f->mf); + pchanged(p, p->nf-1, fid->uid); } static char* @@ -639,14 +643,12 @@ nerr = errstacksize(); - fspolicy(Pre); - rpc->r.tag = rpc->t.tag; rpc->r.type = rpc->t.type + 1; - quiescent(No); + xrlock(&fs->quiescence); if(catcherror()){ - quiescent(Yes); + xrunlock(&fs->quiescence); rpc->r.type = Rerror; rpc->r.ename = err; rerrstr(err, sizeof err); @@ -654,7 +656,7 @@ if(fs->halt != 0) error("file system halted"); fcalls[rpc->t.type](rpc); - quiescent(Yes); + xrunlock(&fs->quiescence); noerror(); } @@ -699,8 +701,6 @@ replied(rpc); freerpc(rpc); - - fspolicy(Post); dPprint("%s exiting\n", threadgetname()); --- /sys/src/cmd/creepy/9pix.c Thu Mar 29 22:49:56 2012 +++ /sys/src/cmd/creepy/9pix.c Thu May 10 20:02:56 2012 @@ -6,8 +6,8 @@ static void usage(void) { - fprint(2, "usage: %s [-DFLAGS] [-a] [-A addr] [-S srv] disk\n", argv0); - exits("usage"); + fprint(2, "usage: %s [-DFLAGS] [-ra] [-c n] [-A addr] [-S srv] disk\n", argv0); + threadexits("usage"); } int mainstacksize = Stack; @@ -16,9 +16,13 @@ threadmain(int argc, char *argv[]) { char *addr, *dev, *srv; + int noauth, mode, fsysmem; - addr = "tcp!*!9fs"; - srv = "9pix"; + noauth = 0; + mode = Normal; + fsysmem = 0; + srv = nil; + addr = nil; ARGBEGIN{ case 'A': addr = EARGF(usage()); @@ -26,9 +30,19 @@ case 'S': srv = EARGF(usage()); break; + case 'r': + dbg['W'] = 1; /* safety; report any write if buggy */ + mode = Rdonly; + break; case 'a': noauth = 1; break; + case 'c': + fsysmem = strtoul(EARGF(usage()), 0, 0); + if(fsysmem < 2 || fsysmem > Fsysmem/MiB) + sysfatal("mem size is too small or too large"); + fsysmem *= MiB; + break; default: if(ARGC() >= 'A' && ARGC() <= 'Z' || ARGC() == '9'){ dbg[ARGC()] = 1; @@ -38,10 +52,15 @@ }ARGEND; if(argc != 1) usage(); + if(srv == nil || addr == nil){ + addr = "tcp!*!9fs"; + srv = "9pix"; + } dev = argv[0]; if(dbg['d']) dbg['Z'] = 1; + threadsetname("9pix %s", dev); outofmemoryexits(1); workerthreadcreate = 
proccreate; fmtinstall('H', mbfmt); @@ -52,21 +71,20 @@ fmtinstall('R', rpcfmt); fmtinstall('A', usrfmt); fmtinstall('P', pathfmt); + fmtinstall('D', daddrfmt); + fmtinstall('N', namefmt); errinit(Errstack); if(catcherror()) fatal("uncatched error: %r"); rfork(RFNAMEG|RFNOTEG); - rwusers(nil); - fsopen(dev, Normal, Wr); + fsopen(dev, mode, fsysmem); if(srv != nil) - srv9pix(srv, cliworker9p); + srv9pix(srv, noauth, cliworker9p); if(addr != nil) - listen9pix(addr, cliworker9p); + listen9pix(addr, noauth, cliworker9p); consinit(); - proccreate(timeproc, nil, Stack); - proccreate(fssyncproc, nil, Stack); noerror(); threadexits(nil); } --- /sys/src/cmd/creepy/arch.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy/arch.c Fri May 11 13:44:47 2012 @@ -0,0 +1,376 @@ +#include "all.h" + +/* + * file system archival into a worm tree + */ + + +static Dir* +dfind(Dir *d, int nd, char *name) +{ + int i; + + for(i = 0; i < nd; i++) + if(strcmp(d[i].name, name) == 0) + return &d[i]; + return nil; +} + +static int +changed(Memblk *f, Dir *d) +{ + if(d == nil){ + dVprint("%N changed: new\n", f->mf->name); + return 1; + } + if(f->type == DBfile && f->d.length != d->length){ + dVprint("%N changed: size %ulld %ulld\n", + f->mf->name, f->d.length, d->length); + return 1; + } + if(f->d.mtime/NSPERSEC != d->mtime){ + dVprint("%N changed: mtime %ulld %uld\n", + f->mf->name, f->d.mtime/NSPERSEC, d->mtime); + return 1; + } + return 0; +} + +static int +metachanged(Memblk *f, Dir *d) +{ + if(changed(f, d)) + return 1; + if(strcmp(f->mf->uid, d->uid) != 0 || strcmp(f->mf->gid, d->gid) != 0){ + dVprint("%N metachanged: uid/gid\n", f->mf->name); + return 1; + } + if(strcmp(f->mf->muid, d->muid) != 0) + return 1; + return f->d.mode != d->mode; +} + +static void +archfile(Memblk *f) +{ + int fd; + Blksl sl; + ulong tot; + + dVprint("archfile %N %ulld bytes\n", f->mf->name, f->d.length); + fd = create(f->mf->name, OWRITE, f->d.mode); + if(fd < 0) + error("%N: %r", f->mf->name); + if(catcherror()){ 
+ close(fd); + error(nil); + } + for(tot = 0;; tot += sl.len){ + sl = dfslice(f, Dblksz, tot, Rd); + if(sl.len == 0){ + assert(sl.b == nil); + break; + } + if(sl.data == nil){ + seek(fd, sl.len, 1); + assert(sl.b == nil); + continue; + } + if(write(fd, sl.data, sl.len) != sl.len){ + mbput(sl.b); + error("%N: write: %r", f->mf->name); + } + mbput(sl.b); + } + noerror(); + close(fd); +} + +static void +archmeta(Memblk *f, Dir *d) +{ + Dir nd; + + nulldir(&nd); + if(d == nil || f->d.mtime/NSPERSEC != d->mtime) + nd.mtime = f->d.mtime/NSPERSEC; + if(d == nil || f->d.mode != d->mode) + nd.mode = f->d.mode; + if(d == nil || strcmp(f->mf->uid, d->uid) != 0) + nd.uid = f->mf->uid; + if(d == nil || strcmp(f->mf->gid, d->gid) != 0) + nd.gid = f->mf->gid; + if(d == nil || strcmp(f->mf->muid, d->muid) != 0) + nd.muid = f->mf->muid; + + dVprint("archmeta: %N %M %ulld\n", f->mf->name, (ulong)f->d.mode, f->d.mtime); + if(dirwstat(f->mf->name, &nd) < 0) + warn("archmeta %N: %r", f->mf->name); /* ignore errors */ +} + +static void +archmkdir(char *dir) +{ + int fd; + + dVprint("archmkdir %N\n", dir); + fd = create(dir, OREAD, 0555|DMDIR); + if(fd < 0) + error("create %N: %r", dir); + close(fd); +} + +static void archdir(Memblk*); + +static void +archdent(daddrt addr, Dir *d, int nd) +{ + Memblk *f; + + if(catcherror()){ + warn("archdent: %D: %r", addr); + return; + } + f = dbget(DBfile, addr); + xrlock(f->mf); + dVprint("archdent %N\n", f->mf->name); + noerror(); + if(catcherror()){ + warn("archdent: %N: %r", f->mf->name); + xrunlock(f->mf); + mbput(f); + return; + } + d = dfind(d, nd, f->mf->name); + if(f->type == DBdir){ + if(d == nil) + archmkdir(f->mf->name); + if(chdir(f->mf->name) < 0) + error("cd: %r"); + if(catcherror()){ + if(chdir("..") < 0) + fatal("archdent: cd ..: %r"); + error(nil); + } + if(changed(f, d)) /* prune on mtime */ + archdir(f); + noerror(); + if(chdir("..") < 0) + fatal("archdent: cd ..: %r"); + }else if(changed(f, d)) + archfile(f); + if(metachanged(f, 
d)) + archmeta(f, d); + noerror(); + xrunlock(f->mf); + mbput(f); +} + +static void +archrm(char *name) +{ + if(remove(name) != -1) + return; + /* + * TODO: rip removes entire trees with just a single remove. + * For archiving into non-rip archives, + * if remove failed because the directory is not empty, + * recur and remove everything. + */ + warn("remove %N: %r", name); /* but ignore error */ +} + +/* + * b is a dir locked to archive in the current working directory. + */ +static void +archdir(Memblk *b) +{ + Dir *d; + int nd, fd, i; + Blksl sl; + Memblk *c; + vlong off; + daddrt *de; + static char buf[128]; + + dVprint("archdir %N\n", nil); + fd = open(".", OREAD); + if(fd < 0) + error("%r"); + nd = dirreadall(fd, &d); + close(fd); + if(nd < 0) + error("read: %r"); + if(catcherror()){ + free(d); + return; + } + /* remove from the archive files that are gone or changed DMDIR */ + for(i = 0; i < nd; i++){ + if(catcherror()){ + rerrstr(buf, sizeof buf); + if(strstr(buf, "not found") == nil){ /* actual error */ + warn("dfwarchdir: walk: %r"); + continue; + } + /* the file has been removed */ + archrm(d[i].name); + }else{ + c = dfwalk(b, d[i].name); + if(c->type == DBdir && (d[i].mode&DMDIR) == 0) + archrm(d[i].name); + if(c->type == DBfile && (d[i].mode&DMDIR) != 0) + archrm(d[i].name); + mbput(c); + noerror(); + } + } + /* archive new or modified files */ + for(off = 0;; off += sl.len){ + sl = dfslice(b, Dblksz, off, Rd); + if(sl.len == 0) + break; + if(sl.b == nil) + continue; + if(catcherror()){ + mbput(sl.b); + error(nil); + } + de = sl.data; + for(i = 0; i < sl.len/Daddrsz; i++) + if(de[i] != 0) + archdent(de[i], d, nd); + noerror(); + mbput(sl.b); + } + + noerror(); + free(d); +} + +static void +lastdir(char *dir, char *e) +{ + Dir *d; + int fd, nd, i, last; + + fd = open(dir, OREAD); + if(fd < 0) + error("%s: open: %r", dir); + nd = dirreadall(fd, &d); + close(fd); + if(nd <= 0) + error("%s: read: %r", dir); + last = 0; + for(i = 0; i < nd; i++) + 
if(d[last].mtime < d[i].mtime) + last = i; + seprint(dir+strlen(dir), e, "/%s", d[last].name); + free(d); +} + +static void +linkarch(char *last, char *arch) +{ + int fd; + char c[128]; + + if(strlen(last) < strlen(fs->archdir) || strlen(arch) < strlen(fs->archdir)) + error("bad arch path"); + last += strlen(fs->archdir); + arch += strlen(fs->archdir); + seprint(c, c+sizeof c, "%s/cons", fs->archdir); + dVprint("arch: %s -> link %s %s\n", c, last, arch); + fd = open(c, OWRITE); + if(fd < 0) + error("cons: %r"); + if(write(fd, "sync\n", 5) != 5) + error("archive sync: %r"); + seprint(c, c+sizeof c, "link %s %s\n", last, arch); + if(write(fd, c, strlen(c)) != strlen(c)){ + close(fd); + error("link: %r"); + } + close(fd); +} + +static void +mkdirp(char *dir) +{ + if(access(dir, AEXIST) < 0) + archmkdir(dir); +} + +static void +cdarchdir(void) +{ + char dir[80], last[80], *s, *e; + int n, y, m, d; + Tm *t; + + t = localtime(time(nil)); + y = t->year + 1900; + m = t->mon + 1; + d = t->mday; + e = dir+sizeof dir; + s = seprint(dir, e, "%s/root/%s", fs->archdir, fs->archname); + mkdirp(dir); + s = seprint(s, e, "/%04d", y); + mkdirp(dir); + s = seprint(s, e, "/%02d%02d", m, d); + for(n = 0; access(dir, AEXIST) == 0; n++) + seprint(s, e, ".%d", n); + + seprint(last, e, "%s/root/%s", fs->archdir, fs->archname); + lastdir(last, e); /* yyyy */ + lastdir(last, e); /* mmdd */ + linkarch(last, dir); + if(chdir(dir) < 0) + error("chdir %s: %r", dir); + warn("archiving at %s...", dir); +} + +void +fsarchive(void) +{ + daddrt addr; + Memblk *b; + + xrlock(&fs->dquiescence); + xqlock(&fs->archlk); + dVprint("fsarchive...\n"); + xqlock(&fs->superlk); + addr = fs->super->d.root; + xqunlock(&fs->superlk); + if(catcherror()){ + xqunlock(&fs->archlk); + xrunlock(&fs->dquiescence); + warn("archive: %r"); + return; + } + + cdarchdir(); + + b = dbget(DBdir, addr); + xrlock(b->mf); + if(b->state != MBclean){ + xrunlock(b->mf); + error("not synced"); + } + if(catcherror()){ + xrunlock(b->mf); 
+ mbput(b); + error(nil); + } + + archdir(b); + + noerror(); + xrunlock(b->mf); + mbput(b); + noerror(); + dVprint("fsarchive: done\n"); + xqunlock(&fs->archlk); + xrunlock(&fs->dquiescence); +} --- /sys/src/cmd/creepy/attr.c Wed Apr 25 11:11:02 2012 +++ /sys/src/cmd/creepy/attr.c Fri May 11 13:27:40 2012 @@ -21,6 +21,7 @@ static long ratime(Memblk*, char*, long); static long rgid(Memblk*, char*, long); static long rid(Memblk*, char*, long); +static long rprev(Memblk*, char*, long); static long rlength(Memblk*, char*, long); static long rmode(Memblk*, char*, long); static long rmtime(Memblk*, char*, long); @@ -41,12 +42,13 @@ { {"name", 0, wname, rname}, {"id", BIT64SZ, nil, rid}, + {"prev", BIT64SZ, nil, rprev}, {"atime", BIT64SZ, watime, ratime}, {"mtime", BIT64SZ, wmtime, rmtime}, {"length", BIT64SZ, wlength, rlength}, {"uid", 0, wuid, ruid}, {"gid", 0, wgid, rgid}, - {"muid", 0, wuid, ruid}, + {"muid", 0, wmuid, rmuid}, {"mode", BIT64SZ, wmode, rmode}, {"*", 0, nil, rstar}, }; @@ -132,6 +134,7 @@ f->mf->name = val; maxsz = embedattrsz(f); if(metasize(f) > maxsz){ + ainc(&fs->nnoattrsz); f->mf->name = old; warnerror("no room to grow metadata"); } @@ -175,7 +178,7 @@ return rstr(f->mf->uid, buf, len); } -static u64int +u64int chkusr(char *buf) { int id; @@ -190,6 +193,7 @@ wuid(Memblk *f, char *buf) { f->d.uid = chkusr(buf); + f->mf->uid = usrname(f->d.uid); return strlen(buf)+1; } @@ -203,6 +207,7 @@ wgid(Memblk *f, char *buf) { f->d.gid = chkusr(buf); + f->mf->gid = usrname(f->d.gid); return strlen(buf)+1; } @@ -216,6 +221,7 @@ wmuid(Memblk *f, char *buf) { f->d.muid = chkusr(buf); + f->mf->muid = usrname(f->d.muid); return strlen(buf)+1; } @@ -245,6 +251,12 @@ } static long +rprev(Memblk *f, char *buf, long n) +{ + return ru64int(f->d.prev, buf, n); +} + +static long watime(Memblk *f, char *buf) { f->d.atime = chku64int(buf); @@ -283,36 +295,14 @@ return ru64int(f->d.mtime, buf, n); } -static uvlong -resized(Memblk *f, uvlong sz) -{ - ulong boff, bno, bend, 
doff; - - if(f->d.mode&DMDIR) - error("can't resize a directory"); - - if(sz > maxfsz) - error("max file size exceeded"); - if(sz >= f->d.length) - return sz; - bno = dfbno(f, sz, &boff); - if(boff > 0) - bno++; - bend = dfbno(f, sz, &boff); - if(boff > 0) - bend++; - doff = embedattrsz(f); - if(doff < Embedsz) - memset(f->d.embed+doff, 0, Embedsz-doff); - dfdropblks(f, bno, bend); - return sz; -} - static long wlength(Memblk *f, char *buf) { - f->d.length = chku64int(buf); - resized(f, f->d.length); + if(f->d.mode&DMDIR) + error("can't resize a directory"); + if(chku64int(buf) != 0) + error("can't resize to non-zero sizes"); + dftruncate(f); return strlen(buf)+1; } @@ -344,12 +334,6 @@ int i; long tot; - isfile(f); - ismelted(f); - isrwlocked(f, Wr); - if(fsdiskfree() < Dzerofree) - error("disk full"); - for(i = 0; i < nelem(adef); i++) if(strcmp(adef[i].name, name) == 0) break; @@ -358,7 +342,6 @@ if(adef[i].wattr == nil) error("can't write %s", name); tot = adef[i].wattr(f, val); - changed(f); return tot; } @@ -368,8 +351,6 @@ int i; long tot; - isfile(f); - isrwlocked(f, Rd); for(i = 0; i < nelem(adef); i++) if(strcmp(adef[i].name, name) == 0) break; @@ -424,9 +405,6 @@ int i; char buf[128]; - isfile(f); - isrwlocked(f, Rd); - dfrattr(f, name, buf, sizeof buf); for(i = 0; i < nelem(adef); i++) @@ -435,42 +413,4 @@ if(i == nelem(adef)) error("no such attribute"); cstring(f, op, buf, val); -} - -/* - * Does not check if the user can't write because of the "write" - * user. - * Does check if the user is allowed in config mode. - */ -void -dfaccessok(Memblk *f, int uid, int bits) -{ - uint mode; - - if(allowed(uid)) - return; - - bits &= 3; - - mode = f->d.mode &0777; - - if((mode&bits) == bits) - return; - mode >>= 3; - - if(member(f->d.gid, uid) && (mode&bits) == bits) - return; - mode >>= 3; - if(f->d.uid == uid && (mode&bits) == bits) - return; - - /* - * The process owner and elf can always access the console - * to configure the file system. 
- */ - if(f == fs->cons) - if(uid == usrid("elf") || uid == usrid(getuser())) - return; - - error("permission denied"); } --- /sys/src/cmd/creepy/cfg.c Wed Apr 25 11:11:02 2012 +++ /sys/src/cmd/creepy/cfg.c Fri May 11 16:23:57 2012 @@ -1,9 +1,6 @@ #include "all.h" /* - * Locking is coarse, only functions used from outside - * care to lock the user information. - * * Access checks are like those described in Plan 9's stat(5), but for: * * - to change gid, the group leader is not required to be a leader @@ -20,8 +17,8 @@ * uid is a number. * * This program insists on preserving uids already seen. - * That is, after editing /active/adm/users, the server program will notice - * and re-read the file, then clean it up, and upate its contents. + * That is, after editing /root/users, the server program will notice + * and re-read the file, then clean it up, and rewrite it. * * Cleaning ensures that uids for known users are kept as they were, and * that users not yet seen get unique uids. Numeric uids are only an internal @@ -38,12 +35,6 @@ "4:glenda:glenda:\n" "5:elf:elf:sys\n"; -static RWLock ulk; -static Usr *uids[Uhashsz]; -static Usr *unames[Uhashsz]; -static Usr *uwrite; -static int uidgen; - static uint usrhash(char* s) { @@ -73,7 +64,7 @@ { Usr *u; - for(u = uids[uid%Uhashsz]; u != nil; u = u->inext) + for(u = fs->uids[uid%Uhashsz]; u != nil; u = u->inext) if(u->id == uid) return u; return nil; @@ -86,15 +77,15 @@ uint h; h = usrhash(name); - for(u = unames[h]; u != nil; u = u->nnext) + for(u = fs->unames[h]; u != nil; u = u->nnext) if(strcmp(u->name, name) == 0) return u; if(mkit){ /* might be leaked. 
see freeusr() */ u = mallocz(sizeof *u, 1); strecpy(u->name, u->name+sizeof u->name, name); - u->nnext = unames[h]; - unames[h] = u; + u->nnext = fs->unames[h]; + fs->unames[h] = u; } return u; } @@ -104,13 +95,13 @@ { Usr *u; - xrwlock(&ulk, Rd); + xrlock(&fs->Usrs); u = finduid(uid); if(u == nil){ - xrwunlock(&ulk, Rd); /* zero patatero: */ + xrunlock(&fs->Usrs); /* zero patatero: */ return "ZP"; /* disgusting, isn't it? */ } - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); return u->name; } @@ -119,13 +110,13 @@ { Usr *u; - xrwlock(&ulk, Rd); + xrlock(&fs->Usrs); u = finduname(n, Dontmk); if(u == nil || !u->enabled){ - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); return -1; } - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); return u->id; } @@ -137,11 +128,11 @@ if(uid == member) return 1; - xrwlock(&ulk, Rd); + xrlock(&fs->Usrs); u = finduid(uid); r = u != nil && u->lead != nil && u->lead->id == member; r |= u != nil && findmember(u, member); - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); return r; } @@ -151,7 +142,7 @@ Usr *u; int r; - xrwlock(&ulk, Rd); + xrlock(&fs->Usrs); u = finduid(gid); r = 0; if(u != nil) @@ -159,7 +150,7 @@ r = u->lead->id == lead; else r = findmember(u, lead); - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); return r; } @@ -182,7 +173,6 @@ for(ml = &u->members; (m = *ml) != nil; ml = &m->next) if(strcmp(m->u->name, n) == 0){ - xrwunlock(&ulk, Wr); warn("'%s' is already a member of '%s'", n, u->name); return; } @@ -232,8 +222,8 @@ int i; Usr *usr; - for(i = 0; i < nelem(uids); i++) - for(usr = uids[i]; usr != nil; usr = usr->inext){ + for(i = 0; i < nelem(fs->uids); i++) + for(usr = fs->uids[i]; usr != nil; usr = usr->inext){ fprint(2, "%A\n", usr); } } @@ -254,13 +244,13 @@ u = finduname(name, Mkit); if(u->id == 0){ /* first seen! 
*/ - u->id = ++uidgen; + u->id = ++fs->uidgen; h = u->id%Uhashsz; - u->inext = uids[h]; - uids[h] = u; + u->inext = fs->uids[h]; + fs->uids[h] = u; } if(strcmp(name, "write") == 0) - uwrite = u; + fs->uwrite = u; return u; } @@ -366,14 +356,14 @@ int i; Usr *usr; - xrwlock(&ulk, Wr); + xwlock(&fs->Usrs); if(catcherror()){ warn("users: %r"); goto update; } if(uf == nil){ rwdefaultusers(); - xrwunlock(&ulk, Wr); + xwunlock(&fs->Usrs); return; } tot = 0; @@ -402,23 +392,21 @@ warn("[%s]", p); } noerror(); - if(uf->frozen){ /* loaded at boot time */ - xrwunlock(&ulk, Wr); + if(uf->state == MBclean){ /* loaded at boot time */ + xwunlock(&fs->Usrs); return; } update: if(catcherror()){ - xrwunlock(&ulk, Wr); + xwunlock(&fs->Usrs); warn("users: %r\n"); return; /* what could we do? */ } - ismelted(uf); - isrwlocked(uf, Wr); wstatint(uf, "length", 0); off = 0; dprint("users updated:\n"); - for(i = 0; i < uidgen; i++) + for(i = 0; i < fs->uidgen; i++) if((usr=finduid(i)) != nil){ dprint("%A\n", usr); p = seprint(ubuf, ubuf+sizeof ubuf, "%A\n", usr); @@ -426,7 +414,7 @@ off += nw; } noerror(); - xrwunlock(&ulk, Wr); + xwunlock(&fs->Usrs); } int @@ -434,11 +422,11 @@ { int r; - if(uwrite == nil) + if(fs->uwrite == nil) return 0; - xrwlock(&ulk, Rd); - r = findmember(uwrite, uid) == 0; - xrwunlock(&ulk, Rd); + xrlock(&fs->Usrs); + r = findmember(fs->uwrite, uid) == 0; + xrunlock(&fs->Usrs); return r; } @@ -448,25 +436,18 @@ Usr *u; int r; - xrwlock(&ulk, Rd); + xrlock(&fs->Usrs); u = finduid(uid); r = 0; if(u) r = u->allow; - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); return r; } /* - * TODO: register multiple fids for the cons file by keeping a list - * of console channels. - * consread will have to read from its per-fid channel. - * conprint will have to bcast to all channels. - * - * With that, multiple users can share the same console. - * Although perhaps it would be easier to use C in that case. + * TODO: multiple fids for the cons file. 
*/ - void consprint(char *fmt, ...) { @@ -476,10 +457,15 @@ va_start(arg, fmt); s = vsmprint(fmt, arg); va_end(arg); - /* consume some message if the channel is full */ - while(nbsendp(fs->consc, s) == 0) - if((x = nbrecvp(fs->consc)) != nil) + /* consume cons buffer if the channel is full */ + if(nbsendp(fs->consc, s) == 0){ + while((x = nbrecvp(fs->consc)) != nil) free(x); + if(nbsendp(fs->consc, s) == 0){ + warn("cons: %s", s); + free(s); + } + } } long @@ -490,9 +476,9 @@ if(count <= 0) /* shouldn't happen */ return 0; - quiescent(Yes); + xrunlock(&fs->quiescence); s = recvp(fs->consc); - quiescent(No); + xrlock(&fs->quiescence); tot = 0; do{ nr = strlen(s); @@ -514,19 +500,19 @@ static void cdump(int argc, char *argv[]) { - switch(argc){ - case 1: - fsdump(0, strcmp(argv[0], "dumpall") == 0); + char *argv0; + int verb; + + verb = 0; + argv0 = argv[0]; + ARGBEGIN{ + case 'l': + verb = 1; break; - case 2: - if(strcmp(argv[1], "-l") == 0){ - fsdump(1, strcmp(argv[0], "dumpall") == 0); - break; - } - /*fall*/ default: - error("usage: %s [-l]\n", argv[0]); - } + error("usage: %s [-l]\n", argv0); + }ARGEND; + fsdump(verb, strcmp(argv0, "dumpall") == 0); } static void @@ -549,53 +535,78 @@ int i; Usr *usr; - xrwlock(&ulk, Rd); + xrlock(&fs->Usrs); if(catcherror()){ - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); error(nil); } - for(i = 0; i < uidgen; i++) + for(i = 0; i < fs->uidgen; i++) if((usr=finduid(i)) != nil) consprint("%A\n", usr); noerror(); - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); +} + +static void +cuname(int, char *argv[]) +{ + Usr *usr; + + xrlock(&fs->Usrs); + if(catcherror()){ + xrunlock(&fs->Usrs); + error(nil); + } + usr = finduname(argv[1], Dontmk); + if(usr == nil) + error("no such user\n"); + else + consprint("%A\n", usr); + noerror(); + xrunlock(&fs->Usrs); } static void cstats(int argc, char *argv[]) { int clr, verb; - char *s; + char *argv0; + argv0 = argv[0]; clr = verb = 0; - if(argc == 2 && *argv[1] == '-'){ - for(s = argv[1]+1; *s; s++) 
- switch(*s){ - case -'c': - clr = 1; - break; - case 'v': - verb = 1; - break; - default: - usage: - error("usage: %s [-cv]\n", argv[0]); - return; - } - argc--; - } - if(argc != 1) - goto usage; + ARGBEGIN{ + case 'c': + clr = 1; + break; + case 'v': + verb = 1; + break; + default: + error("usage: %s [-cv]\n", argv0); + }ARGEND + if(argc != 0) + error("usage: %s [-cv]\n", argv0); consprint("%s\n", updatestats(clr, verb)); } static void -cdebug(int, char *argv[]) +cdebug(int argc, char *argv[]) { char *f; char flags[50]; int i; + if(argc != 1 && argc != 2) + error("usage: %s [[+-]FLAGS | on | off]\n", argv[0]); + if(argc == 1){ + f = flags; + for(i = 0; i < 256 && f < flags+sizeof flags -1; i++) + if(dbg[i]) + *f++ = i; + *f = 0; + consprint("debug %s\n", flags); + return; + } f = argv[1]; if(strcmp(f, "on") == 0){ dbg['D'] = 1; @@ -619,7 +630,7 @@ if(dbg[i]) *f++ = i; *f = 0; - consprint("debug = '%s'\n", flags); + consprint("debug %s\n", flags); } @@ -646,44 +657,24 @@ crwerr(int, char *argv[]) { if(*argv[0] == 'r'){ - swreaderr = atoi(argv[1]); - warn("sw read err count = %d", swreaderr); + fs->swreaderr = atoi(argv[1]); + warn("sw read err count = %d", fs->swreaderr); }else{ - swwriteerr = atoi(argv[1]); - warn("sw write err count = %d", swwriteerr); + fs->swwriteerr = atoi(argv[1]); + warn("sw write err count = %d", fs->swwriteerr); } } -static void -ccheck(int argc, char *argv[]) -{ - switch(argc){ - case 1: - fscheck(); - break; - case 2: - if(strcmp(argv[1], "-v") == 0){ - if(fscheck() > 0) - fsdump(1, 0); - }else - error("usage: %s [-v]\n", argv[0]); - break; - default: - error("usage: %s [-v]\n", argv[0]); - } -} - -static void -clru(int, char**) +void +allowuid(int uid) { - fslru(); -} - + Usr *usr; -static void -creclaim(int, char**) -{ - fsreclaim(); + xwlock(&fs->Usrs); + usr = finduid(uid); + if(usr != nil) + usr->allow = 1; + xwunlock(&fs->Usrs); } static void @@ -696,33 +687,155 @@ switch(argc){ case 1: if(*argv[0] == 'd') - for(i = 0; i < 
nelem(uids); i++) - for(u = uids[i]; u != nil; u = u->inext) + for(i = 0; i < nelem(fs->uids); i++) + for(u = fs->uids[i]; u != nil; u = u->inext) u->allow = 0; break; case 2: - xrwlock(&ulk, Wr); + xwlock(&fs->Usrs); usr = finduname(argv[1], Dontmk); if(usr == nil){ - xrwunlock(&ulk, Wr); - consprint("user not found\n"); - return; + xwunlock(&fs->Usrs); + error("user not found"); } usr->allow = (*argv[0] == 'a'); - xrwunlock(&ulk, Wr); + xwunlock(&fs->Usrs); break; default: consprint("usage: %s [uid]\n", argv[0]); return; } - xrwlock(&ulk, Rd); - for(i = 0; i < nelem(uids); i++) - for(u = uids[i]; u != nil; u = u->inext) + xrlock(&fs->Usrs); + for(i = 0; i < nelem(fs->uids); i++) + for(u = fs->uids[i]; u != nil; u = u->inext) if(u->allow) consprint("user '%s' is allowed\n", u->name); else if(u == usr) consprint("user '%s' is not allowed\n", u->name); - xrwunlock(&ulk, Rd); + xrunlock(&fs->Usrs); +} + +static int +mountrip(void) +{ + int rv, afd; + AuthInfo *ai; + int fd; + + + warn("mounting rip..."); + fd = open("/srv/rip", ORDWR); + if(fd < 0) + return -1; + + afd = fauth(fd, "wormwr"); + if(afd >= 0){ + ai = auth_proxy(afd, amount_getkey, "proto=p9any role=client %s", ""); + if(ai != nil) + auth_freeAI(ai); + else + warn("auth_proxy: %r"); + } + rv = mount(fd, afd, "/n/rip", MREPL|MCREATE, "wormwr"); + if(afd >= 0) + close(afd); + return rv; +} + +static int +canwritearch(char *dir) +{ + char *t; + int fd; + + t = smprint("%s/root/_", dir); + fd = create(t, OWRITE|ORCLOSE, 0664); + if(fd < 0){ + if(strcmp(dir, "/n/rip") == 0){ /* try to mount */ + if(mountrip() < 0){ + free(t); + return -1; + } + fd = create(t, OWRITE|ORCLOSE, 0664); + free(t); + if(fd < 0) + return -1; + close(fd); + return 0; + } + free(t); + return -1; + } + close(fd); + return 0; +} + +static void +carch(int argc, char *argv[]) +{ + int h; + char *path; + + if(fs->mode == Worm) + error("%s allowed only in non-worm mode\n", argv[0]); + if(strcmp(argv[0], "dontarch") == 0){ + 
if(xcanqlock(&fs->archlk) == 0) + error("archive in progress. try again later."); + fs->archt = 0; + fs->archhour = 0; + fs->archdir = nil; /* leaked */ + fs->archname = nil; /* leaked */ + xqunlock(&fs->archlk); + consprint("fsys not being archived\n"); + return; + } + if(argc == 1){ + if(fs->archdir == nil) + consprint("not archived\n"); + else + consprint("archived at %d:00 %s/root/%s\n", + fs->archhour, fs->archdir, fs->archname); + return; + } + if(argc == 2 && strcmp(argv[1], "now") == 0){ + if(fs->archdir == nil){ + consprint("not archived\n"); + return; + } + consprint("archive scheduled\n"); + fs->archt = time(nil); + return; + } + path = "/n/rip"; + switch(argc){ + case 3: + break; + case 4: + path = argv[3]; + break; + default: + error("usage: %s [name hour [path] | now]", argv[0]); + } + h = atoi(argv[2]); + if(h < 0 || h >= 24) + error("invalid hour %d\n", h); + if(canwritearch(path) < 0) + error("can't write archive: %r\n"); + if(xcanqlock(&fs->archlk) == 0) + error("archive in progress. 
try again later."); + fs->archdir = strdup(path); + fs->archname = strdup(argv[1]); + fs->archhour = h; + fs->archt = nextime(time(nil), fs->archhour); + xqunlock(&fs->archlk); + consprint("archived at %d:00 %s/root/%s\n", + fs->archhour, fs->archdir, fs->archname); +} + +static void +cwho(int, char**) +{ + consprintclients(); } static void @@ -730,66 +843,95 @@ { Path *dp, *sp; Memblk *df, *sf, *nf; - daddrt naddr; + char *nm; - if(fs->worm == 0) + if(fs->mode != Worm) error("%s allowed only in worm mode\n", argv[0]); sp = walkto(argv[1], nil); if(catcherror()){ putpath(sp); error(nil); } - dp = walkto(argv[2], nil); + sf = sp->f[sp->nf-1]; + xrlock(sf->mf); + if(catcherror()){ + xrunlock(sf->mf); + error(nil); + } + xqlock(&sf->slk); + if(sf->state != MBclean){ + xqunlock(&sf->slk); + error("source is not frozen"); + } + xqunlock(&sf->slk); + + dp = walkto(argv[2], &nm); if(catcherror()){ - putpath(sp); + putpath(dp); error(nil); } - meltedpath(&dp, dp->nf, 1); - sf = sp->f[sp->nf-1]; + prenew(dp, dp->nf); df = dp->f[dp->nf-1]; - naddr = 0; + if(catcherror()){ + xwunlock(df->mf); + error(nil); + } if(!catcherror()){ - nf = dfwalk(df, sf->mf->name); - naddr = nf->addr; + nf = dfwalk(df, nm); mbput(nf); noerror(); + error("'%s' already exists", nm); } - dfchdentry(df, naddr, sf->addr); + nf = dfcreate(df, nm, sf->d.uid, sf->d.mode); + addelem(&dp, nf); + mbput(nf); + nf->d = sf->d; /* in-place: copy all the pointers and info */ + wname(nf, nm); /* but keep the desired name */ + + noerror(); /* nf */ + xwunlock(df->mf); + noerror(); + putpath(dp); + + noerror(); + xrunlock(sf->mf); + noerror(); putpath(sp); - putpath(dp); } static void -cwho(int, char**) +ccheck(int, char*[]) { - consprintclients(); + fscheck(); } static void chelp(int, char**); static Cmd cmds[] = { - {"dump", cdump, 0, "dump [-l]"}, - {"dumpall", cdump, 0, "dumpall [-l]"}, - {"stats", cstats, 0, "stats [-c]"}, - {"sync", csync, 1, "sync"}, - {"halt", chalt, 1, "halt"}, - {"users", cusers, 1, 
"users"}, - {"debug", cdebug, 2, "cdebug [+-]FLAGS | on | off"}, - {"locks", clocks, 2, "locks [on|off|dump]"}, - {"fids", cfids, 1, "fids"}, - {"rerr", crwerr, 2, "rerr n"}, - {"werr", crwerr, 2, "werr n"}, - {"check", ccheck, 0, "check"}, - {"lru", clru, 1, "lru"}, - {"reclaim", creclaim, 1, "reclaim"}, - {"allow", callow, 0, "allow [uid]"}, - {"disallow", callow, 0, "disallow [uid]"}, - {"who", cwho, 1, "who"}, - {"link", clink, 3, "link old new"}, - {"?", chelp, 1, "?"}, + {"allow", callow, 0, "allow [uid]"}, + {"disallow", callow, 0, "disallow [uid]"}, + {"halt", chalt, 1, "halt", 1}, + {"stats", cstats, 0, "stats [-c]"}, + {"sync", csync, 1, "sync", 1}, + {"uname", cuname, 2, "uname uid"}, + {"users", cusers, 1, "users"}, + {"who", cwho, 1, "who"}, + {"arch", carch, 0, "arch [name hour [path] | now]", 1}, + {"dontarch", carch, 1, "dontarch", 1}, + {"link", clink, 3, "link file dir", 1}, + /* these will go in the future */ + {"check", ccheck, 1, "check"}, + {"debug", cdebug, 0, "debug [[+-]FLAGS | on | off]"}, + {"dump", cdump, 0, "dump [-l]"}, + {"dumpall", cdump, 0, "dumpall [-l]"}, + {"fids", cfids, 1, "fids"}, + {"locks", clocks, 2, "locks [on|off|dump]"}, + {"rerr", crwerr, 2, "rerr n"}, + {"werr", crwerr, 2, "werr n"}, + {"?", chelp, 1, "?"}, }; static void @@ -809,11 +951,63 @@ consprint("creepy> "); } +static void +xconsctl(char *p, int raise) +{ + char *args[5]; + int nargs, i; + + nargs = tokenize(p, args, nelem(args)); + if(nargs < 1) + return; + for(i = 0; i < nelem(cmds); i++){ + if(strcmp(args[0], cmds[i].name) != 0) + continue; + xrunlock(&fs->quiescence); + if(catcherror()){ + xrlock(&fs->quiescence); + if(raise) + error(nil); + consprint("%r\n"); + break; + } + if(cmds[i].nargs != 0 && cmds[i].nargs != nargs){ + if(raise) + error("usage: %s", cmds[i].usage); + consprint("usage: %s\n", cmds[i].usage); + }else{ + if(raise && cmds[i].isctl == 0) + error("command not available as a ctl"); + cmds[i].f(nargs, args); + } + noerror(); + 
xrlock(&fs->quiescence); + break; + } + if(i == nelem(cmds)){ + if(raise) + error("bad ctl"); + consprint("'%s'?\n", args[0]); + } +} + +long +consctl(char *p, long count) +{ + if(count == 0) + return 0; + p[count] = 0; + if(p[count-1] == '\n') + p[count-1] = 0; + xconsctl(p, 1); + return count; +} + long conswrite(char *ubuf, long count) { - char *c, *p, *np, *args[5]; - int nargs, i, nr; + char *c, *p, *np; + int i, nr; Rune r; static char buf[80]; static char *s, *e; @@ -827,11 +1021,14 @@ for(i = 0; i < count && s < e-UTFmax-1; i += nr){ nr = chartorune(&r, ubuf+i); memmove(s, ubuf+i, nr); - s += nr; - consprint("%C", r); + consprint("%C", r, r); + if(r == 0x08 && s > buf) /* ^H */ + s--; + else + s += nr; } *s = 0; - if(s == e-1){ + if(s >= e-UTFmax-1){ s = buf; *s = 0; error("command is too large"); @@ -846,28 +1043,7 @@ c = utfrune(p, '#'); if(c != nil) *c = 0; - nargs = tokenize(p, args, nelem(args)); - if(nargs < 1) - continue; - for(i = 0; i < nelem(cmds); i++){ - if(strcmp(args[0], cmds[i].name) != 0) - continue; - quiescent(Yes); - if(catcherror()){ - quiescent(No); - consprint("%r\n"); - break; - } - if(cmds[i].nargs != 0 && cmds[i].nargs != nargs) - consprint("usage: %s\n", cmds[i].usage); - else - cmds[i].f(nargs, args); - noerror(); - quiescent(No); - break; - } - if(i == nelem(cmds)) - consprint("'%s'?\n", args[0]); + xconsctl(p, 0); }while((p = np) != nil); s = buf; *s = 0; --- /sys/src/cmd/creepy/cmd.c Fri Apr 13 11:47:45 2012 +++ /sys/src/cmd/creepy/cmd.c Thu May 10 10:59:13 2012 @@ -1,9 +1,9 @@ #include "all.h" /* - * HUGE warning: + * CAUTION: * these commands do not perform checks at all. - * that means you know what you are doing if you use them. + * You must know what you are doing if you use them. * e.g., you can create multiple files with the same name * in the same directory. 
* @@ -43,11 +43,6 @@ } void -meltfids(void) -{ -} - -void rwusers(Memblk*) { } @@ -64,11 +59,6 @@ return s; } -void -countfidrefs(void) -{ -} - /* * Walks elems starting at f. * Ok if nelems is 0. @@ -90,13 +80,6 @@ fsdir = strdup(argv[1]); } -/* - * This is unrealistic in that it keeps the file locked - * during the entire put. This means that we can only give - * fslru() a chance before each put, and not before each - * write, because everything is going to be in use and dirty if - * we run out of memory. - */ static void fsput(int, char *argv[]) { @@ -129,32 +112,29 @@ putpath(p); error(nil); } - meltedpath(&p, p->nf, 1); - m = p->f[p->nf-1]; + m = prenew(p, p->nf); if(catcherror()){ - rwunlock(m, Wr); + xwunlock(m->mf); error(nil); } f = dfcreate(m, fn, usrid(d->uid), d->mode&(DMDIR|0777)); noerror(); addelem(&p, f); - decref(f); /* kept now in p */ - rwlock(f, Wr); - rwunlock(m, Wr); + mbput(f); /* kept now in p */ + xwlock(f->mf); + xwunlock(m->mf); if(catcherror()){ - rwunlock(f, Wr); + xwunlock(f->mf); error(nil); } if((d->mode&DMDIR) == 0){ off = 0; for(;;){ - if(fsmemfree() < Mminfree) - fslru(); nr = read(fd, buf, sizeof buf); if(nr <= 0) break; nw = dfpwrite(f, buf, nr, &off); - dprint("wrote %ld of %ld bytes\n", nw, nr); + dprint("%s: wrote %ld of %ld bytes\n", argv[0], nw, nr); off += nr; } } @@ -162,8 +142,8 @@ noerror(); noerror(); if(verb) - print("created %H\nat %H\n", f, m); - rwunlock(f, Wr); + print("%s: %H\nat: %H\n", argv[0], f, m); + xwunlock(f->mf); free(nm); putpath(p); close(fd); @@ -187,30 +167,29 @@ } p = walkto(nm, nil); f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); putpath(p); error(nil); } m = f->mf; - print("cat %-30s\t%M\t%5ulld\t%s %ulld refs\n", - m->name, (ulong)f->d.mode, f->d.length, m->uid, dbgetref(f->addr)); + print("cat %-30s\t%M\t%5ulld\t%s\n", + m->name, (ulong)f->d.mode, f->d.length, m->uid); if((f->d.mode&DMDIR) == 0){ off = 0; for(;;){ - if(fsmemfree() < 
Mminfree) - fslru(); nr = dfpread(f, buf, sizeof buf, off); if(nr <= 0) break; - write(1, buf, nr); + if(write(1, buf, nr) != nr) + break; off += nr; } } noerror(); noerror(); - rwunlock(f, Rd); + xrunlock(f->mf); putpath(p); free(nm); } @@ -237,20 +216,18 @@ } p = walkto(nm, nil); f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); putpath(p); error(nil); } m = f->mf; - print("get %-30s\t%M\t%5ulld\t%s %ulld refs\n", - m->name, (ulong)f->d.mode, f->d.length, m->uid, dbgetref(f->addr)); + print("get %-30s\t%M\t%5ulld\t%s\n", + m->name, (ulong)f->d.mode, f->d.length, m->uid); if((f->d.mode&DMDIR) == 0){ off = 0; for(;;){ - if(fsmemfree() < Mminfree) - fslru(); nr = dfpread(f, buf, sizeof buf, off); if(nr <= 0) break; @@ -264,7 +241,7 @@ close(fd); noerror(); noerror(); - rwunlock(f, Rd); + xrunlock(f->mf); putpath(p); free(nm); } @@ -276,13 +253,6 @@ } static void -fsrcl(int, char**) -{ - fsreclaim(); - fssync(); /* commit changes to disk */ -} - -static void fsdmp(int, char *argv[]) { fsdump(*argv[0] == 'l', strstr(argv[0], "all") != 0); @@ -299,12 +269,6 @@ } static void -fsout(int, char*[]) -{ - fslru(); -} - -static void fsrm(int, char *argv[]) { Memblk *f, *pf; @@ -323,21 +287,20 @@ } if(p->nf < 2) error("short path for rm"); - meltedpath(&p, p->nf-1, 1); f = p->f[p->nf-1]; - pf = p->f[p->nf-2]; - rwlock(f, Wr); + pf = prenew(p, p->nf-1); + xwlock(f->mf); if(catcherror()){ - rwunlock(f, Wr); - rwunlock(pf, Wr); + xwunlock(f->mf); + xwunlock(pf->mf); error(nil); } dfremove(pf, f); - p->f[p->nf-1] = nil; + p->nf--; noerror(); noerror(); noerror(); - rwunlock(pf, Wr); + xwunlock(pf->mf); putpath(p); free(nm); } @@ -349,35 +312,35 @@ } static void -fschk(int, char**) -{ - if(fscheck() != 0) - error("check failed"); -} - -static void fserr(int, char *argv[]) { if(*argv[0] == 'r'){ - swreaderr = atoi(argv[1]); - print("sw read err count = %d\n", swreaderr); + fs->swreaderr = atoi(argv[1]); + print("sw read err 
count = %d\n", fs->swreaderr); }else{ - swwriteerr = atoi(argv[1]); - print("sw write err count = %d\n", swwriteerr); + fs->swwriteerr = atoi(argv[1]); + print("sw write err count = %d\n", fs->swwriteerr); } } static void -fspol(int, char**) +fschk(int, char*[]) { - fspolicy(Post); + fscheck(); +} + +static void +fsgc(int, char*[]) +{ + sendul(fs->sweepc, 0); + recvul(fs->sweepec); } static void usage(void) { - fprint(2, "usage: %s [-DFLAGS] [-dv] [-f disk] cmd...\n", argv0); - exits("usage"); + fprint(2, "usage: %s [-DFLAGS] [-v] [-f disk] cmd...\n", argv0); + threadexits("usage"); } static Cmd cmds[] = @@ -390,17 +353,14 @@ {"dumpall", fsdmp, 1, "dumpall"}, {"ldump", fsdmp, 1, "ldump"}, {"ldumpall", fsdmp, 1, "ldumpall"}, + {"mark", fsgc, 1, "fsmark"}, {"sync", fssnap, 1, "sync"}, - {"snap", fssnap, 1, "snap"}, - {"rcl", fsrcl, 1, "rcl"}, {"dbg", fsdbg, 2, "dbg!n"}, - {"out", fsout, 1, "out"}, {"rm", fsrm, 2, "rm!what"}, {"stats", fsst, 1, "stats"}, - {"check", fschk, 1, "check"}, {"rerr", fserr, 2, "rerr!n"}, {"werr", fserr, 2, "werr!n"}, - {"pol", fspol, 1, "pol"}, + {"check", fschk, 1, "check"}, }; void @@ -408,14 +368,10 @@ { char *dev; char *args[Nels]; - int i, j, nargs, check; + int i, j, nargs; dev = "disk"; - check = 0; ARGBEGIN{ - case 'c': - check++; - break; case 'v': verb++; break; @@ -432,15 +388,17 @@ if(argc == 0) usage(); fatalaborts = 1; + fmtinstall('D', daddrfmt); fmtinstall('H', mbfmt); fmtinstall('M', dirmodefmt); fmtinstall('P', pathfmt); + threadsetname("cmd"); errinit(Errstack); if(catcherror()){ fprint(2, "cmd failed: %r\n"); threadexitsall("failed"); } - fsopen(dev, Normal, Wr); + fsopen(dev, Normal, 0); for(i = 0; i < argc; i++){ if(verb>1) fsdump(0, Mem); @@ -460,10 +418,6 @@ for(j = 0; j < nelem(cmds); j++) fprint(2, "\t%s\n", cmds[j].usage); break; - } - if(check){ - print("%% check\n"); - fscheck(); } } if(verb>1) --- /sys/src/cmd/creepy/conf.h Wed Apr 11 20:05:51 2012 +++ /sys/src/cmd/creepy/conf.h Thu May 10 12:55:12 2012 @@ -7,7 
+7,6 @@ GiB = MiB * 1024UL, #ifdef TESTING - Incr = 1, Fsysmem = 800*KiB, /* size for in-memory block array */ /* disk parameters; don't change */ @@ -15,8 +14,7 @@ Ndptr = 2, /* # of direct data pointers */ Niptr = 2, /* # of indirect data pointers */ #else - Incr = 16, - Fsysmem = 1*GiB+GiB/2UL, /* size for in-memory block array */ + Fsysmem = 1*GiB, /* size for fs in-memory block array */ /* disk parameters; don't change */ Dblksz = 8*KiB, /* disk block size */ @@ -24,13 +22,13 @@ Niptr = 4, /* # of indirect data pointers */ #endif - + Incr = 16, + Wormmem = 256UL*MiB, /* size for worm in-memory block array */ Syncival = 5*60, /* desired sync intervals (s) */ - Mmaxdirtypcent = 50, /* Max % of blocks dirty in mem */ Mminfree = 200, /* # blocks when low on mem blocks */ Dminfree = 2000, /* # blocks when low on disk blocks */ Dminattrsz = Dblksz/2, /* min size for attributes */ - + Mmaxdirty = 1000, /* max # of dirty blocks */ Nahead = 10 * Dblksz, /* # of bytes to read ahead */ /* @@ -51,11 +49,14 @@ Mmaxfree = 10*Mminfree, /* high on mem blocks */ Dmaxfree = 2*Dminfree, /* high on disk blocks */ - Mzerofree = 10, /* out of memory blocks */ - Dzerofree = 10, /* out of disk blocks */ + + Mzerofree = Mminfree/2, /* we think we are out of mem blocks */ + Dzerofree = Dminfree/10, /* we think we are out of disk blocks */ Unamesz = 20, - Statsbufsz = 1024, + Statsbufsz = 4*KiB, + + Nscanfree = 16, /* # of times to retry in newdaddr */ }; --- /sys/src/cmd/creepy/dbg.c Tue Mar 6 15:51:08 2012 +++ /sys/src/cmd/creepy/dbg.c Fri May 11 13:12:25 2012 @@ -1,7 +1,6 @@ -#include -#include -#include +#include "all.h" +int fatalaborts; char dbg[256]; static char sdbg[256]; static Ref nodbg; @@ -41,3 +40,652 @@ memset(dbg, 0, sizeof dbg); } +void +fatal(char *fmt, ...) 
+{ + va_list arg; + char *s; + + va_start(arg, fmt); + s = vsmprint(fmt, arg); + va_end(arg); + if(fs != nil && fs->dev != nil) + fprint(2, "%s: %s: fatal: %s\n", argv0, fs->dev, s); + else + fprint(2, "%s: fatal: %s\n", argv0, s); + free(s); + if(fatalaborts) + abort(); + threadexitsall("fatal"); +} + +void +warn(char *fmt, ...) +{ + va_list arg; + char *s; + + va_start(arg, fmt); + s = vsmprint(fmt, arg); + va_end(arg); + if(fs != nil && fs->dev != nil) + fprint(2, "%s: %s: %s: %s\n", argv0, fs->dev, threadgetname(), s); + else + fprint(2, "%s: %s: %s\n", argv0, threadgetname(), s); + free(s); +} + +void +warnerror(char *fmt, ...) +{ + va_list arg; + char err[128]; + + va_start(arg, fmt); + vseprint(err, err+sizeof err, fmt, arg); + va_end(arg); + if(fs != nil && fs->dev != nil) + fprint(2, "%s: %s: %s: %s\n", argv0, fs->dev, threadgetname(), err); + else + fprint(2, "%s: %s: %s\n", argv0, threadgetname(), err); + error(err); +} + +static int +validaddr(daddrt addr) +{ + if(addr == 0) + return 1; + if(addr % Dblksz != 0) + return 0; + return ISDADDR(addr) && DADDR(addr) >= Dblksz && DADDR(addr) < fs->limit; +} + +static int +validmaddr(daddrt addr) +{ + Memblk *p; + + if(addr == 0) + return 1; + if(!ISDADDR(addr)){ + p = (Memblk*)addr; + return p >= fs->blk && p < fs->blk + fs->mballoc.nalloc; + } + if(addr % Dblksz != 0) + return 0; + return DADDR(addr) >= Dblksz && DADDR(addr) < fs->limit; +} + +void +checkblk(Memblk *b) +{ + Dsuperdata *d1, *d2; + int i, type; + daddrt *de; + long doff, sz; + + type = b->type & ~DBdirflag; + switch(type){ + case DBfree: + case DBattr: + warnerror("%s block on disk", tname(b->type)); + break; + case DBtag: + break; + case DBsuper: + if(b->d.magic != MAGIC) + warnerror("super: magic"); + d1 = &b->d.Dsuperdata; + d2 = &b->d.dup; + if(b->state == MBin && memcmp(d1, d2, sizeof(Dsuperdata)) != 0){ + warn("partially written superblock, using old."); + if(b->d.dup.epoch < b->d.epoch) + b->d.Dsuperdata = b->d.dup; + } + if(b->d.eaddr > 
fs->limit || b->d.eaddr < Dblk0addr) + warnerror("super: eaddr out of range"); + if(b->state != MBmem && !validaddr(b->d.root)) + warnerror("super: root %D out of range", b->d.root); + if(b->state == MBmem && !validmaddr(b->d.root)) + warnerror("super: root %D out of range", b->d.root); + if(b->d.dblksz != Dblksz) + warnerror("bad Dblksz"); + if(b->d.dminattrsz != Dminattrsz) + warnerror("bad Dminattrsz"); + if(b->d.ndptr != Ndptr) + warnerror("bad ndptr"); + if(b->d.niptr != Niptr) + warnerror("bad niptr"); + if(b->d.embedsz != Embedsz) + warnerror("bad Embedsz"); + if(b->d.dptrperblk != Dptrperblk) + warnerror("bad Dptrperblk"); + if(b->d.dtagperblk != Dtagperblk) + warnerror("bad Dtagperblk"); + break; + case DBdata: + if(b->type&DBdirflag){ + de = b->d.ptr; + for(i = 0; i < Dptrperblk; i++){ + if(b->state != MBmem && !validaddr(de[i])) + warnerror("dir: %D out of range", de[i]); + if(b->state == MBmem && !validmaddr(de[i])) + warnerror("dir: %D out of range", de[i]); + } + } + break; + case DBfile: + if(catcherror()){ + for(i = 0; i < nelem(b->d.dptr); i++) + b->d.dptr[i] = 0; + for(i = 0; i < nelem(b->d.iptr); i++) + b->d.iptr[i] = 0; + error(nil); + } + for(i = 0; i < nelem(b->d.dptr); i++){ + if(b->state != MBmem && !validaddr(b->d.dptr[i])) + warnerror("file: dptr %D out of range", b->d.dptr[i]); + if(b->state == MBmem && !validmaddr(b->d.dptr[i])) + warnerror("file: dptr %D out of range", b->d.dptr[i]); + } + + for(i = 0; i < nelem(b->d.iptr); i++){ + if(b->state != MBmem && !validaddr(b->d.iptr[i])) + warnerror("file: iptr %D out of range", b->d.iptr[i]); + if(b->state == MBmem && !validmaddr(b->d.iptr[i])) + warnerror("file: iptr %D out of range", b->d.iptr[i]); + } + /* dbget(DBfile) may load a directory, we don't know by now */ + if((b->type == DBdir) || (b->d.mode&DMDIR) != 0){ + doff = embedattrsz(b); + if(doff > Embedsz) + warnerror("file: wrong attr size %ld", doff); + sz = Embedsz-doff; + de = (daddrt*)(b->d.embed+doff); + for(i = 0; i < 
sz/Daddrsz; i++){ + if(b->state != MBmem && !validaddr(de[i])) + warnerror("file: %D out of range", de[i]); + if(b->state == MBmem && !validmaddr(de[i])) + warnerror("file: %D out of range", de[i]); + } + } + noerror(); + break; + default: + if(type < DBptr0 || type >= DBptr0 + Niptr) + warnerror("unknown block type %d", b->type); + if(catcherror()){ + for(i = 0; i < Dptrperblk; i++) + b->d.ptr[i] = 0; + error(nil); + } + for(i = 0; i < Dptrperblk; i++){ + if(b->state != MBmem && !validaddr(b->d.ptr[i])) + warnerror("ptr: %D out of range", b->d.ptr[i]); + if(b->state == MBmem && !validmaddr(b->d.ptr[i])) + warnerror("ptr: %D out of range", b->d.ptr[i]); + } + noerror(); + } +} + +char* +mname(int m) +{ + static char *nms[] = { + [Normal] "normal", + [Rdonly] "rdonly", + [Worm] "worm", + }; + + if(m < 0 || m >= nelem(nms)) + return "BADMODE"; + return nms[m]; + +} + +char* +tname(int t) +{ + static char*nms[] = { + [DBfree] "DBfree", + [DBattr] "UNUSED", + [DBsuper] "DBsuper", + [DBtag] "DBtag", + [DBfile] "DBfile", + [DBdata] "DBdata", + [DBptr0] "DBptr0", + [DBptr0+1] "DBptr1", + [DBptr0+2] "DBptr2", + [DBptr0+3] "DBptr3", + [DBptr0+4] "DBptr4", + [DBptr0+5] "DBptr5", + [DBptr0+6] "DBptr6", + [DBdir] "DBdir", + [DBdirdata] "DBdirdata", + [DBdirptr0] "DBdirptr0", + [DBdirptr0+1] "DBdirptr1", + [DBdirptr0+2] "DBdirptr2", + [DBdirptr0+3] "DBdirptr3", + [DBdirptr0+4] "DBdirptr4", + [DBdirptr0+5] "DBdirptr5", + [DBdirptr0+6] "DBdirptr6", + }; + + if(t < 0 || t >= nelem(nms)) + return "BADTYPE"; + return nms[t]; +} + +char* +sname(int s) +{ + static char*nms[] = { + [MBfree] "MBfree", + [MBmem] "MBmem", + [MBout] "MBout", + [MBclean] "MBclean", + [MBin] "MBin", + [MBlru] "MBlru", + [MBerr] "MBerr", + }; + + if(s < 0 || s >= nelem(nms)) + return "BADSTATE"; + return nms[s]; +} + +int fullfiledumps = 0; + +/* + * These are only for debug. 
+ */ +int mbtab; + +static void +fmttab(Fmt *fmt, int t, int c) +{ + while(t-- > 0) + fmtprint(fmt, "%c ", c?'.':' '); +} + +static void +fmtptr(Fmt *fmt, daddrt addr, char *tag, int n) +{ + Memblk *b; + + if(addr == 0) + return; + b = mbget(addr); + if(b == nil){ + fmttab(fmt, mbtab, 0); + fmtprint(fmt, "%s[%d] = %D \n", tag, n, addr); + }else{ + fmtprint(fmt, "%H", b); + mbput(b); + } +} + +static void +dumpdirdata(Fmt *fmt, Memblk *b) +{ + long doff; + daddrt *p; + int i; + + if(b->d.length == 0) + return; + doff = embedattrsz(b); + if(doff < Embedsz){ + fmttab(fmt, mbtab, 0); + p = (daddrt*)(b->d.embed+doff); + for(i = 0; i < 5 && (uchar*)p < b->d.embed+Embedsz - Daddrsz; i++) + fmtprint(fmt, "%s%D", i?" ":"data: ", *p++); + fmtprint(fmt, "\n"); + } +} + +/* used in debug prints to print just part of huge values */ +#define EP(e) ((e)&0xFFFFFFFFUL) + +int +daddrfmt(Fmt *fmt) +{ + daddrt addr; + + addr = va_arg(fmt->args, daddrt); + if(ISDADDR(addr)) + return fmtprint(fmt, "d%#010ullx", EP(DADDR(addr))); + else + return fmtprint(fmt, "m%#010ullx", EP(addr)); +} + +int +namefmt(Fmt *fmt) +{ + char buf[128], *p; + + getwd(buf, sizeof buf); + p = va_arg(fmt->args, char*); + if(p == nil) + return fmtprint(fmt, "%s", buf); + if(p[0] == '/') + return fmtprint(fmt, "%s", p); + return fmtprint(fmt, "%s/%s", buf, p); +} + +int +mbfmt(Fmt *fmt) +{ + Memblk *b; + int i, n, type; + u64int e; + + b = va_arg(fmt->args, Memblk*); + if(b == nil) + return fmtprint(fmt, "\n"); + nodebug(); + type = b->type&~DBdirflag; + fmttab(fmt, mbtab, type == DBfile); + + fmtprint(fmt, "%s", tname(b->type)); + fmtprint(fmt, " %s", sname(b->state)); + if(type == DBfile && b->mf != nil) + fmtprint(fmt, " '%s'", b->mf->name); + fmtprint(fmt, " %D", b->addr); + e = 0; + if(fs->mode != Worm && type != DBtag && type != DBsuper && ISDADDR(b->addr)) + e = dbgettag(b->addr); + fmtprint(fmt, " r=%d e=%ulld", b->ref, e); + + switch(type){ + case DBfree: + case DBattr: + case DBdata: + fmtprint(fmt, 
"\n"); + break; + case DBtag: + for(i = n = 0; i < Dtagperblk; i++){ + if(n++%3 == 0){ + fmtprint(fmt, "\n"); + fmttab(fmt, mbtab, 0); + } + fmtprint(fmt, " "); + fmtprint(fmt, "[%02d]%D=%ud", + i, b->addr+i*Dblksz, + b->d.tag[i]); + } + fmtprint(fmt, "\n"); + break; + case DBfile: + if(b->mf == nil){ + fmtprint(fmt, " no mfile\n"); + break; + } + fmtprint(fmt, " nr%d nw%d prev %D\n", + b->mf->readers, b->mf->writer, b->d.prev); + fmttab(fmt, mbtab, 0); + fmtprint(fmt, " %M %s %s %s len %ulld ndents %ulld\n", + (ulong)b->d.mode, + usrname(b->d.uid), usrname(b->d.gid), usrname(b->d.muid), + b->d.length, b->d.ndents); + mbtab++; + if(b->d.mode&DMDIR) + dumpdirdata(fmt, b); + for(i = 0; i < nelem(b->d.dptr); i++) + fmtptr(fmt, b->d.dptr[i], "d", i); + for(i = 0; i < nelem(b->d.iptr); i++) + fmtptr(fmt, b->d.iptr[i], "i", i); + mbtab--; + break; + case DBsuper: + fmtprint(fmt, "\n"); + fmttab(fmt, mbtab, 0); + fmtprint(fmt," epoch %ulld fepoch %ulld", b->d.epoch, b->d.fepoch); + fmtprint(fmt," eaddr %D root %D", b->d.eaddr, b->d.root); + fmtprint(fmt," %s\n", mname(b->d.fsmode)); + break; + default: + if(type < DBptr0 || type >= DBptr0+Niptr) + fmtprint(fmt, " \n", b->type); + else if(fullfiledumps){ + fmtprint(fmt, "\n"); + mbtab++; + for(i = 0; i < Dptrperblk; i++) + fmtptr(fmt, b->d.ptr[i], "p", i); + mbtab--; + }else + fmtprint(fmt, "\n"); + } + debug(); + return 0; +} + +/* + * nind is | number of indirections. 
+ */ +static void +ptrfetch(daddrt addr, int nind, int isdisk) +{ + int i; + Memblk *b; + + if(addr == 0) + return; + if(isdisk) + b = dbget(DBdata+nind, addr); + else{ + b = mbget(addr); + if(b == nil) + return; + } + if(catcherror()){ + mbput(b); + return; + } + if(nind > 0) + for(i = 0; i < Dptrperblk; i++) + ptrfetch(b->d.ptr[i], nind-1, isdisk); + noerror(); + mbput(b); +} + +static void +dumpdents(Memblk *d, int isdisk) +{ + Blksl sl; + vlong off; + daddrt *de; + Memblk *f; + int i; + + mbtab++; + if(catcherror()){ + mbtab--; + return; + } + + off = 0; + for(;;){ + sl = dfslice(d, Dblksz, off, Rd); + if(sl.len == 0) + break; + if(sl.b == nil) + continue; + if(catcherror()){ + mbput(sl.b); + error(nil); + } + de = sl.data; + for(i = 0; i < sl.len/Daddrsz; i++) + if(de[i] != 0){ + if(catcherror()){ + warn("%D: %r", de[i]); + continue; + } + if(isdisk) + f = dbget(DBfile, de[i]); + else + f = mbget(de[i]); + dfdump(f, isdisk); + noerror(); + mbput(f); + } + off += sl.len; + noerror(); + mbput(sl.b); + } + noerror(); + mbtab--; +} + +void +dfdump(Memblk *f, int isdisk) +{ + int i; + + if(f == nil) + return; + + /* visit the blocks to fetch them if needed, so %H prints them. 
*/ + for(i = 0; i < nelem(f->d.dptr); i++) + ptrfetch(f->d.dptr[i], f->type&DBdirflag, isdisk); + for(i = 0; i < nelem(f->d.iptr); i++) + ptrfetch(f->d.iptr[i], (f->type&DBdirflag)+i+1, isdisk); + fprint(2, "%H\n", f); + if(f->type == DBdir) + dumpdents(f, isdisk); +} + +void +fsdump(int full, int disktoo) +{ + int i, n, x; + Memblk *b; + extern int fullfiledumps; + daddrt addr, eaddr; + + x = fullfiledumps; + fullfiledumps = full; + nodebug(); + fprint(2, "\n\nfsys '%s' limit %#ullx super %D root %D:\n", + fs->dev, fs->limit, fs->super->addr, fs->root->addr); + fprint(2, "%H\n", fs->super); + dfdump(fs->root, disktoo); + if(1){ + n = 0; + fprint(2, "hash:\t"); + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next){ + if(++n % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "%D ", b->addr); + } + fprint(2, "\n"); + } + if(0){ + mlistdump("dtags", &fs->dtags); + mlistdump("clean", &fs->clean); + mlistdump("out", &fs->out); + } + if(0){ + eaddr = fs->super->d.eaddr; + fprint(2, "tags:\n"); + for(addr = Dblk0addr; addr < eaddr; addr += Dblksz*Dtagperblk){ + b = dbget(DBtag, MKDADDR(addr)); + fprint(2, " %H", b); + mbput(b); + } + } + fprint(2, "%s\n", updatestats(0, 1)); + fullfiledumps = x; + debug(); +} + +long +fscheck(void) +{ + Memblk *b; + long err, i, n; + + xwlock(&fs->quiescence); + dprint("mem check...\n"); + xwlock(&fs->dquiescence); + if(catcherror()){ + xwunlock(&fs->dquiescence); + xwunlock(&fs->quiescence); + error(nil); + } + err = 0; + if(fs->ndfree > fs->ndblk){ + err++; + warn("ndfree out of range: %ulld > %ulld", fs->ndfree, fs->ndblk); + } + for(b = fs->clean.hd; b != nil; b = b->lnext) + if(b->state != MBclean){ + err++; + warn("clean list: %H", b); + } + for(b = fs->out.hd; b != nil; b = b->lnext) + if(b->state != MBout){ + err++; + warn("out list: %H", b); + } + n = 0; + for(i = 0; i < nelem(fs->fhash); i++) + for(b = fs->fhash[i].b; b != nil; b = b->next) + n++; + if(n != fs->mballoc.nalloc - fs->mballoc.nfree){ + 
err++; + warn("%ld hashed != %uld blocks - %uld free", + n, fs->mballoc.nalloc, fs->mballoc.nfree); + } + for(b = fs->blk; b < fs->blk + fs->mballoc.nalloc; b++){ + switch(b->state){ + case MBfree: + case MBerr: + continue; + case MBclean: + if(b->type != DBsuper) + if(b->lnext == nil && b->lprev == nil && b != fs->clean.hd){ + err++; + warn("clean miss: %H", b); + } + break; + case MBout: + if(b->type != DBsuper) + if(b->lnext == nil && b->lprev == nil && b != fs->out.hd){ + err++; + warn("out miss: %H", b); + } + break; + case MBmem: + if(b->type != DBsuper && b->type != DBtag) + if(b->addr != (uintptr)b){ + err++; + warn("bad addr: %H", b); + } + break; + case MBin: + case MBlru: + err++; + warn("io miss: %H", b); + continue; + } + if(catcherror()){ + err++; + continue; + } + checkblk(b); + noerror(); + } + noerror(); + xwunlock(&fs->dquiescence); + xwunlock(&fs->quiescence); + if(err != 0) + warn("check: %ld errors", err); + else + dprint("mem check: passes\n"); + return err; +} --- /sys/src/cmd/creepy/dbg.h Thu Mar 22 10:15:26 2012 +++ /sys/src/cmd/creepy/dbg.h Thu May 10 19:56:44 2012 @@ -1,30 +1,34 @@ /* * '9': 9p - * 'N': mblk/dblk alloc/free chdentry, drefs + * 'A': address * 'D': disk * 'E': fids * 'F': slices, indirects, dirnth - * 'K': reclaim * 'M': mblk/dblk gets puts + * 'N': mblk/dblk alloc/free chdentry + * 'O': lru blocks out * 'P': procs * 'R': block read + * 'S': block state changes * 'W': block write * 'X': ix - * 'd': general debug - * 'O': lru blocks out * 'Z': policy + * 'V': archive + * 'd': general debug */ #define d9print(...) if(!dbg['9']){}else fprint(2, __VA_ARGS__) -#define dNprint(...) if(!dbg['N']){}else fprint(2, __VA_ARGS__) +#define dAprint(...) if(!dbg['A']){}else fprint(2, __VA_ARGS__) #define dEprint(...) if(!dbg['E']){}else fprint(2, __VA_ARGS__) #define dFprint(...) if(!dbg['F']){}else fprint(2, __VA_ARGS__) -#define dKprint(...) if(!dbg['K']){}else fprint(2, __VA_ARGS__) #define dMprint(...) 
if(!dbg['M']){}else fprint(2, __VA_ARGS__) +#define dNprint(...) if(!dbg['N']){}else fprint(2, __VA_ARGS__) +#define dOprint(...) if(!dbg['O']){}else fprint(2, __VA_ARGS__) #define dPprint(...) if(!dbg['P']){}else fprint(2, __VA_ARGS__) #define dRprint(...) if(!dbg['R']){}else fprint(2, __VA_ARGS__) +#define dSprint(...) if(!dbg['S']){}else fprint(2, __VA_ARGS__) #define dWprint(...) if(!dbg['W']){}else fprint(2, __VA_ARGS__) #define dXprint(...) if(!dbg['X']){}else fprint(2, __VA_ARGS__) -#define dOprint(...) if(!dbg['O']){}else fprint(2, __VA_ARGS__) +#define dVprint(...) if(!dbg['V']){}else fprint(2, __VA_ARGS__) #define dZprint(...) if(!dbg['Z']){}else fprint(2, __VA_ARGS__) #define dprint(...) if(!dbg['d']){}else fprint(2, __VA_ARGS__) extern char dbg[256]; --- /sys/src/cmd/creepy/dblk.c Thu Apr 26 19:07:03 2012 +++ /sys/src/cmd/creepy/dblk.c Wed May 9 12:30:18 2012 @@ -1,726 +1,347 @@ #include "all.h" /* - * disk blocks, built upon memory blocks provided by mblk.c - * see dk.h + * disk blocks. 
*/ -int swreaderr, swwriteerr; - -static void -okaddr(daddrt addr) -{ - if((addr&Fakeaddr) == 0 && (addr < Dblksz || addr >= fs->limit)) - warnerror("bad address d%#010ullx", addr); -} - -static void -okdiskaddr(daddrt addr) -{ - if((addr&Fakeaddr) != 0 || addr < Dblksz || addr >= fs->limit) - fatal("okdiskaddr %#ullx", addr); -} - -void -dbcopy(daddrt dst, daddrt src) -{ - static Diskblk d; - static QLock lk; - - xqlock(&lk); - dWprint("dbcopy d%#010ullx -> d%#010ullx\n", src, dst); - if(pread(fs->fd, &d, sizeof d, src) != Dblksz){ - xqunlock(&lk); - warnerror("dbcopy: read: d%#ullx: %r", src); - } - if(pwrite(fs->fd, &d, sizeof d, dst) != Dblksz){ - xqunlock(&lk); - warnerror("dbcopy: write: d%#ullx: %r", dst); - } - xqunlock(&lk); -} - -void -dbclear(u64int tag, daddrt addr) -{ - static Diskblk d; - static QLock lk; - - dWprint("dbclear d%#010ullx\n", addr); - xqlock(&lk); - d.tag = tag; - if(pwrite(fs->fd, &d, sizeof d, addr) != Dblksz){ - xqunlock(&lk); - warnerror("dbclear: d%#ullx: %r", addr); - } - xqunlock(&lk); -} - void -meltedref(Memblk *rb) +okdiskaddr(daddrt addr) { - if(canqlock(&fs->refs)) - fatal("meltedref rlk"); - if(rb->frozen){ - dWprint("melted ref dirty=%d\n", rb->dirty); - dbwrite(rb); - rb->frozen = 0; - } + if(addr % Dblksz != 0) + fatal("okdiskaddr %D", addr); + if(ISDADDR(addr) == 0 || DADDR(addr) < Dblksz || DADDR(addr) >= fs->limit) + fatal("okdiskaddr %D", addr); } static daddrt -newblkaddr(void) -{ - daddrt addr, naddr; - - xqlock(fs); - if(catcherror()){ - xqunlock(fs); - error(nil); - } -Again: - if(fs->super == nil) - addr = Dblksz; - else if(fs->super->d.free != 0){ - addr = fs->super->d.free; - okdiskaddr(addr); - /* - * Caution: can't acquire new locks while holding the fs lock, - * but dbgetref may allocate blocks. - */ - xqunlock(fs); - if(catcherror()){ - xqlock(fs); /* restore the default in this fn. 
*/ - error(nil); - } - naddr = dbgetref(addr); /* acquires locks */ - if(naddr != 0) - okdiskaddr(naddr); - noerror(); - xqlock(fs); - if(addr != fs->super->d.free){ - /* had a race */ - goto Again; - } - fs->super->d.free = naddr; - fs->super->d.ndfree--; - changed(fs->super); - }else if(fs->super->d.eaddr < fs->limit){ - addr = fs->super->d.eaddr; - fs->super->d.eaddr += Dblksz; - changed(fs->super); - /* - * ref blocks are allocated and initialized on demand, - * and they must be zeroed before used. - * do this holding the lock so others find everything - * initialized. - */ - if(((addr-Dblk0addr)/Dblksz)%Nblkgrpsz == 0){ - dprint("new ref blk addr = d%#ullx\n", addr); - /* on-demand fs initialization */ - dbclear(TAG(DBref, 0, addr), addr); - dbclear(TAG(DBref, 0, addr+Dblksz), addr+Dblksz); - dbclear(TAG(DBref, 0, addr+2*Dblksz), addr+2*Dblksz); - addr += 3*Dblksz; - fs->super->d.eaddr += 3*Dblksz; - if(fs->super->d.eaddr > fs->limit) - sysfatal("disk is full"); - } - }else{ - addr = 0; - /* preserve backward compatibility with fossil */ - sysfatal("disk is full"); - } - - noerror(); - xqunlock(fs); - okaddr(addr); - dNprint("newblkaddr = d%#ullx\n", addr); - return addr; -} - -daddrt -addrofref(daddrt refaddr, int idx) +tagaddr(daddrt addr, int *idx) { - return refaddr + idx*Dblksz; -} - -daddrt -refaddr(daddrt addr, int *idx) -{ - daddrt bno, refaddr; + daddrt bno; + addr = DADDR(addr); addr -= Dblk0addr; bno = addr/Dblksz; - *idx = bno%Nblkgrpsz; - refaddr = Dblk0addr + bno/Nblkgrpsz * Nblkgrpsz * Dblksz; - return refaddr; + *idx = bno%Dtagperblk; + return MKDADDR(Dblk0addr + bno/Dtagperblk * Dtagperblk * Dblksz); } /* - * db*ref() functions update the on-disk reference counters. - * memory blocks use Memblk.Ref instead. Beware. + * BUG: disktohost must convert from little endian if we are + * on a big endian system. + * Also, dbwrite should convert to little endian by calling hosttodisk(). 
*/ -static daddrt -dbaddref(daddrt addr, int delta, int set, Memblk **rbp, int *ip) -{ - Memblk *rb; - daddrt raddr, ref; - int i; - - if(addr == 0) - return 0; - if(addr&Fakeaddr) /* root and ctl files don't count */ - return 0; - - raddr = refaddr(addr, &i); - rb = dbget(DBref, raddr); - - xqlock(&fs->refs); - if(catcherror()){ - mbput(rb); - xqunlock(&fs->refs); - debug(); - error(nil); - } - if(delta != 0 || set != 0){ - if(delta != 0){ - if(rb->d.ref[i] >= Dblksz) - fatal("dbaddref: d%#010ullx in free list", rb->d.ref[i]); - if(rb->d.ref[i] == 0) - fatal("dbaddref: d%#010ullx double free", rb->d.ref[i]); - } - meltedref(rb); - if(set) - rb->d.ref[i] = set; - else - rb->d.ref[i] += delta; - rb->dirty = 1; - } - ref = rb->d.ref[i]; - noerror(); - xqunlock(&fs->refs); - if(rbp == nil) - mbput(rb); - else - *rbp = rb; - if(ip != nil) - *ip = i; - return ref; -} - -daddrt -dbgetref(daddrt addr) -{ - if(fs->worm) - return 6ULL; - return dbaddref(addr, 0, 0, nil, nil); -} - -void -dbsetref(daddrt addr, int ref) -{ - daddrt n; - - if(fs->worm) - return; - n = dbaddref(addr, 0, ref, nil, nil); - dNprint("dbsetref %#010ullx -> %ulld\tpc %#p\n", addr, n, getcallerpc(&addr)); -} - -daddrt -dbincref(daddrt addr) +static void +disktohost(Memblk *) { - daddrt n; + static union + { + u64int i; + uchar m[BIT64SZ]; + } u; - if(fs->worm) - return 6ULL; - n = dbaddref(addr, +1, 0, nil, nil); - dNprint("dbincref %#010ullx -> %ulld\tpc %#p\n", addr, n, getcallerpc(&addr)); - return n; + u.i = 0x1122334455667788ULL; + if(u.m[0] != 0x88) + fatal("implement hosttodisk/disktohost for big endian"); } -static daddrt -dbdecref(daddrt addr, Memblk **rb, int *idx) +u64int +dbgettag(daddrt addr) { - daddrt n; + int idx; + Memblk *tb; + u64int e; - if(fs->worm) - return 6ULL; - n = dbaddref(addr, -1, 0, rb, idx); - dNprint("dbdecref %#010ullx -> %ulld\tpc %#p\n", addr, n, getcallerpc(&addr)); - return n; + tb = dbgetlocked(DBtag, tagaddr(addr, &idx)); + e = tb->d.tag[idx]; + 
xqunlock(&tb->slk); + mbput(tb); + return e; } -static void -nodoublefree(daddrt addr) +u64int +dbsettag(daddrt addr, u64int e) { - daddrt a; + int idx; + Memblk *tb; + u64int old; - if(addr == 0) - return; - for(a = fs->super->d.free; a != 0; a = dbgetref(a)) - if(a == addr) - fatal("double free for addr d%#ullx", addr); + tb = dbget(DBtag, tagaddr(addr, &idx)); + mbrenewlocked(tb); + old = tb->d.tag[idx]; + tb->d.tag[idx] = e; + xqunlock(&tb->slk); + mbput(tb); + return old; } -static long xdbput(Memblk *b, int type, daddrt addr, int isdir); - static long -dropdentries(void *p, int n) +dbread(Memblk *b) { - int i; - daddrt *d; - long tot; + static int nr; + long tot, n; + uchar *p; + daddrt addr; - tot = 0; - d = p; - for(i = 0; i < n; i++) - if(d[i] != 0) - tot += xdbput(nil, DBfile, d[i], 0); - return tot; -} + okdiskaddr(b->addr); + if(b->state != MBin) + fatal("dbread: %s", sname(b->state)); -/* - * Drop a on-disk reference. - * When no references are left, the block is unlinked from the hash - * (and its hash ref released), and disk references to blocks pointed to by - * this blocks are also decremented (and perhaps such blocks released). - * - * More complex than needed, because we don't want to read a data block - * just to release a reference to it, unless it's a data block for a directory. - * - * b may be nil if type and addr are given, for recursive calls. - */ + p = b->d.data; + addr = DADDR(b->addr); -static long -xdbput(Memblk *b, int type, daddrt addr, int isdir) -{ - daddrt ref; - Memblk *mb, *rb; - int i, idx, sz; - uvlong doff; - long tot; - - if(b == nil && addr == 0) - return 0; - if(fs->worm) - return 1; - okdiskaddr(addr); - ref = dbgetref(addr); - if(ref > 2*Dblksz) - fatal("dbput: d%#010ullx: double free", addr); - - ref = dbdecref(addr, &rb, &idx); - if(ref != 0){ - dKprint("dbput: d%#010ullx dr %#ullx type %s\n", - addr, ref, tname(type)); - mbput(rb); - return 0; - } - /* - * Gone from disk, be sure it's also gone from memory. 
- */ - if(catcherror()){ - mbput(rb); - error(nil); - } - mb = b; - if(mb == nil){ - if(isdir || type != DBdata) - mb = dbget(type, addr); - else - mb = mbget(type, addr, Dontmk); - } - - dKprint("dbput: free: %H\n", mb); - tot = 1; - if(mb != nil){ - isdir |= DBDIR(mb); - assert(type == mb->type && addr == mb->addr && mb->ref > 1); - mbunhash(mb, 0); - } - if(catcherror()){ - if(mb != b) - mbput(mb); - error(nil); - } - switch(type){ - case DBsuper: - case DBref: - fatal("dbput: super or ref"); - case DBdata: - if(isdir) - tot += dropdentries(mb->d.data, Dblkdatasz/Daddrsz); - break; - case DBattr: - break; - case DBfile: - if(isdir) - assert(mb->d.mode&DMDIR); - else - assert((mb->d.mode&DMDIR) == 0); - tot += xdbput(nil, DBattr, mb->d.aptr, 0); - for(i = 0; i < nelem(mb->d.dptr); i++){ - tot += xdbput(nil, DBdata, mb->d.dptr[i], isdir); - mb->d.dptr[i] = 0; - } - for(i = 0; i < nelem(mb->d.iptr); i++){ - tot += xdbput(nil, DBptr0+i, mb->d.iptr[i], isdir); - mb->d.iptr[i] = 0; - } - if(isdir){ - doff = embedattrsz(mb); - sz = Embedsz-doff; - tot += dropdentries(mb->d.embed+doff, sz/Daddrsz); - } - break; - default: - if(type < DBptr0 || type >= DBptr0+Niptr) - fatal("dbput: type %d", type); - for(i = 0; i < Dptrperblk; i++){ - tot += xdbput(nil, mb->type-1, mb->d.ptr[i], isdir); - mb->d.ptr[i] = 0; - } + for(tot = 0; tot < Dblksz; tot += n){ + if(fs->swreaderr != 0 && ++nr > fs->swreaderr) + warnerror("dbread: sw fault"); + n = pread(fs->fd, p+tot, Dblksz-tot, addr+tot); + if(n == 0) + werrstr("disk truncated"); + if(n <= 0) + warnerror("dbread: %D: %r", b->addr); } - noerror(); - - if(mb != b) - mbput(mb); - - if(dbg['d']) - assert(mbget(type, addr, Dontmk) == nil); - - if(dbg['K']) - nodoublefree(addr); - xqlock(fs); - xqlock(&fs->refs); - rb->d.ref[idx] = fs->super->d.free; - fs->super->d.free = addr; - fs->super->d.ndfree++; - xqunlock(&fs->refs); - xqunlock(fs); - noerror(); - mbput(rb); - + assert(tot == sizeof b->d && tot == Dblksz); + disktohost(b); + 
checkblk(b); + dRprint("dbread %D %H\n", b->addr, b); return tot; } long -dbput(Memblk *b) -{ - if(b == nil) - return 0; - return xdbput(b, b->type, b->addr, DBDIR(b)); -} - -static daddrt -newfakeaddr(void) +dbwrite(Memblk *b) { - static daddrt addr = ~0; - daddrt n; + static int nw; + daddrt addr; - xqlock(fs); - addr -= Dblksz; - n = addr; - xqunlock(fs); - return n|Fakeaddr; + dWprint("dbwrite %D %H\n", b->addr, b); + okdiskaddr(b->addr); + if(b->state != MBout) + fatal("dbwrite: bad state %s", sname(b->state)); + if(dbg['d']){ + if(catcherror()) + fatal("dbwrite: %r\n%H", b); + checkblk(b); + noerror(); + } + if(fs->swwriteerr != 0 && ++nw > fs->swwriteerr) + warnerror("dbwrite: sw fault"); + addr = DADDR(b->addr); + if(pwrite(fs->fd, &b->d, sizeof b->d, addr) != Dblksz) + warnerror("dbwrite: d%#ullx: %r", b->addr); + return Dblksz; } -Memblk* -dballocz(uint type, int dbit, int zeroit) +/* + * Caller may specify DBfile when it might be DBdir, if only + * the address and not the type is known. + * We adjust the type once we know. + */ +static Memblk* +xdbget(int type, daddrt addr, int locked) { Memblk *b; - daddrt addr; - int ctl; + int i, n; - ctl = type == DBctl; - if(ctl){ - type = DBfile; - addr = newfakeaddr(); - }else - addr = newblkaddr(); - b = mballocz(addr, zeroit); - b->d.tag = TAG(type, dbit, b->addr); + dMprint("dbget %D\n", addr); + b = mbload(addr, locked); + if(b->state == MBerr){ + mbput(b); + error("corrupt block"); + } + if(b->state != MBin) + return b; + + if(ISDADDR(addr) == 0) + fatal("xdbget: load: %D", addr); + + /* complete the load for the block, + * others wait in b->ldlk. 
+ */ + if(xcanqlock(&b->ldlk)) + fatal("mbloaded:ldlk bug"); b->type = type; if(catcherror()){ + if(b->type == DBsuper) + fatal("can't read super: %r"); + mbset(b, MBerr); + if(locked) + xqunlock(&b->slk); + xqunlock(&b->ldlk); mbput(b); - debug(); error(nil); } - if((addr&Fakeaddr) == 0 && addr >= Dblk0addr) - dbsetref(addr, 1); - if(type == DBfile) + + dbread(b); + + if(b->type == DBfile || b->type == DBdir){ + assert(b->mf == nil); b->mf = anew(&mfalloc); - b = mbhash(b); - changed(b); + gmeta(b, b->d.embed, Embedsz); + if(b->d.mode&DMDIR) + b->type = DBdir; /* DBfile -> DBdir once we know */ + } + if(locked == 0) + xqlock(&b->slk); + if(b->type == DBtag){ + n = 0; + for(i = 1; i < Dtagperblk; i++) + if(b->d.tag[i] == 0) + n++; + if(n > 0){ + xqlock(&fs->superlk); + fs->ndfree += n; + dZprint("dbget: %D: +%d free:\n", b->addr, n); + xqunlock(&fs->superlk); + } + b->d.tag[0] = n; + } + mbset(b, MBclean); + if(b->type != DBsuper) + mlink(&fs->clean, b); + if(locked == 0) + xqunlock(&b->slk); + xqunlock(&b->ldlk); noerror(); - dNprint("dballoc %s -> %H\n", tname(type), b); return b; } -/* - * BUG: these should ensure that all integers are converted between - * little endian (disk format) and the machine endianness. - * We know the format of all blocks and the type of all file - * attributes. Those are the integers to convert to fix the bug. 
- */ -static Memblk* -hosttodisk(Memblk *b) +Memblk* +dbget(int type, daddrt addr) { - if(catcherror()) - fatal("hosttodisk: bad tag"); - checktag(b->d.tag, b->type, b->addr); - noerror(); - incref(b); - return b; + return xdbget(type, addr, Unlocked); } -static void -disktohost(Memblk *b) -{ - static union - { - u64int i; - uchar m[BIT64SZ]; - } u; - - u.i = 0x1122334455667788ULL; - if(u.m[0] != 0x88) - fatal("fix hosttodisk/disktohost for big endian"); - checkblk(b); -} - -static int -isfakeref(daddrt addr) +Memblk* +dbgetlocked(int type, daddrt addr) { - addr -= Dblk0addr; - addr /= Dblksz; - return (addr%Nblkgrpsz) == 2; + return xdbget(type, addr, Locked); } -/* - * Write the block a b->addr. - * DBrefs are written at even (b->addr) or odd (b->addr+DBlksz) - * reference blocks as indicated by the frozen super block to be written. - * See comment in fsys.c:/^freezesuperrefs - */ -long -dbwrite(Memblk *b) +void +markdentries(void *p, int n, u64int e) { - Memblk *nb; - static int nw; - daddrt addr; - - if(b->addr&Fakeaddr) - fatal("dbwrite: fake addr %H", b); - if(b->dirty == 0) - return 0; - addr = b->addr; - /* - * super switches between even/odd DBref blocks, plus there's a - * fake DBref block used just for fscheck() counters. - */ - if(b->type == DBref){ - assert(fs->fzsuper != nil); - if(fs->fzsuper->d.oddrefs && !isfakeref(b->addr)) - addr += Dblksz; - } - dWprint("dbwriting at d%#010ullx %H\n",addr, b); - nb = hosttodisk(b); - if(swwriteerr != 0 && ++nw > swwriteerr){ - written(b); /* what can we do? */ - mbput(nb); - warnerror("dbwrite: sw fault"); - } - if(pwrite(fs->fd, &nb->d, sizeof nb->d, addr) != Dblksz){ - written(b); /* what can we do? */ - mbput(nb); - warnerror("dbwrite: d%#ullx: %r", b->addr); - } - written(b); - mbput(nb); + int i; + daddrt *d; - return Dblksz; + d = p; + for(i = 0; i < n; i++) + if(d[i] != 0) + dfmark(d[i], e); } /* - * Read a block from b->addr. 
- * DBrefs are written at even (b->addr) or odd (b->addr+DBlksz) - * reference blocks as indicated by the super block in used. - * See comment in fsys.c:/^freezesuperrefs + * TODO: This is a good place to do self repairs. + * see also fblk.c:/^dfmark */ -long -dbread(Memblk *b) -{ - static int nr; - long tot, n; - uchar *p; - daddrt addr; - - if(b->addr&Fakeaddr) - fatal("dbread: fake addr %H", b); - p = b->d.ddata; - addr = b->addr; - /* - * super switches between even/odd DBref blocks, plus there's a - * fake DBref block used just for fscheck() counters. - */ - if(b->type == DBref && fs->super->d.oddrefs && !isfakeref(b->addr)) - addr += Dblksz; - for(tot = 0; tot < Dblksz; tot += n){ - if(swreaderr != 0 && ++nr > swreaderr) - warnerror("dbread: sw fault"); - n = pread(fs->fd, p+tot, Dblksz-tot, addr + tot); - if(n == 0) - werrstr("eof on disk file"); - if(n <= 0) - warnerror("dbread: d%#ullx: %r", b->addr); - } - assert(tot == sizeof b->d && tot == Dblksz); - - dRprint("dbread from d%#010ullx tag %#ullx %H\n", addr, b->d.tag, b); - disktohost(b); - if(b->type != DBref) - b->frozen = 1; - - return tot; -} - -Memblk* -dbget(uint type, daddrt addr) +void +dbmark(int type, daddrt addr, u64int e) { Memblk *b; + int i; + u64int old; - dMprint("dbget %s d%#010ullx\n", tname(type), addr); - okaddr(addr); - b = mbget(type, addr, Mkit); - if(b == nil) - error("i/o error"); - if(b->loading == 0) - return b; - - /* the file is new, must read it */ + if(ISDADDR(addr)){ + if(catcherror()){ + warn("dbmark %D: %r", addr); + return; + } + old = dbsettag(addr, e); + noerror(); + if(old == e) /* already marked */ + return; + } + if(type == DBdata) + return; + b = mbgetlocked(addr); + if(b == nil){ + if(!ISDADDR(addr)) + fatal("dbmark: unloaded mem %D", addr); + b = dbgetlocked(type, addr); + } if(catcherror()){ - xqunlock(&b->newlk); /* awake those waiting for it */ - mbunhash(b, 0); /* put our ref and the hash ref */ - mbput(b); - error(nil); + warn("dbmark: %r"); + goto done; } 
- dbread(b); - checktag(b->d.tag, type, addr); - assert(b->type == type); - if(type == DBfile){ - assert(b->mf == nil); - b->mf = anew(&mfalloc); - gmeta(b, b->d.embed, Embedsz); - if(b->d.mode&DMDIR) - assert(DBDIR(b)); - else - assert(!DBDIR(b)); + if(type == DBdirdata){ + markdentries(b->d.data, Dptrperblk, e); + noerror(); + goto done; } - b->loading = 0; + + if((type&~DBdirflag) < DBptr0) + fatal("dbmark: type %s", tname(type)); + for(i = 0; i < Dptrperblk; i++) + if(b->d.ptr[i] != 0) + dbmark(type-1, b->d.ptr[i], e); noerror(); - xqunlock(&b->newlk); - return b; +done: + xqunlock(&b->slk); + mbput(b); } -static void -dupdentries(void *p, int n) +void +addressdentries(void *p, int n) { int i; daddrt *d; d = p; for(i = 0; i < n; i++) - if(d[i] != 0){ - dNprint("add ref on dup d%#ullx\n", d[i]); - dbincref(d[i]); - } + if(d[i] != 0) + d[i] = dfaddress(d[i]); } -/* - * caller responsible for locking. - * On errors we may leak disk blocks because of added references. - * Isdir flags that the block belongs to a dir, so we could add references - * to dir entries. 
- */ -Memblk* -dbdup(Memblk *b) +daddrt +address(Memblk *b) { - Memblk *nb; + daddrt addr; + + ainc(&fs->naddress); + xqlock(&b->slk); + mbunhash(b, 0); + addr = newdaddr(); + dAprint("address %D: %H", addr, b); + b->addr = addr; + assert(b->state == MBmem); + mbset(b, MBout); + mbhash(b); + mlink(&fs->out, b); + xqunlock(&b->slk); + return b->addr; +} + +daddrt +dbaddress(int type, daddrt addr) +{ + Memblk *b; int i; - ulong doff, sz; - nb = dballocz(b->type, DBDIR(b), 0); + if(ISDADDR(addr)) + return addr; + + b = mbget(addr); + if(b == nil) + fatal("dbaddress: mem %D not in hash", addr); + if((b->type&~DBdirflag) != (type&~DBdirflag)) + fatal("dbaddress: %D: type %s", addr, tname(b->type)); + if(b->state != MBmem) + fatal("dbaddress: %D: state %s", addr, sname(b->state)); if(catcherror()){ - mbput(nb); + mbput(b); error(nil); } - switch(b->type){ - case DBfree: - case DBref: - case DBsuper: - case DBattr: - fatal("dbdup: %s", tname(b->type)); - case DBdata: - memmove(nb->d.data, b->d.data, Dblkdatasz); - if(DBDIR(b) != 0) - dupdentries(b->d.data, Dblkdatasz/Daddrsz); - break; - case DBfile: - if(!b->frozen) - isrwlocked(b, Rd); - nb->d.asize = b->d.asize; - nb->d.aptr = b->d.aptr; - nb->d.ndents = b->d.ndents; - if(nb->d.aptr != 0) - dbincref(b->d.aptr); - for(i = 0; i < nelem(b->d.dptr); i++){ - nb->d.dptr[i] = b->d.dptr[i]; - if(nb->d.dptr[i] != 0) - dbincref(b->d.dptr[i]); - } - for(i = 0; i < nelem(b->d.iptr); i++){ - nb->d.iptr[i] = b->d.iptr[i]; - if(nb->d.iptr[i] != 0) - dbincref(b->d.iptr[i]); - } - nb->d.Dmeta = b->d.Dmeta; - memmove(nb->d.embed, b->d.embed, Embedsz); - gmeta(nb, nb->d.embed, Embedsz); - if(DBDIR(b) != 0){ - doff = embedattrsz(nb); - sz = Embedsz-doff; - dupdentries(nb->d.embed+doff, sz/Daddrsz); - } - /* - * no race: caller takes care. 
- */ - if(b->frozen && b->mf->melted == nil){ - incref(nb); - b->mf->melted = nb; - } - break; - default: - if(b->type < DBptr0 || b->type >= DBptr0 + Niptr) - fatal("dbdup: bad type %d", b->type); - for(i = 0; i < Dptrperblk; i++){ - nb->d.ptr[i] = b->d.ptr[i]; - if(nb->d.ptr[i] != 0) - dbincref(nb->d.ptr[i]); - } - } - changed(nb); - noerror(); - /* when b is a frozen block, it's likely we won't use it more, - * because we now have a melted one. - * pretend it's the lru one. - */ - if(b->frozen) - mbunused(b); + if(type == DBdirdata) + addressdentries(b->d.data, Dptrperblk); + else if(type != DBdata){ + if((type&~DBdirflag) < DBptr0) + fatal("dbaddress: %D: type %s", b->addr, tname(type)); + for(i = 0; i < Dptrperblk; i++) + if(b->d.ptr[i] != 0) + b->d.ptr[i] = dbaddress(type-1, b->d.ptr[i]); + } - return nb; + noerror(); + addr = address(b); + mbput(b); + return addr; } --- /sys/src/cmd/creepy/dk.h Thu Apr 26 19:07:03 2012 +++ /sys/src/cmd/creepy/dk.h Fri May 11 16:07:05 2012 @@ -1,129 +1,58 @@ +typedef struct Alloc Alloc; +typedef struct Blksl Blksl; +typedef struct Cmd Cmd; typedef struct Ddatablk Ddatablk; -typedef struct Dptrblk Dptrblk; -typedef struct Drefblk Drefblk; -typedef struct Dattrblk Dattrblk; typedef struct Dfileblk Dfileblk; +typedef union Diskblk Diskblk; +typedef struct Dmeta Dmeta; +typedef struct Dptrblk Dptrblk; typedef struct Dsuperblk Dsuperblk; typedef struct Dsuperdata Dsuperdata; -typedef union Diskblk Diskblk; -typedef struct Diskblkhdr Diskblkhdr; -typedef struct Memblk Memblk; +typedef struct Dtagblk Dtagblk; +typedef struct Fsstat Fsstat; typedef struct Fsys Fsys; -typedef struct Dmeta Dmeta; -typedef struct Blksl Blksl; -typedef struct Mfile Mfile; -typedef struct Cmd Cmd; -typedef struct Path Path; -typedef struct Alloc Alloc; -typedef struct Next Next; -typedef struct Lstat Lstat; -typedef struct List List; typedef struct Link Link; -typedef struct Usr Usr; +typedef struct List List; +typedef struct Lstat Lstat; typedef struct 
Member Member; +typedef struct Memblk Memblk; typedef struct Meminfo Meminfo; - -/* - * Conventions: - * - * References: - * - Ref is used for in-memory RCs. This has nothing to do with on-disk refs. - * - Mem refs include the reference from the hash. That one keeps the file - * loaded in memory while unused. - * - The hash ref also accounts for refs from the lru/ref/dirty lists. - * - Disk refs count only references within the tree on disk. - * - There are two copies of disk references, even, and odd. - * Only one of them is active. Every time the system is written, - * the inactive copy becomes active and vice-versa. Upon errors, - * the active copy on disk is always coherent because the super is - * written last. - * - Children do not add refs to parents; parents do not add ref to children. - * - 9p, fscmd, ix, and other top-level shells for the fs are expected to - * keep Paths for files in use, so that each file in the path - * is referenced once by the path - * - example, on debug fsdump()s: - * r=2 -> 1 (from hash) + 1 (while dumping the file info). - * (block is cached, in the hash, but unused otherwise). - * r=3 in /active: 1 (hash) + 1(fs->active) + 1(dump) - * r is greater: - * - some fid is referencing the block - * - it's a melt and the frozen f->mf->melted is a ref. - * - some rpc is using it (reading/writing/...) - * - * Assumptions: - * - /active is *never* found on disk, it's memory-only. - * - b->addr is worm. - * - parents of files loaded in memory are also in memory. - * (but this does not hold for pointer and data blocks). - * - We try not to hold more than one lock, using the - * reference counters when we need to be sure that - * an unlocked resource does not vanish. - * - reference blocks are never removed from memory. - * - disk refs are frozen while waiting to go to disk during a fs freeze. - * in which case db*ref functions write the block in place and melt it. - * - frozen blocks are quiescent. - * - mb*() functions do not raise errors. 
- * - * Locking: - * - the caller to functions in [mbf]blk.c acquires the locks before - * calling them, and makes sure the file is melted if needed. - * This prevents races and deadlocks. - * - blocks are locked by the file responsible for them, when not frozen. - * - next fields in blocks are locked by the list they are used for. - * - * Lock order: - * - fs, super,... : while locked can't acquire fs or blocks. - * - parent -> child - * (but a DBfile protects all ptr and data blocks under it). - * - block -> ref block - * - * All the code assumes outofmemoryexits = 1. - */ +typedef struct Mfile Mfile; +typedef struct Next Next; +typedef struct Opstat Opstat; +typedef struct Path Path; +typedef struct Usr Usr; +typedef struct Usrs Usrs; /* * these are used by several functions that have flags to indicate - * mem-only, also on disk; and read-access/write-access. (eg. dfmap). + * read-access/write-access, etc. */ -enum{ - Mem=0, - Disk, - +enum +{ Rd=0, Wr, + Unlocked = 0, + Locked, + Dontmk = 0, Mkit, - Tqlock = 0, - Trwlock, - Tlock, - - No = 0, - Yes, - Normal = 0, + Rdonly, Worm, - Pre = 0, - Post, - - Halting = 1, + Halting, Halted, + Mem = 0, + Disk, + /* mtime is ns in creepy, but s in 9p */ NSPERSEC = 1000000000ULL, }; - -struct Lstat -{ - int type; - uintptr pc; - int ntimes; - int ncant; - vlong wtime; -}; - enum { DMUSERS = 0x01000000ULL, @@ -140,47 +69,60 @@ * * blk 0: unused * blk 1: super - * even ref blk + odd ref blk + check ref blk + Nblkgrpsz-3 blocks + * tag blk + Dtagperblk-1 blocks * ... - * even ref blk + odd ref blk + check ref blk + Nblkgrpsz-3 blocks + * tag blk + Dtagperblk-1 blocks (last group may have less blocks) * * The code assumes these structures are packed. * Be careful if they are changed to make things easy for the * compiler and keep them naturally aligned. */ -/* - * header for all disk blocks. 
- */ -struct Diskblkhdr +enum { - u64int tag; /* block tag */ - u64int cnt; /* ref counter during checks */ + /* + * disk addresses always have the high two bits set as 01, + * to know if ptr[i] is a memory or disk address. + * Address 0 is always invalid. + */ + Diskaddrflag = 0x4000000000000000ULL, + Diskaddrmask = 0x3FFFFFFFFFFFFFFFULL, + Noaddr = 0ULL, }; +#define MKMADDR(p) ((daddrt)(p)) +#define MKDADDR(d) ((d)|Diskaddrflag) +#define DADDR(d) ((d)&Diskaddrmask) +#define ISDADDR(d) (((d)&~Diskaddrmask) == Diskaddrflag) + enum { - /* block types */ - DBfree = 0, - DBref, - DBattr, - DBfile, - DBsuper, - DBdata, /* 5: data block */ - DBptr0 = DBdata+1, /* simple-indirect block */ - /* double */ - /* triple */ - /*...*/ - DBctl = ~0, /* DBfile, never on disk. arg for dballoc */ + DBdirflag = 0x100, + /* block types */ + DBfree = 0, /* free block */ + DBattr, /* unused */ + DBsuper, /* super block */ + DBtag, /* tag block */ + DBfile, /* file */ + DBdata, /* data block */ + DBptr0 = DBdata+1, /* simple-indirect block */ + /* double */ + /* triple */ + /*...*/ + DBdir = DBfile+DBdirflag, /* dir */ + DBdirdata, /* dir data block */ + DBdirptr0 = DBdirdata+1, /* dir simple indirect */ + /* dir double indirect */ + /* ... 
*/ Daddrsz = BIT64SZ, - Dblkhdrsz = sizeof(Diskblkhdr), - Nblkgrpsz = (Dblksz - Dblkhdrsz) / Daddrsz, - Dblk0addr = 2*Dblksz, - + Dtagsz = BIT32SZ, + Dsuperaddr = Dblksz, + Dblk0addr = Dsuperaddr+Dblksz, }; typedef u64int daddrt; /* disk addreses and sizes */ +typedef u32int dtagt; /* disk block tag (epoch) */ struct Ddatablk { @@ -189,23 +131,29 @@ struct Dptrblk { - daddrt ptr[1]; /* array of block addresses */ -}; - -struct Drefblk -{ - daddrt ref[1]; /* disk RC or next block in free list */ + daddrt ptr[1]; /* array of disk addresses | mem ptrs */ }; -struct Dattrblk +struct Dtagblk { - daddrt next; /* next block used for attribute data */ - uchar attr[1]; /* raw attribute data */ + dtagt tag[1]; /* array of epochs tagging blocks */ }; +/* + * File attributes are name/value pairs. + * By now, only mandatory attributes are implemented, which + * have names implied by their position in the Dmeta structure. + * other name=value could follow. + * + * prev refers to the address of the previous version for the file, + * BUT, beware that such block might have been reused if the disk is collected, + * which means that it's just a hint and the type/id of that block must be + * double checked. + */ struct Dmeta /* mandatory metadata */ { u64int id; /* ctime, actually */ + u64int prev; /* address of previous version, hint */ u64int mode; u64int atime; u64int mtime; @@ -222,9 +170,7 @@ * At least Dminattrsz is reserved for attributes, at most * all the remaining embedded space. * Past the attributes, starts the file data. - * If more attribute space is needed, an attribute block is allocated. - * For huge attributes, it is suggested that a file is allocated and - * the attribute value refers to that file. + * If more attribute space is needed, attribute blocks should be implemented. * The pointer in iptr[n] is an n-indirect data pointer. 
* * Directories are also files, but their data is simply an array of @@ -237,7 +183,7 @@ { u64int asize; /* attribute size */ u64int ndents; /* # of directory entries, for dirs */ - daddrt aptr; /* attribute block pointer */ + daddrt aptr; /* attribute block pointer; unused */ daddrt dptr[Ndptr]; /* direct data pointers */ daddrt iptr[Niptr]; /* indirect data pointers */ Dmeta; /* predefined attributes, followed by name */ @@ -247,12 +193,6 @@ #define MAGIC 0x6699BCB06699BCB0ULL /* * Superblock. - * The stored tree is: - * archive/ root of the archived tree - * - * ... - * (/ and /active are only memory and never on disk, parts - * under /active that are on disk are shared with entries in /archive) * * It contains two copies of the information, Both should be identical. * If there are errors while writing this block, the one with the @@ -261,21 +201,20 @@ struct Dsuperdata { u64int magic; /* MAGIC */ - u64int epoch; - daddrt free; /* first free block on list */ + u64int epoch; /* current epoch */ + u64int fepoch; /* if epoch <= fepoch then it's free */ daddrt eaddr; /* end of the assigned disk portion */ - daddrt root; /* address of /archive in disk */ - u64int oddrefs; /* use odd ref blocks? or even ref blocks? 
*/ - u64int ndfree; /* # of blocks in free list */ - u64int maxuid; /* 1st available uid */ + daddrt root; /* address of / in disk */ + daddrt free; /* cached free block list */ + u64int fsmode; /* Normal, Worm */ + u64int dblksz; /* only for checking */ - u64int nblkgrpsz; /* only for checking */ u64int dminattrsz; /* only for checking */ u64int ndptr; /* only for checking */ u64int niptr; /* only for checking */ - u64int dirtyrefs; /* 0 || we were writing ref blocks and failed */ u64int embedsz; /* only for checking */ u64int dptrperblk; /* only for checking */ + u64int dtagperblk; /* only for checking */ }; struct Dsuperblk @@ -287,70 +226,56 @@ Dsuperdata dup; }; -enum -{ - /* addresses for ctl files and / have this bit set, and are never - * found on disk. - */ - Fakeaddr = 0x8000000000000000ULL, - Noaddr = ~0ULL, -}; - -enum -{ - DFdir = 0x80U, /* flag for directories in tags */ - DFreg = 0, /* flag for files in tags */ -}; -#define TAG(type,dir,addr) ((addr)<<8|((dir)&DFdir)|((type)&0x7F)) -#define TAGTYPE(t) ((t)&0x7F) -#define TAGDIR(t) ((t)&DFdir) -#define DBDIR(b) ((b)->d.tag&DFdir) -#define TAGADDROK(t,addr) (((t)&~0xFF) == ((addr)<<8)) - /* * disk blocks */ - union Diskblk { - struct{ - Diskblkhdr; - union{ - Ddatablk; /* data block */ - Dptrblk; /* pointer block */ - Drefblk; /* reference counters block */ - Dattrblk; /* attribute block */ - Dfileblk; /* file block */ - Dsuperblk; - }; + union{ + Dtagblk; + Dfileblk; + Dsuperblk; + Ddatablk; + Dptrblk; }; - uchar ddata[Dblksz]; + uchar diskblockdata[Dblksz]; /* align: ensure Dblksz size */ }; /* - * These are derived. * Embedsz must compensate that embed[] was declared as embed[Daddrsz], * to make it easy for the compiler to keep things aligned on 64 bits. 
*/ enum { - Dblkdatasz = sizeof(Diskblk) - sizeof(Diskblkhdr), - Embedsz = Dblkdatasz - sizeof(Dfileblk) + Daddrsz, - Dptrperblk = Dblkdatasz / Daddrsz, - Drefperblk = Dblkdatasz / Daddrsz, + Embedsz = Dblksz - sizeof(Dfileblk) + Daddrsz, + Dptrperblk = Dblksz / Daddrsz, + Dtagperblk = Dblksz / Dtagsz, }; /* - * File attributes are name/value pairs. - * By now, only mandatory attributes are implemented, and - * have names implied by their position in the Dmeta structure. - */ - -/* * ##### On memory structures. ##### */ +struct List +{ + QLock; + Memblk *hd; + Memblk *tl; + long n; +}; + +struct Link +{ + Memblk *lprev; + Memblk *lnext; +}; + +struct Next +{ + Next *next; +}; + /* * On memory file information. */ @@ -364,7 +289,6 @@ char *muid; /* reference to the user table */ char *name; /* reference to the disk block */ - Memblk* melted; /* next version for this one, if frozen */ ulong lastbno; /* last accessed block nb within this file */ ulong sequential; /* access has been sequential */ @@ -373,18 +297,36 @@ uvlong raoffset; /* we did read ahead up to this offset */ }; -struct List -{ - QLock; - Memblk *hd; - Memblk *tl; - long n; -}; - -struct Link +/* + * Memory block states: + * mem: memory block, hashed by mem addr, unlinked + * clean: frozen, synced block, hashed by disk addr, in clean list. + * in: block being loaded, hashed by disk addr, unlinked. + * out: frozen block, hashed by disk addr, in out list. + * err: block with i/o errors, hashed by disk addr. unlinked. + * lru: moving out mem, unhashed, unlinked. + * Transitions: + * fswrite(): mem -> out (fs quiescent) + * syncproc(): out -> clean | err + * dbget(): in -> clean | err + * mbrenew(): clean -> mem + * lruproc(): clean -> lru + * + * The super is never linked at any list, but it's hashed. + * Frozen copies of the super are sent for writing and discarded when written. + * Blocks with disk addresses never change, unless renewed and + * moved back to the mem state. 
All their references must be + * disk addresses. + */ +enum { - Memblk *lprev; - Memblk *lnext; + MBfree = 0, + MBmem, + MBout, + MBclean, + MBin, + MBlru, + MBerr, }; /* @@ -392,22 +334,19 @@ */ struct Meminfo { + Memblk *next; /* in hash or free list */ + Link; /* clean, out, ... lists */ Ref; - daddrt addr; /* block address */ - Memblk *next; /* in hash or free list */ - Link; /* clean / dirty / ref lists */ + int type; /* block type */ + int state; /* Mfree, Mnew, ... */ + daddrt addr; /* block address | 0 */ - Mfile *mf; /* DBfile on-memory info. */ + QLock slk; /* state change */ + QLock ldlk; /* wait while loading */ - int type; - Lock dirtylk; - int dirty; /* must be written */ - int frozen; /* is frozen */ - int loading; /* block is being read */ - QLock newlk; /* only to wait on DBnew blocks */ - - uintptr unlinkpc; + Mfile *mf; /* DBfile on-memory info. */ + Channel *wc; /* for write super */ }; struct Memblk @@ -426,61 +365,164 @@ long len; }; +struct Alloc +{ + QLock; + Next *free; + ulong nfree; + ulong nalloc; + usize elsz; + int zeroing; + int fixedsz; +}; + +/* + * user group member. 
+ */ +struct Member +{ + Member *next; + Usr *u; +}; + +struct Usr +{ + Usr *nnext; /* next by name */ + Usr *inext; /* next by id */ + + int id; + int enabled; + int allow; + Usr *lead; + char name[Unamesz]; + Member *members; +}; + +struct Usrs +{ + RWLock; + Usr *uids[Uhashsz]; + Usr *unames[Uhashsz]; + Usr *uwrite; + int uidgen; +}; + +struct Opstat +{ + Lock; + int ntimes; + vlong tot; + vlong max; +}; + +enum +{ + Opwriteq, /* waiting for quiescence in fswrite */ + Opwrite, /* in fswrite */ + Opsync, /* in syncproc */ + Oplru, /* in lruproc */ + Opmark, /* in fsmark */ + Opsweep, /* in sweepproc */ + Opmax, +}; + +struct Fsstat +{ + int nmballocw; /* # of waits in mballocz */ + int nmbunhashbusy; /* # of unhash found busy */ + int mbreneww; /* # of waits in mbrenew */ + int mbgetrace; /* # of mbgets that did race */ + int newdaddrw; /* # of waits in newdaddr */ + int nmbrenew; /* # of times a block was renewed */ + int ndirmiss; /* # of failed dfdirlookups */ + int nnoattrsz; /* # of wattr failed due to sz */ + int nindirs[Niptr]; + int naddress; /* # of blocks addressed */ + int nwrite; /* # of calls to fswrite */ + int nlruign; /* # of total ignores in lru */ + int ngc; /* # of blocks gc'd */ + int nrenewpath; /* # of times a path had to be renewed */ + Opstat opstat[Opmax]; +}; + struct Fsys { QLock; + char *dev; /* name for disk */ + + int fd; /* of disk */ + daddrt limit; /* address for end of disk */ + uvlong ndblk; /* # of disk blocks in dev */ struct{ QLock; Memblk *b; - } fhash[Fhashsz]; /* hash of blocks by address */ + } fhash[Fhashsz]; /* hash of blocks by disk address */ Memblk *blk; /* static global array of memory blocks */ - uvlong nblk; /* # of entries used */ - uvlong nablk; /* # of entries allocated */ - uvlong nmused; /* blocks in use */ - uvlong nmfree; /* free blocks */ - Memblk *free; /* free list of unused blocks in blk */ - - List clean; /* hd: mru; tl: lru */ - List dirty; /* dirty blocks */ - List refs; /* DBref blocks, neither in 
clean nor dirty lists */ - - QLock mlk; - Mfile *mfree; /* unused list */ - - - Memblk *super; /* locked by blklk */ - Memblk *root; /* only in memory */ - Memblk *active; /* /active */ - Memblk *archive; /* /archive */ + Alloc mballoc; /* memory block allocator */ + List dtags; /* list of dirty MBmem DBtags */ + Channel *lruc; /* wakeup lruproc */ + List clean; /* hd: lru; tl: mru */ + Channel *syncc; /* wakeup syncproc */ + List out; /* dirty blocks waiting in write queue */ + + Lock nmblklk; + uvlong nmblk; /* # of DBmem blocks */ + + QLock freelk; /* one newdaddr() at a time */ + daddrt isweep; /* next group to sweep for free blocks */ + daddrt ifree; /* next group to scan for free blocks */ + uvlong ndfree; /* aprox. # of free disk blocks */ + + Channel *sweepc; /* wakeup sweepproc */ + Channel *sweepec; /* sweepproc done */ + int nlastfree; /* sweepproc <-> newdaddr hint */ + + QLock superlk; + Memblk *super; /* super block */ + + Memblk *root; /* / (only in memory) */ + Memblk *active; /* /root ("/" on disk, if not worm) */ Memblk *cons; /* /cons */ Memblk *stats; /* /stats */ - Channel *consc; /* of char*; output for /cons */ - - Memblk *fzsuper; /* frozen super */ + Usrs; - char *dev; /* name for disk */ - int fd; /* of disk */ - daddrt limit; /* address for end of disk */ - daddrt ndblk; /* # of disk blocks in dev */ + Channel *consc; /* of char*; output for /cons */ + Channel *syncwc; /* used by fssync */ - int nindirs[Niptr]; /* stats */ - int nmelts; + Fsstat; - QLock fzlk; /* freeze, melt, check, write */ - RWLock quiescence; /* any activity rlocks() this */ - QLock lrulk; /* fslru */ - QLock policy; /* fspolicy */ + RWLock quiescence; /* any user activity rlocks() this */ + RWLock dquiescence; /* any fs daemon proc rlocks() this */ uvlong atime; /* updated on each request */ - uvlong wtime; /* time for last fswrite */ int profile; /* measure times for rpcs */ + int halt; /* user wants to halt */ - int check; /* checking in progress */ - int worm; /* 
operate in worm mode */ - int mode; /* Rd | Wr */ + int mode; /* Normal, Rdonly, Worm */ + + QLock archlk; /* fsarchive and sweepproc */ + char *archdir; /* path to mounted archive */ + long archt; /* time() for next archival, 0 if don't */ + int archhour; /* archive hour (0-23) */ + char* archname; /* name of the archive */ + + int swreaderr, swwriteerr; /* fault injection */ +}; + +/* + * Used to keep references to all files in fid's paths. + */ +struct Path +{ + Path* next; /* in free list */ + Ref; + Memblk** f; /* array of file references */ + int nf; /* # of used entries */ + int naf; /* # of allocated entries */ + int nroot; /* "/" for user is f->[nroot] */ }; /* @@ -493,75 +535,39 @@ void (*f)(int, char**); int nargs; char *usage; + int isctl; }; -struct Next -{ - Next *next; -}; - -struct Alloc -{ - QLock; - Next *free; - ulong nfree; - ulong nalloc; - usize elsz; - int zeroing; -}; - -/* - * Used to keep references to parents crossed to - * reach files, to be able to build a melted version of the - * children. Also to know the parent of a file for things like - * removals. 
- */ -struct Path -{ - Path* next; /* in free list */ - Ref; - Memblk** f; - int nroot; - int nf; - int naf; -}; - -struct Member +enum /* Lstat.type */ { - Member *next; - Usr *u; + Tqlock = 0, + Trwlock, + Tlock, }; -struct Usr +struct Lstat { - Usr *nnext; /* next by name */ - Usr *inext; /* next by id */ - - int id; - int enabled; - int allow; - Usr *lead; - char name[Unamesz]; - Member *members; + int type; + uintptr pc; + int ntimes; + int ncant; + vlong wtime; }; - +#pragma varargck type "D" daddrt #pragma varargck type "H" Memblk* #pragma varargck type "A" Usr* #pragma varargck type "P" Path* +#pragma varargck type "N" char* #pragma varargck argpos fatal 1 #pragma varargck argpos warn 1 #pragma varargck argpos warnerror 1 -/* used in debug prints to print just part of huge values */ -#define EP(e) ((e)&0xFFFFFFFFUL) -typedef int(*Blkf)(Memblk*, void*); -typedef int (*Dirf)(Memblk *b, daddrt *de, void *p); +extern Fsys *fs; +extern Alloc mfalloc, pathalloc; +extern int fatalaborts; /* debug */ -extern Fsys*fs; -extern uvlong maxfsz; -extern Alloc mfalloc, pathalloc; -extern int swreaderr, swwriteerr; -extern int fatalaborts; +/* defined to preserve the caller pc in qunlock() checks */ +#define xqunlock(x) qunlock(x) --- /sys/src/cmd/creepy/fblk.c Thu Apr 26 19:07:04 2012 +++ /sys/src/cmd/creepy/fblk.c Fri May 11 12:27:07 2012 @@ -3,159 +3,46 @@ /* * File blocks. * see dk.h + * + * These functions assume that files are rwlocked by the caller, that + * they are in MBmem state if needed, and that MBmem blocks do not freeze + * while there's system activity. 
*/ -Path* -walkpath(Memblk *f, char *elems[], int nelems) -{ - int i; - Memblk *nf; - Path *p; - - p = newpath(f); - if(catcherror()){ - putpath(p); - error(nil); - } - isfile(f); - for(i = 0; i < nelems; i++){ - if((f->d.mode&DMDIR) == 0) - error("not a directory"); - rwlock(f, Rd); - if(catcherror()){ - rwunlock(f, Rd); - error("walk: %r"); - } - nf = dfwalk(f, elems[i]); - rwunlock(f, Rd); - addelem(&p, nf); - mbput(nf); - f = nf; - USED(&f); /* in case of error() */ - noerror(); - } - noerror(); - return p; -} - -Path* -walkto(char *a, char **lastp) -{ - char *els[Npathels]; - int nels, n; - - n = strlen(a); - nels = gettokens(a, els, nelem(els), "/"); - if(nels < 1) - error("invalid path"); - if(lastp != nil){ - *lastp = a + n - strlen(els[nels-1]); - return walkpath(fs->root, els, nels-1); - }else - return walkpath(fs->root, els, nels); -} - -void -rwlock(Memblk *f, int iswr) -{ - xrwlock(f->mf, iswr); -} - -void -rwunlock(Memblk *f, int iswr) -{ - xrwunlock(f->mf, iswr); -} - -void -isfile(Memblk *f) -{ - if((f->d.mode&DMDIR) != 0) - assert(DBDIR(f)); - else - assert(!DBDIR(f)); - if(f->type != DBfile || f->mf == nil) - fatal("isfile: not a file at pc %#p", getcallerpc(&f)); -} - -void -isrwlocked(Memblk *f, int iswr) -{ - if(f->type != DBfile || f->mf == nil) - fatal("isrwlocked: not a file at pc %#p", getcallerpc(&f)); - if((iswr && canrlock(f->mf)) || (!iswr && canwlock(f->mf))) - fatal("is%clocked at pc %#p", iswr?'w':'r', getcallerpc(&f)); -} - -static void -isdir(Memblk *f) -{ - if((f->d.mode&DMDIR) != 0) - assert(DBDIR(f)); - else - assert(!DBDIR(f)); - if(f->type != DBfile || f->mf == nil) - fatal("isdir: not a file at pc %#p", getcallerpc(&f)); - if((f->d.mode&DMDIR) == 0) - fatal("isdir: not a dir at pc %#p", getcallerpc(&f)); -} - -/* for dfblk only */ +/* + * For dfblk. + * Ensure *addrp has a MBmem block we could write. 
+ */ static Memblk* -getmelted(uint type, uint dbit, daddrt *addrp, int *chg) +getmk(uint type, daddrt *addrp) { - Memblk *b, *nb; + Memblk *b; - *chg = 0; if(*addrp == 0){ - b = dballocz(type, dbit, 1); + b = mbnew(type); *addrp = b->addr; - *chg = 1; return b; } b = dbget(type, *addrp); - assert(DBDIR(b) == dbit); - nb = nil; - if(!b->frozen) - return b; - if(catcherror()){ - mbput(b); - mbput(nb); - error(nil); - } - nb = dbdup(b); - USED(&nb); /* for error() */ - *addrp = nb->addr; - *chg = 1; - dbput(b); - noerror(); - mbput(b); - return nb; + mbrenew(b); + *addrp = b->addr; + return b; } /* - * Get a file data block, perhaps allocating it on demand - * if mkit. The file must be r/wlocked and melted if mkit. - * - * Adds disk refs for dir entries copied during melts and - * considers that /archive is always melted. - * - * Read-ahead is not considered here. The file only records - * the last accessed block number, to help the caller do RA. - * + * Get a file block #bno, allocating it on demand if mkit. */ static Memblk* dfblk(Memblk *f, ulong bno, int mkit) { ulong prev, nblks; - int i, idx, nindir, type, chg; + int i, idx, nindir, type; Memblk *b, *pb; daddrt *addrp; - if(mkit) - ismelted(f); - + if(mkit && f->state != MBmem) + fatal("dfblk: %D: state %s", f->addr, sname(f->state)); if(bno != f->mf->lastbno){ f->mf->sequential = (!mkit && bno == f->mf->lastbno + 1); f->mf->lastbno = bno; @@ -166,27 +53,24 @@ * prev: # of blocks before the current one. */ prev = 0; - chg = 0; /* - * Direct block? + * Direct block */ + type = DBdata | (f->type&DBdirflag); if(bno < nelem(f->d.dptr)){ if(mkit) - b = getmelted(DBdata, DBDIR(f), &f->d.dptr[bno], &chg); + return getmk(type, &f->d.dptr[bno]); else - b = dbget(DBdata, f->d.dptr[bno]); - if(chg) - changed(f); - return b; + return dbget(type, f->d.dptr[bno]); } - bno -= nelem(f->d.dptr); - prev += nelem(f->d.dptr); /* * Indirect block * nblks: # of data blocks addressed by the block we look at. 
*/ + bno -= nelem(f->d.dptr); + prev += nelem(f->d.dptr); nblks = Dptrperblk; for(i = 0; i < nelem(f->d.iptr); i++){ if(bno < nblks) @@ -198,17 +82,15 @@ if(i == nelem(f->d.iptr)) error("offset exceeds file capacity"); ainc(&fs->nindirs[i]); - type = DBptr0+i; - dFprint("dfblk: indirect %s nblks %uld (ppb %ud) bno %uld\n", + type = (DBptr0+i) | (f->type&DBdirflag); + dFprint("dfblk: indirect %s nblks %uld (ppb %uld) bno %uld\n", tname(type), nblks, Dptrperblk, bno); addrp = &f->d.iptr[i]; if(mkit) - b = getmelted(type, DBDIR(f), addrp, &chg); + b = getmk(type, addrp); else b = dbget(type, *addrp); - if(chg) - changed(f); pb = b; if(catcherror()){ mbput(pb); @@ -222,9 +104,9 @@ * nblks: # of data blocks addressed by b */ for(nindir = i+1; nindir >= 0; nindir--){ - chg = 0; - dFprint("indir %s d%#ullx nblks %uld ptrperblk %d bno %uld\n\n", - tname(DBdata+nindir), *addrp, nblks, Dptrperblk, bno); + dFprint("indir %s d%#ullx nblks %uld ptrperblk %uld bno %uld\n\n", + tname(DBdata+nindir+(f->type&DBdirflag)), + *addrp, nblks, Dptrperblk, bno); idx = 0; if(nindir > 0){ nblks /= Dptrperblk; @@ -235,13 +117,11 @@ warn("HOLE"); b = nil; }else{ - assert(type >= DBdata); + assert((type&~DBdirflag) >= DBdata); if(mkit) - b = getmelted(type, DBDIR(f), addrp, &chg); + b = getmk(type, addrp); else b = dbget(type, *addrp); - if(chg) - changed(pb); addrp = &b->d.ptr[idx]; } mbput(pb); @@ -256,78 +136,28 @@ return b; } -/* - * Remove [bno:bend) file data blocks. - * The file must be r/wlocked and melted. - */ void -dfdropblks(Memblk *f, ulong bno, ulong bend) -{ - Memblk *b; - - isrwlocked(f, Wr); - ismelted(f); - assert(!DBDIR(f)); - - dprint("dfdropblks: could remove d%#ullx[%uld:%uld]\n", - f->addr, bno, bend); - /* - * Instead of releasing the references on the data blocks, - * considering that the file might grow again, we keep them. - * Consider recompiling again and again and... - * - * The length has been adjusted and data won't be returned - * before overwritten. 
- * - * We only have to zero the data, because the file might - * grow using holes and the holes must read as zero, and also - * because directories assume all data blocks are initialized. - */ - for(; bno < bend; bno++){ - if(catcherror()) - continue; - b = dfblk(f, bno, Dontmk); - noerror(); - memset(b->d.data, 0, Dblkdatasz); - changed(b); - mbput(b); - } -} - -/* - * block # for the given offset (first block in file is 0). - * embedded data accounts also as block #0. - * If boffp is not nil it returns the offset within that block - * for the given offset. - */ -ulong -dfbno(Memblk *f, uvlong off, ulong *boffp) +dftruncate(Memblk *f) { - ulong doff, dlen; + ulong doff; + if(f->state != MBmem) + fatal("dftruncate: %D: state %s", f->addr, sname(f->state)); + f->d.length = 0; doff = embedattrsz(f); - dlen = Embedsz - doff; - if(off < dlen){ - *boffp = doff + off; - return 0; - } - off -= dlen; - if(boffp != nil) - *boffp = off%Dblkdatasz; - return off/Dblkdatasz; + if(doff < Embedsz) + memset(f->d.embed+doff, 0, Embedsz-doff); + memset(f->d.dptr, 0, sizeof f->d.dptr); + memset(f->d.iptr, 0, sizeof f->d.iptr); } /* * Return a block slice for data in f. - * The slice returned is resized to keep in a single block. + * The slice returned does not cross block boundaries. * If there's a hole in the file, Blksl.data == nil && Blksl.len > 0. * * If mkit, the data block (and any pointer block crossed) - * is allocated/melted if needed, but the file length is NOT updated. - * - * The file must be r/wlocked by the caller, and melted if mkit. - * The block is returned referenced but unlocked, - * (it's still protected by the file lock.) + * is allocated and MBmem if needed, but the file length is NOT updated. 
*/ Blksl dfslice(Memblk *f, ulong len, uvlong off, int iswr) @@ -338,10 +168,9 @@ memset(&sl, 0, sizeof sl); dFprint("slice m%#p[%#ullx:+%#ulx]%c...\n",f, off, len, iswr?'w':'r'); - if(iswr) - ismelted(f); - else - if(off >= f->d.length) + if(iswr && f->state != MBmem) + fatal("dftruncate: %D: state %s", f->addr, sname(f->state)); + if(!iswr && off >= f->d.length) goto done; doff = embedattrsz(f); @@ -353,22 +182,18 @@ sl.data = f->d.embed + doff + off; sl.len = dlen - off; }else{ - bno = (off-dlen) / Dblkdatasz; - boff = (off-dlen) % Dblkdatasz; + bno = (off-dlen) / Dblksz; + boff = (off-dlen) % Dblksz; sl.b = dfblk(f, bno, iswr); - if(iswr) - ismelted(sl.b); if(sl.b != nil) sl.data = sl.b->d.data + boff; - sl.len = Dblkdatasz - boff; + sl.len = Dblksz - boff; } if(sl.len > len) sl.len = len; - if(off + sl.len > f->d.length) - if(!iswr) - sl.len = f->d.length - off; - /* else the file size will be updated by the caller */ + if(off + sl.len > f->d.length && iswr == 0) + sl.len = f->d.length - off; done: if(sl.b == nil) dFprint("slice m%#p[%#ullx:+%#ulx]%c -> 0[%#ulx]\n", @@ -377,28 +202,20 @@ dFprint("slice m%#p[%#ullx:+%#ulx]%c -> m%#p:%#uld[%#ulx]\n", f, off, len, iswr?'w':'r', sl.b, (uchar*)sl.data - sl.b->d.data, sl.len); - assert(sl.b == nil || sl.b->ref > 1); return sl; } - -uvlong -dfdirmap(Memblk *d, Dirf dirf, void *arg, int iswr) +static vlong +dfdirlookup(Memblk *d, daddrt addr) { Blksl sl; + vlong off; daddrt *de; - uvlong off; int i; - isdir(d); - assert(d->d.length/Daddrsz >= d->d.ndents); - if(iswr){ - isrwlocked(d, iswr); - ismelted(d); - } off = 0; for(;;){ - sl = dfslice(d, Dblkdatasz, off, iswr); + sl = dfslice(d, Dblksz, off, Rd); if(sl.len == 0) break; if(sl.b == nil) @@ -409,7 +226,7 @@ } de = sl.data; for(i = 0; i < sl.len/Daddrsz; i++) - if(dirf(sl.b, &de[i], arg) < 0){ + if(de[i] == addr){ noerror(); mbput(sl.b); return off + i*Daddrsz; @@ -418,240 +235,209 @@ noerror(); mbput(sl.b); } - return Noaddr; -} - -static int -chdentryf(Memblk *b, 
daddrt *de, void *p) -{ - daddrt *addrs, addr, naddr; - - addrs = p; - addr = addrs[0]; - naddr = addrs[1]; - if(*de != addr) - return 0; /* continue searching */ - - if(naddr != addr){ - *de = naddr; - changed(b); - } - return -1; /* found: stop */ + ainc(&fs->ndirmiss); + return -1; } /* - * Find a dir entry for addr (perhaps 0 == avail) and change it to - * naddr. If iswr, the entry is allocated if needed and the blocks - * melted on demand. - * Return the offset for the entry in the file or Noaddr - * Does not adjust disk refs. + * Find a dir entry for addr (0 == avail) and change it to naddr (0 == free). */ -uvlong +void dfchdentry(Memblk *d, daddrt addr, daddrt naddr) { - uvlong off; - daddrt addrs[2] = {addr, naddr}; + vlong off; + Blksl sl; + daddrt *de; - dNprint("dfchdentry d%#010ullx -> d%#010ullx\nin %H\n", addr, naddr, d); - off = dfdirmap(d, chdentryf, addrs, Wr); - if(addr == 0 && naddr != 0){ + dNprint("dfchdentry %D -> %D\nin %H\n", addr, naddr, d); + off = dfdirlookup(d, addr); + if(off < 0){ + if(addr != 0) + error("address not found in dir"); + off = d->d.length; + } + sl = dfslice(d, Daddrsz, off, Wr); + de = sl.data; + *de = naddr; + if(addr == 0){ if(d->d.length < off+Daddrsz) d->d.length = off+Daddrsz; d->d.ndents++; - changed(d); - }else if(addr != 0 && naddr == 0){ - d->d.ndents--; - changed(d); - } - return off; -} - -typedef -struct Walkarg -{ - char *name; - Memblk *f; -} Walkarg; - -static int -findname(Memblk*, daddrt *de, void *p) -{ - Walkarg *w; - - w = p; - if(*de == 0) - return 0; - - w->f = dbget(DBfile, *de); - if(strcmp(w->f->mf->name, w->name) != 0){ - mbput(w->f); - return 0; } - - /* found */ - dprint("dfwalk '%s' -> %H\n", w->name, w->f); - return -1; -} - -/* - * Walk to a child and return it referenced. 
- */ -Memblk* -dfwalk(Memblk *d, char *name) -{ - Walkarg w; - - if(strcmp(name, "..") == 0) - fatal("dfwalk: '..'"); - w.name = name; - w.f = nil; - if(dfdirmap(d, findname, &w, Rd) == Noaddr) - error("file not found"); - return w.f; + if(naddr == 0) + d->d.ndents--; + mbput(sl.b); } /* - * Return the last version for *fp, rwlocked, be it frozen or melted. + * TODO: This is a good place to do self repairing: + * see also dblk.c:/^dbmark + * We are called also with MBmem blocks, to mark all on-disk blocks + * reachable from the mutable tree. */ void -dflast(Memblk **fp, int iswr) +dfmark(daddrt addr, u64int e) { - Memblk *f; + Memblk *b; + int i, type; + ulong doff, sz; + u64int old; - f = *fp; - isfile(f); - rwlock(f, iswr); - while(f->mf->melted != nil){ - incref(f->mf->melted); - *fp = f->mf->melted; - rwunlock(f, iswr); - mbput(f); - f = *fp; - rwlock(f, iswr); - if(!f->frozen) + if(ISDADDR(addr)){ + if(catcherror()){ + warn("dfmark %D: %r", addr); return; + } + old = dbsettag(addr, e); + noerror(); + if(old == e) /* already marked */ + return; + } + b = mbgetlocked(addr); + if(b == nil){ + if(!ISDADDR(addr)) + fatal("dfmark: unloaded mem %D", addr); + b = dbgetlocked(DBfile, addr); + } + if(b->type != DBfile && b->type != DBdir) + fatal("dfmark: %D: type %s", addr, tname(b->type)); + if(ISDADDR(addr) && b->state != MBclean) + warn("dfmark: %D: state %s", addr, sname(b->state)); /* does it happen? */ + if(catcherror()){ + warn("dfmark: %r"); + goto fail; } + if(b->type&DBdirflag){ + doff = embedattrsz(b); + sz = Embedsz-doff; + markdentries(b->d.embed+doff, sz/Daddrsz, e); + } + type = DBdata | (b->type&DBdirflag); + for(i = 0; i < nelem(b->d.dptr); i++) + if(b->d.dptr[i] != 0) + dbmark(type, b->d.dptr[i], e); + type = DBptr0 | (b->type&DBdirflag); + for(i = 0; i < nelem(b->d.iptr); i++) + if(b->d.iptr[i] != 0) + dbmark(type+i, b->d.iptr[i], e); + noerror(); +fail: + xqunlock(&b->slk); + mbput(b); } /* - * Return *fp melted, by melting it if needed, and wlocked. 
- * The reference from the (already melted) parent is adjusted, - * as are the memory and disk references for the old file *fp. - * - * The parent is wlocked by the caller and unlocked upon return. + * Give addresses to mutable blocks, to write them out. */ -Memblk* -dfmelt(Memblk *parent, Memblk **fp) +daddrt +dfaddress(daddrt addr) { - Memblk *of, *nf; + Memblk *b; + int i, type; + ulong doff, sz; - ismelted(parent); - isrwlocked(parent, Wr); - dflast(fp, Wr); - of = *fp; - if(of->frozen == 0){ - rwunlock(parent, Wr); - return of; - } - if(catcherror()){ - rwunlock(of, Wr); - rwunlock(parent, Wr); - error(nil); - } - nf = dbdup(of); - noerror(); + if(ISDADDR(addr)) + return addr; - rwlock(nf, Wr); - rwunlock(of, Wr); + b = mbget(addr); + if(b == nil) + fatal("dfaddress: mem %D not in hash", addr); + if(b->type != DBfile && b->type != DBdir) + fatal("dfaddress: %D: type %s", addr, tname(b->type)); + if(b->state != MBmem) + fatal("dfaddress: %D: state %s", addr, sname(b->state)); if(catcherror()){ - rwunlock(nf, Wr); - mbput(nf); + mbput(b); error(nil); } - dfchdentry(parent, of->addr, nf->addr); - dbput(of); - mbput(of); - *fp = nf; - noerror(); - rwunlock(parent, Wr); - return nf; -} -void -dfused(Path *p) -{ - Memblk *f; + if(b->type&DBdirflag){ + doff = embedattrsz(b); + sz = Embedsz-doff; + addressdentries(b->d.embed+doff, sz/Daddrsz); + } + type = DBdata | (b->type&DBdirflag); + for(i = 0; i < nelem(b->d.dptr); i++) + if(b->d.dptr[i] != 0) + b->d.dptr[i] = dbaddress(type, b->d.dptr[i]); + type = DBptr0 | (b->type&DBdirflag); + for(i = 0; i < nelem(b->d.iptr); i++) + if(b->d.iptr[i] != 0) + b->d.iptr[i] = dbaddress(type+i, b->d.iptr[i]); - f = p->f[p->nf-1]; - isfile(f); - rwlock(f, Wr); - f->d.atime = fstime(0); - rwunlock(f, Wr); + noerror(); + addr = address(b); + mbput(b); + nbsendul(fs->syncc, 0); + return addr; } -/* - * Report that a file has been modified. - * Modification times propagate up to the root of the file tree. 
- * But frozen files are never changed. - */ -void -dfchanged(Path *p, int muid) +Memblk* +dfwalk(Memblk *d, char *name) { + Blksl sl; + vlong off; + daddrt *de; Memblk *f; - u64int t; int i; - t = fstime(0); - for(i = 0; i < p->nf; i++){ - f = p->f[i]; - rwlock(f, Wr); - if(f->frozen == 0) - if(!catcherror()){ - f->d.mtime = t; - f->d.atime = t; - f->d.muid = muid; - changed(f); + off = 0; + for(;;){ + sl = dfslice(d, Dblksz, off, Rd); + if(sl.len == 0) + break; + if(sl.b == nil) + continue; + if(catcherror()){ + mbput(sl.b); + error(nil); + } + de = sl.data; + for(i = 0; i < sl.len/Daddrsz; i++) + if(de[i] != 0){ + if(catcherror()){ + warn("'%s': dentry %D: %r", + d->mf->name, de[i]); + de[i] = 0; /* repair */ + continue; + } + f = dbget(DBfile, de[i]); noerror(); + if(strcmp(f->mf->name, name) == 0){ + noerror(); + mbput(sl.b); + return f; + } + mbput(f); } - rwunlock(f, Wr); + off += sl.len; + noerror(); + mbput(sl.b); } + error("'%s' not found", name); + return nil; } /* - * May be called with null parent, for root and ctl files. - * The first call with a null parent is root, all others are ctl - * files linked at root. + * May be called with null parent, for the root file. 
*/ Memblk* dfcreate(Memblk *parent, char *name, int uid, ulong mode) { Memblk *nf; Mfile *m; - int isctl; - - if(fsfull()) - error("file system full"); - isctl = parent == nil; - if(parent == nil) - parent = fs->root; - if(parent != nil){ + if(parent != nil) dprint("dfcreate '%s' %M at\n%H\n", name, mode, parent); - isdir(parent); - isrwlocked(parent, Wr); - ismelted(parent); - }else - dprint("dfcreate '%s' %M", name, mode); - - if(isctl) - nf = dballocz(DBctl, (mode&DMDIR)?DFdir:0, 1); else - nf = dballocz(DBfile, (mode&DMDIR)?DFdir:0, 1); + dprint("dfcreate root: '%s' %M\n", name, mode); + if(mode&DMDIR) + nf = mbnew(DBdir); + else + nf = mbnew(DBfile); if(catcherror()){ mbput(nf); - if(parent != nil) - rwunlock(parent, Wr); error(nil); } @@ -669,7 +455,6 @@ nf->d.muid = nf->d.uid; m->name = name; nf->d.asize = pmeta(nf->d.embed, Embedsz, nf); - changed(nf); if(parent != nil){ m->gid = parent->mf->gid; @@ -677,44 +462,25 @@ dfchdentry(parent, 0, nf->addr); } noerror(); - dprint("dfcreate-> %H\n within %H\n", nf, parent); + dprint("dfcreate -> %H\n", nf); return nf; } +/* + * Drops the given ref for f. + */ void dfremove(Memblk *p, Memblk *f) { - - /* funny as it seems, we may need extra blocks to melt */ - if(fsfull()) - error("file system full"); - - isrwlocked(f, Wr); - isrwlocked(p, Wr); - ismelted(p); - if(DBDIR(f) != 0 && f->d.ndents > 0) + if(canrlock(f->mf)) + fatal("dfremove: not wlocked: %H", f); + if(f->d.ndents > 0 && fs->mode != Worm) /* Worm can rm a tree */ error("directory not empty"); - incref(p); - if(catcherror()){ - mbput(p); - error(nil); - } dfchdentry(p, f->addr, 0); - /* shouldn't fail now. 
it's unlinked */ - - if(p->d.ndents == 0 && p->d.length > 0){ /* all gone, make it public */ + if(p->d.ndents == 0 && p->d.length > 0) p->d.length = 0; - changed(p); - } - - noerror(); - rwunlock(f, Wr); - if(!catcherror()){ - dbput(f); - noerror(); - } + xwunlock(f->mf); mbput(f); - mbput(p); } /* @@ -728,7 +494,6 @@ char *p; p = a; - isrwlocked(f, Rd); for(tot = 0; tot < count; tot += sl.len){ sl = dfslice(f, count-tot, off+tot, Rd); if(sl.len == 0){ @@ -754,11 +519,6 @@ ulong tot; char *p; - if(fsfull()) - error("file system full"); - - isrwlocked(f, Wr); - ismelted(f); p = a; if(f->d.mode&DMAPPEND) *off = f->d.length; @@ -767,250 +527,10 @@ if(sl.len == 0 || sl.data == nil) fatal("dfpwrite: bug"); memmove(sl.data, p+tot, sl.len); - changed(sl.b); mbput(sl.b); tot += sl.len; - if(*off+tot > f->d.length){ + if(*off+tot > f->d.length) f->d.length = *off+tot; - changed(f); - } - } - return tot; -} - -int -ptrmap(daddrt addr, int nind, Blkf f, void *a, int isdisk) -{ - int i; - Memblk *b; - long tot; - - if(addr == 0) - return 0; - if(isdisk) - b = dbget(DBdata+nind, addr); - else{ - b = mbget(DBdata+nind, addr, Dontmk); - if(b == nil) - return 0; /* on disk */ - } - if(catcherror()){ - mbput(b); - error(nil); - } - tot = 0; - if(f == nil || f(b, a) == 0){ - tot++; - if(nind > 0){ - for(i = 0; i < Dptrperblk; i++) - tot += ptrmap(b->d.ptr[i], nind-1, f, a, isdisk); - } - } - noerror(); - mbput(b); - return tot; -} - -static int -dumpf(Memblk*, daddrt *de, void *p) -{ - int isdisk; - Memblk *f; - - if(*de == 0) - return 0; - - isdisk = *(int*)p; - if(isdisk) - f = dbget(DBfile, *de); - else - f = mbget(DBfile, *de, Dontmk); - if(f != nil){ - if(catcherror()){ - mbput(f); - error(nil); - } - dfdump(f, isdisk); - noerror(); - mbput(f); - } - return 0; -} - -void -dfdump(Memblk *f, int isdisk) -{ - int i; - extern int mbtab; - - isfile(f); - /* visit the blocks to fetch them if needed, so %H prints them. 
*/ - for(i = 0; i < nelem(f->d.dptr); i++) - ptrmap(f->d.dptr[i], 0, nil, nil, isdisk); - for(i = 0; i < nelem(f->d.iptr); i++) - ptrmap(f->d.iptr[i], i+1, nil, nil, isdisk); - fprint(2, "%H\n", f); - if(DBDIR(f) != 0){ - mbtab++; - if(!catcherror()){ - dfdirmap(f, dumpf, &isdisk, Rd); - noerror(); - } - mbtab--; - } -} - -static void -freezeaddr(daddrt addr) -{ - Memblk *f; - - if(addr == 0) - return; - f = mbget(DBfile, addr, Dontmk); - if(f == nil) /* must be frozen */ - return; - if(catcherror()){ - mbput(f); - error(nil); - } - dffreeze(f); - noerror(); - mbput(f); - -} - -static int -bfreeze(Memblk *b, void*) -{ - int i; - - if(b->frozen) - return -1; - b->frozen = 1; - if(b->type == DBdata && DBDIR(b)) - for(i = 0; i < Dblkdatasz/Daddrsz; i++) - freezeaddr(b->d.ptr[i]); - return 0; -} - -long -dffreeze(Memblk *f) -{ - int i; - long tot; - ulong doff; - - isfile(f); - if(f->frozen) - return 0; - rwlock(f, Wr); - if(catcherror()){ - rwunlock(f, Wr); - error(nil); } - f->frozen = 1; - tot = 1; - if(DBDIR(f)) - for(doff = embedattrsz(f); doff < Embedsz; doff += Daddrsz) - freezeaddr(*(daddrt*)(f->d.embed+doff)); - for(i = 0; i < nelem(f->d.dptr); i++) - tot += ptrmap(f->d.dptr[i], 0, bfreeze, nil, Mem); - for(i = 0; i < nelem(f->d.iptr); i++) - tot += ptrmap(f->d.iptr[i], i+1, bfreeze, nil, Mem); - noerror(); - rwunlock(f, Wr); return tot; } - - -/* - * Caller walked down p, and now requires the nth element to be - * melted, and wlocked for writing. (nth count starts at 1); - * - * Return the path with the version of f that we must use, - * locked for writing and melted. - * References kept in the path are traded for the ones returned. - * - * Calls from user requests wait until /archive is melted. - * Calls from fsfreeze(), fsreclaim(), etc. melt /archive. 
- */ -Path* -meltedpath(Path **pp, int nth, int user) -{ - int i; - Memblk *f, **fp; - Path *p; - - ownpath(pp); - p = *pp; - assert(nth >= 1 && p->nf >= nth && p->nf >= 2); - assert(p->f[0] == fs->root); - fp = &p->f[nth-1]; - - /* - * 1. Optimistic: Try to get a loaded melted version for f. - */ - dflast(fp, Wr); - f = *fp; - if(!f->frozen) - return p; - ainc(&fs->nmelts); - rwunlock(f, Wr); - - /* - * 2. Realistic: - * walk down the path, melting every frozen thing until we - * reach f. Keep wlocks so melted files are not frozen while we walk. - * /active is special, because it's only frozen temporarily while - * creating a frozen version of the tree. Instead of melting it, - * we should just wait for it if the call is from a user RPC. - * p[0] is / - * p[1] is /active or /archive - */ - if(!user){ - rwlock(p->f[0], Wr); - i = 1; - }else{ - for(;;){ - dflast(&p->f[1], Wr); - if(p->f[1]->frozen == 0) - break; - rwunlock(p->f[1], Wr); - yield(); - } - i = 2; - } - for(; i < nth; i++) - dfmelt(p->f[i-1], &p->f[i]); - return p; -} - -/* - * Advance path to use the most recent version of each file. 
- */ -Path* -lastpath(Path **pp, int nth) -{ - Memblk *f; - Path *p; - int i; - - p = *pp; - for(i = 0; i < nth; i++){ - f = p->f[i]; - if(f != nil && f->mf != nil && f->mf->melted != nil) - break; - } - if(i == nth) - return p; /* all files have the last version */ - - ownpath(pp); - p = *pp; - for(i = 0; i < nth; i++){ - dflast(&p->f[i], Rd); - rwunlock(p->f[i], Rd); - } - return p; -} - --- /sys/src/cmd/creepy/fid.c Wed Apr 25 11:11:02 2012 +++ /sys/src/cmd/creepy/fid.c Fri May 11 15:47:40 2012 @@ -1,17 +1,18 @@ #include "all.h" static RWLock fidhashlk; -static Fid *fidshd, *fidstl; static Fid *fidhash[Fidhashsz]; static uint fidgen; -int noauth; +static QLock clientslk; +static Cli *clients; Alloc fidalloc = { .elsz = sizeof(Fid), .zeroing = 1, }; + Alloc rpcalloc = { .elsz = sizeof(Largerpc), @@ -24,36 +25,6 @@ .zeroing = 1, }; -static QLock clientslk; -static Cli *clients; - -static void -fidlink(Fid *fid) -{ - fid->next = fidshd; - fid->prev = nil; - if(fidshd != nil) - fidshd->prev = fid; - else - fidstl = fid; - fidshd = fid; -} - -static void -fidunlink(Fid *fid) -{ - if(fid->prev != nil) - fid->prev->next = fid->next; - else - fidshd = fid->next; - if(fid->next != nil) - fid->next->prev = fid->prev; - else - fidstl = fid->prev; - fid->next = nil; - fid->prev = nil; -} - int fidfmt(Fmt *fmt) { @@ -64,8 +35,8 @@ fid = va_arg(fmt->args, Fid*); if(fid == nil) return fmtprint(fmt, ""); - fmtprint(fmt, "fid %#p no %d r%d, omode %d arch %d", - fid, fid->no, fid->ref, fid->omode, fid->archived); + fmtprint(fmt, "fid %#p no %d r%d, omode %d", + fid, fid->no, fid->ref, fid->omode); p = fid->p; if(p == nil) return 0; @@ -79,70 +50,15 @@ dumpfids(void) { Fid *fid; - int n; + int n, i; - xrwlock(&fidhashlk, Rd); + xrlock(&fidhashlk); fprint(2, "fids:\n"); n = 0; - for(fid = fidshd; fid != nil; fid = fid->next) - fprint(2, "[%d] = %X\n", n++, fid); - xrwunlock(&fidhashlk, Rd); -} - -/* - * Similar to lastpath(), but does not need to lock anything, - * because the fs 
is quiescent - */ -static int -meltpath(Path *p) -{ - int i, n; - Memblk *f; - - n = 0; - for(i = 0; i < p->nf; i++) - while((f = p->f[i]->mf->melted) != nil){ - n++; - incref(f); - mbput(p->f[i]); - p->f[i] = f; - } - return n; -} - -void -meltfids(void) -{ - Fid *fid; - int n; - - xrwlock(&fidhashlk, Rd); - n = 0; - for(fid = fidshd; fid != nil; fid = fid->next) - if(canqlock(fid)){ - if(!fid->archived && fid->p != nil) - n += meltpath(fid->p); - qunlock(fid); - }else - warn("meltfids: couldn't lock"); - xrwunlock(&fidhashlk, Rd); - dprint("meltfids: %d fids advanced\n", n); -} - -void -countfidrefs(void) -{ - Fid *fid; - Path *p; - int i; - - xrwlock(&fidhashlk, Rd); - for(fid = fidshd; fid != nil; fid = fid->next){ - p = fid->p; - for(i = 0; i < p->nf; i++) - mbcountref(p->f[i]); - } - xrwunlock(&fidhashlk, Rd); + for(i = 0; i < nelem(fidhash); i++) + for(fid = fidhash[i]; fid != nil; fid = fid->next) + fprint(2, "[%d] = %X\n", n++, fid); + xrunlock(&fidhashlk); } Rpc* @@ -180,9 +96,9 @@ { Fid *fid, **fidp; - xrwlock(&fidhashlk, Wr); + xwlock(&fidhashlk); if(catcherror()){ - xrwunlock(&fidhashlk, Wr); + xwunlock(&fidhashlk); error(nil); } if(no < 0) @@ -197,21 +113,20 @@ fid->no = no; fid->cli = cli; fid->ref = 2; /* one for the caller; another because it's kept */ - fidlink(fid); noerror(); - xrwunlock(&fidhashlk, Wr); + xwunlock(&fidhashlk); dEprint("new fid %X\n", fid); return fid; } Fid* -getfid(Cli* cli, int no) +getfid(Cli* cli, uint no) { Fid *fid; - xrwlock(&fidhashlk, Rd); + xrlock(&fidhashlk); if(catcherror()){ - xrwunlock(&fidhashlk, Rd); + xrunlock(&fidhashlk); error(nil); } for(fid = fidhash[no%Fidhashsz]; fid != nil; fid = fid->hnext) @@ -219,11 +134,11 @@ incref(fid); noerror(); dEprint("getfid %d -> %X\n", no, fid); - xrwunlock(&fidhashlk, Rd); + xrunlock(&fidhashlk); return fid; } error("fid not found"); - return fid; + return nil; } void @@ -236,18 +151,17 @@ d9print("clunk %X\n", fid); putpath(fid->p); fid->p = nil; - xrwlock(&fidhashlk, Wr); 
+ xwlock(&fidhashlk); if(catcherror()){ - xrwunlock(&fidhashlk, Wr); + xwunlock(&fidhashlk); warn("putfid: %r"); error(nil); } for(fidp = &fidhash[fid->no%Fidhashsz]; *fidp != nil; fidp = &(*fidp)->hnext) if(*fidp == fid){ *fidp = fid->hnext; - fidunlink(fid); noerror(); - xrwunlock(&fidhashlk, Wr); + xwunlock(&fidhashlk); afree(&fidalloc, fid); return; } @@ -327,9 +241,6 @@ warn("unknown user '%s'. using 'none'", uname); fid->uid = usrid("none"); } - - if(fs->worm && strcmp(uname, getuser()) != 0) - error("user '%s' can't access rip main", uname); } void @@ -341,24 +252,28 @@ p = newpath(fs->root); fid->p = p; - if(fs->worm){ - if(strcmp(aname, "main") == 0){ - aname = ""; - }else if(strcmp(aname, "archive") != 0) - error("unknown tree '%s'", aname); - }else if(strcmp(aname, "main") == 0) - aname = ""; - else if(strncmp(aname, "main/", 5) == 0) - aname += 5; - - if(aname[0] != 0) - if(strcmp(aname, "active") == 0){ - addelem(&p, fs->active); - }else if(strcmp(aname, "archive") == 0){ - addelem(&p, fs->archive); - fid->archived = 1; - }else + if(aname[0] == 0 || strcmp(aname, "main/active") == 0) + aname = "root"; + else if(strcmp(aname, "main/archive") == 0){ + if(fs->mode != Worm) error("unknown tree '%s'", aname); + aname = "root"; + }else if(strcmp(aname, "wormwr") == 0){ + if(strcmp(uname, getuser()) != 0) + error("'%s' is not the worm owner", uname); + aname = "main"; + fid->wormwr = 1; + allowuid(fid->uid); + warn("worm write access for '%s'", uname); + } + + if(strcmp(aname, "root") == 0) + addelem(&p, fs->active); + else if(strcmp(aname, "main") != 0) + error("unknown tree '%s'", aname); + + /* TODO: could accept paths and walk to them */ + p->nroot = p->nf; } @@ -375,197 +290,14 @@ nfid = newfid(cli, no); nfid->p = clonepath(fid->p); nfid->uid = fid->uid; - nfid->archived = fid->archived; nfid->consopen = fid->consopen; nfid->buf = fid->buf; + nfid->wormwr = fid->wormwr; noerror(); xqunlock(fid); return nfid; } -static Memblk* -pickarch(Memblk *d, uvlong 
t) -{ - Blksl sl; - daddrt *de; - uvlong off; - int i; - uvlong cmtime; - daddrt cdaddr; - Memblk *f; - - off = 0; - cmtime = 0; - cdaddr = 0; - for(;;){ - sl = dfslice(d, Dblkdatasz, off, Rd); - if(sl.len == 0){ - assert(sl.b == nil); - break; - } - if(sl.b == nil) - continue; - if(catcherror()){ - mbput(sl.b); - error(nil); - } - for(i = 0; i < sl.len/Daddrsz; i++){ - de = sl.data; - de += i; - if(*de == 0) - continue; - f = dbget(DBfile, *de); - if(f->d.mtime > cmtime && f->d.mtime < t){ - cmtime = f->d.mtime; - cdaddr = *de; - } - mbput(f); - } - noerror(); - mbput(sl.b); - off += sl.len; - } - if(cdaddr == 0) - error("file not found"); - return dbget(DBfile, cdaddr); -} - -static int -digs(char **sp, int n) -{ - char b[8]; - char *s; - - s = *sp; - if(strlen(s) < n) - return 0; - assert(n < sizeof b - 1); - strecpy(b, b+n+1, *sp); - *sp += n; - return strtoul(b, nil, 10); -} - -/* - * convert symbolic time into a valid /archive wname. - * yyyymmddhhmm, yyyymmdd, mmdd, or hh:mm - */ -static Memblk* -archwalk(Memblk *f, char *wname) -{ - char *s; - Tm *tm; - uvlong t; - static QLock tmlk; - int wl; - - s = wname; - qlock(&tmlk); /* localtime is not reentrant! */ - tm = localtime(time(nil)); - wl = strlen(wname); - switch(wl){ - case 12: /* yyyymmddhhmm */ - case 8: /* yyyymmdd */ - tm->year = digs(&s, 4) - 1900; - case 4: /* mmdd */ - tm->mon = digs(&s, 2) - 1; - tm->mday = digs(&s, 2); - if(wl == 8) - break; - /* else fall */ - case 5: /* hh:mm */ - tm->hour = digs(&s, 2); - if(wl == 5 && s[0] != 0) - s++; - tm->min = digs(&s, 2); - break; - default: - qunlock(&tmlk); - error("file not found"); - } - dprint("archwalk to %d/%d/%d %d:%d:%d\n", - tm->year, tm->mday, tm->mon, tm->hour, tm->min, tm->sec); - t = tm2sec(tm); - t *= NSPERSEC; - qunlock(&tmlk); - return pickarch(f, t); -} - - -/* - * We are at /active/... or /archive/x/... - * Walk to /archive/T/... and return it so it's added to the - * path by the caller. 
- */ -static Memblk* -timewalk(Path *p, char *wname, int uid) -{ - Memblk *f, *nf, *pf, *arch, *af; - int i, isarch; - - if(p->nf < 2 || (p->f[1] == fs->archive && p->nf < 3)) - error("file not found"); - assert(p->f[0] == fs->root); - isarch = p->f[1] == fs->archive; - assert(p->f[1] == fs->active || isarch); - - arch = fs->archive; - rwlock(arch, Rd); - if(catcherror()){ - rwunlock(arch, Rd); - error(nil); - } - af = archwalk(arch, wname); - noerror(); - rwunlock(arch, Rd); - if(catcherror()){ - mbput(af); - error(nil); - } - i = 2; - if(isarch) - i++; - f = af; - incref(f); - nf = nil; - for(; i < p->nf; i++){ - pf = p->f[i]; - rwlock(f, Rd); - rwlock(pf, Rd); - if(catcherror()){ - rwunlock(pf, Rd); - rwunlock(f, Rd); - mbput(f); - error(nil); - } - dfaccessok(f, uid, AEXEC); - nf = dfwalk(f, pf->mf->name); - noerror(); - rwunlock(pf, Rd); - rwunlock(f, Rd); - mbput(f); - f = nf; - } - noerror(); - mbput(af); - USED(f); - USED(nf); - - if((f->d.mode&DMDIR) == 0){ /* it was not a dir at that time! */ - mbput(f); - error("file not found"); - } - return f; -} - -/* - * For walks into /archive, wname may be any time value (ns) or - * yyyymmddhhmm, yyyymmdd, mmdd, or hh:mm, - * and the walk proceeds to the archived file - * with the bigger mtime not greater than the specified time. - * If there's no such time, walk reports a file not found error. - * - * walks using @ lead to the corresponding dir in the archive. 
- */ void fidwalk(Fid *fid, char *wname) { @@ -577,47 +309,29 @@ xqunlock(fid); error(nil); } - p = lastpath(&fid->p, fid->p->nf); + p = fid->p; if(strcmp(wname, ".") == 0) goto done; if(strcmp(wname, "..") == 0){ if(p->nf > p->nroot) - p = dropelem(&fid->p); + dropelem(&fid->p); goto done; } f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); error(nil); } - dfaccessok(f, fid->uid, AEXEC); + fidaccessok(fid, f, AEXEC); - nf = nil; - if(catcherror()){ - if(f == fs->archive) - nf = archwalk(f, wname); - else if(wname[0] == '@' && f != fs->cons && f != fs->stats) - nf = timewalk(p, wname+1, fid->uid); - else - error(nil); - fid->archived = 1; /* BUG: clear archived on .. */ - }else{ - nf = dfwalk(f, wname); - noerror(); - } + nf = dfwalk(f, wname); - rwunlock(f, Rd); + xrunlock(f->mf); noerror(); - p = addelem(&fid->p, nf); - decref(nf); + addelem(&fid->p, nf); + mbput(nf); done: - f = p->f[p->nf-1]; - if(isro(f)) - fid->archived = f != fs->cons && f != fs->stats; - else if(f == fs->active) - fid->archived = 0; - dfused(p); noerror(); xqunlock(fid); } @@ -632,71 +346,112 @@ } void +fidcanwrite(Fid *fid) +{ + if(fs->mode == Rdonly && fid->p->f[fid->p->nf-1] != fs->cons) + error("read only file system"); + if(fs->mode == Worm && fid->wormwr == 0) + error("not allowed to write worm"); + if(writedenied(fid->uid)) + error("user can't write"); +} + + +/* + * Does not check if the user can't write because of the "write" + * user. + * Does check if the user is allowed in config mode. 
+ */ +void +fidaccessok(Fid *fid, Memblk *f, int bits) +{ + uint mode; + int uid; + + uid = fid->uid; + if(fs->mode != Worm && allowed(uid)) + return; + if(fs->mode == Worm && fid->wormwr != 0 && allowed(uid)) + return; + bits &= 3; + + mode = f->d.mode &0777; + + if((mode&bits) == bits) + return; + mode >>= 3; + + if(member(f->d.gid, uid) && (mode&bits) == bits) + return; + mode >>= 3; + if(f->d.uid == uid && (mode&bits) == bits) + return; + + /* + * The process owner and elf can always access the console + * to configure the file system. + */ + if(f == fs->cons) + if(uid == usrid("elf") || uid == usrid(getuser())) + return; + + error("permission denied"); +} + +void fidopen(Fid *fid, int mode) { int fmode, amode; Memblk *f; Path *p; - if(fid->omode != -1) - error("fid already open"); - - /* check this before we try to melt it */ xqlock(fid); if(catcherror()){ xqunlock(fid); error(nil); } + if(fid->omode != -1) + error("fid already open"); p = fid->p; f = p->f[p->nf-1]; - if(mode != OREAD){ - if(f == fs->root || f == fs->archive || fid->archived) - error("can't write archived or built-in files"); - if(fs->mode == Rd) - error("read only file system"); - if(writedenied(fid->uid)) - error("user can't write"); - } + amode = 0; - if((mode&3) != OREAD || (mode&OTRUNC) != 0) + if((mode&3) != OREAD || (mode&OTRUNC) != 0 || (mode&ORCLOSE) != 0) amode |= AWRITE; if((mode&3) != OWRITE) amode |= AREAD; - if(amode != AREAD) - if(f == fs->cons) - rwlock(f, Wr); - else{ - p = meltedpath(&fid->p, fid->p->nf, 1); - f = p->f[p->nf-1]; - } - else{ - p = lastpath(&fid->p, fid->p->nf); - rwlock(f, Rd); - } + + if(amode != AREAD){ + fidcanwrite(fid); + f = prenew(fid->p, fid->p->nf); + }else + xrlock(f->mf); if(catcherror()){ - rwunlock(f, (amode!=AREAD)?Wr:Rd); + if(amode != AREAD) + xwunlock(f->mf); + else + xrunlock(f->mf); error(nil); } + fmode = f->d.mode; - if(mode != OREAD){ - if(f != fs->root && p->f[p->nf-2]->d.mode&DMAPPEND) + if(amode != AREAD){ + 
if(p->f[p->nf-2]->d.mode&DMAPPEND) error("directory is append only"); if((fmode&DMDIR) != 0) error("wrong open mode for a directory"); } - dfaccessok(f, fid->uid, amode); + fidaccessok(fid, f, amode); + if((fmode&DMEXCL) != 0 && f->mf->open) + if(f != fs->cons || amode != AWRITE) /* ok to write cons cmds */ + error("exclusive use file already open"); if(mode&ORCLOSE){ - if(fid->archived || isro(f)) - error("can't remove an archived or built-in file"); if(f->d.mode&DMUSERS) error("can't remove /users"); - dfaccessok(p->f[p->nf-2], fid->uid, AWRITE); + fidaccessok(fid, p->f[p->nf-2], AWRITE); fid->rclose++; } - if((fmode&DMEXCL) != 0 && f->mf->open) - if(f != fs->cons || amode != AWRITE) /* ok to write cons */ - error("exclusive use file already open"); - if((mode&OTRUNC) != 0 && f != fs->cons && f != fs->stats){ + if((mode&OTRUNC) != 0 && builtin(f) == 0){ wstatint(f, "length", 0); if(f->d.mode&DMUSERS){ f->d.mode = 0664|DMUSERS; @@ -704,22 +459,24 @@ f->d.gid = usrid("adm"); f->mf->uid = "adm"; f->mf->gid = "adm"; - changed(f); } } if(f == fs->stats) fid->buf = updatestats(mode&OTRUNC, 1); - f->mf->open++; + ainc(&f->mf->open); fid->omode = mode&3; fid->loff = 0; fid->lidx = 0; fid->consopen = f == fs->cons; noerror(); - rwunlock(f, (amode!=AREAD)?Wr:Rd); + if(amode != AREAD) + xwunlock(f->mf); + else + xrunlock(f->mf); if(mode&OTRUNC) - dfchanged(p, fid->uid); + pchanged(p, p->nf, fid->uid); else - dfused(p); + pused(p); noerror(); xqunlock(fid); } @@ -735,8 +492,6 @@ xqunlock(fid); error(nil); } - if(fsdiskfree() < Dzerofree) - error("disk full"); if(fid->omode != -1) error("fid already open"); if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) @@ -745,34 +500,28 @@ error("that file name is too creepy"); if((perm&DMDIR) != 0 && mode != OREAD) error("wrong open mode for a directory"); - if(fs->mode == Rd) - error("read only file system"); - if(writedenied(fid->uid)) - error("user can't write"); - if(fid->archived) - error("file is archived or builtin"); + 
fidcanwrite(fid); if((perm&DMBITS) != perm) error("unknown bit set in perm %M %#ulx", perm, perm); p = fid->p; f = p->f[p->nf-1]; - if(mode&ORCLOSE) - if(f->d.mode&DMUSERS) - error("can't remove the users file"); + if(f == fs->root) + error("file is read only"); + if((mode&ORCLOSE) != 0 && (f->d.mode&DMUSERS) != 0) + error("can't remove the users file"); if((f->d.mode&DMDIR) == 0) error("not a directory"); - p = meltedpath(&fid->p, fid->p->nf, 1); - f = p->f[p->nf-1]; + prenew(p, p->nf); if(catcherror()){ - rwunlock(f, Wr); + xwunlock(f->mf); error(nil); } - dfaccessok(f, fid->uid, AWRITE); + fidaccessok(fid, f, AWRITE); if(!catcherror()){ mbput(dfwalk(f, name)); error("file already exists"); } - nf = dfcreate(f, name, fid->uid, perm); p = addelem(&fid->p, nf); if(f == fs->active && strcmp(name, "users") == 0){ @@ -781,74 +530,81 @@ nf->d.gid = usrid("adm"); nf->mf->uid = "adm"; nf->mf->gid = "adm"; - changed(nf); } - decref(nf); - nf->mf->open++; + mbput(nf); + ainc(&nf->mf->open); noerror(); - rwunlock(f, Wr); + xwunlock(f->mf); fid->omode = mode&3; fid->loff = 0; fid->lidx = 0; if(mode&ORCLOSE) fid->rclose++; - dfchanged(p, fid->uid); + pchanged(p, p->nf, fid->uid); noerror(); xqunlock(fid); } -typedef struct Rarg -{ - Fid *fid; - int n; - uchar *data; - ulong ndata; - Packmeta pack; - ulong tot; - long ndents; -}Rarg; - -static int -readdirf(Memblk*, daddrt *de, void *a) -{ - Rarg *ra; - Memblk *f; - ulong nr; - - ra = a; - if(ra->tot+2 >= ra->ndata || ra->fid->lidx >= ra->ndents) - return -1; - if(*de == 0) - return 0; - if(ra->n > 0){ - ra->n--; - return 0; - } - f = dbget(DBfile, *de); - if(catcherror()){ - mbput(f); - error(nil); - } - nr = ra->pack(f, ra->data+ra->tot, ra->ndata-ra->tot); - noerror(); - mbput(f); - if(nr <= 2) - return -1; - ra->tot += nr; - ra->fid->lidx++; - return 0; -} - static ulong readdir(Fid *fid, uchar *data, ulong ndata, uvlong, Packmeta pack) { - Memblk *d; - Rarg ra = {fid, fid->lidx, data, ndata, pack, 0, 0}; + Memblk *d, *f; 
+ Blksl sl; + vlong off, tot, nr; + daddrt *de; + int i, ndents, n; d = fid->p->f[fid->p->nf-1]; - ra.ndents = d->d.ndents; - dfdirmap(d, readdirf, &ra, Rd); - return ra.tot; + ndents = d->d.ndents; + if(fid->lidx >= ndents) + return 0; + off = 0; + n = 0; + tot = 0; + for(;;){ + sl = dfslice(d, Dblksz, off, Rd); + if(sl.len == 0) + break; + if(sl.b == nil) + continue; + if(catcherror()){ + mbput(sl.b); + error(nil); + } + de = sl.data; + for(i = 0; i < sl.len/Daddrsz; i++){ + if(de[i] == 0) + continue; + if(n++ < fid->lidx) + continue; + if(tot+2 > ndata || n > ndents) + goto done; + f = dbget(DBfile, de[i]); + xrlock(f->mf); + if(catcherror()){ + xrunlock(f->mf); + mbput(f); + error(nil); + } + nr = pack(f, data+tot, ndata-tot); + noerror(); + xrunlock(f->mf); + mbput(f); + if(nr <= 2) + goto done; + tot += nr; + fid->lidx++; + continue; + done: + noerror(); + mbput(sl.b); + return tot; + } + off += sl.len; + noerror(); + mbput(sl.b); + } + return tot; } static long @@ -868,6 +624,21 @@ return count; } +static void +openfor(Fid *fid, int iswr) +{ + + if(fid->omode == -1) + error("fid not open"); + if(iswr){ + fidcanwrite(fid); /* already checked; paranoia */ + if(fid->omode == OREAD) + error("fid not open for writing"); + }else + if(fid->omode == OWRITE) + error("fid not open for reading"); +} + long fidread(Fid *fid, void *data, ulong count, vlong offset, Packmeta pack) { @@ -879,13 +650,10 @@ xqunlock(fid); error(nil); } - if(fid->omode == -1) - error("fid not open"); - if(fid->omode == OWRITE) - error("fid not open for reading"); + openfor(fid, Rd); if(offset < 0) error("negative offset"); - p = lastpath(&fid->p, fid->p->nf); + p = fid->p; f = p->f[p->nf-1]; if(f == fs->cons){ noerror(); @@ -899,23 +667,22 @@ xqunlock(fid); return count; } - rwlock(f, Rd); + xrlock(f->mf); noerror(); xqunlock(fid); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); error(nil); } if(f->d.mode&DMDIR){ if(fid->loff != offset) - error("non-sequential dir read not supported"); 
+ error("non-sequential dir read"); count = readdir(fid, data, count, offset, pack); fid->loff += count; }else count = dfpread(f, data, count, offset); noerror(); - rwunlock(f, Rd); - dfused(p); + xrunlock(f->mf); return count; } @@ -930,21 +697,14 @@ xqunlock(fid); error(nil); } - if(fs->mode == Rd) - error("read only file system"); - if(writedenied(fid->uid)) - error("user can't write"); - if(fsdiskfree() < Dzerofree) - error("disk full"); - if(fid->omode == -1) - error("fid not open"); - if(fid->omode == OREAD) - error("fid not open for writing"); + openfor(fid, Wr); p = fid->p; f = p->f[p->nf-1]; if(f == fs->cons){ xqunlock(fid); noerror(); + if(fid->omode == OWRITE) + return consctl(data, count); return conswrite(data, count); } if(f == fs->stats){ @@ -953,17 +713,16 @@ return count; } - p = meltedpath(&fid->p, fid->p->nf, 1); - f = p->f[p->nf-1]; + f = prenew(fid->p, fid->p->nf); if(catcherror()){ - rwunlock(f, Wr); + xwunlock(f->mf); error(nil); } count = dfpwrite(f, data, count, offset); noerror(); - rwunlock(f, Wr); + xwunlock(f->mf); - dfchanged(p, fid->uid); + pchanged(p, p->nf, fid->uid); noerror(); xqunlock(fid); @@ -981,29 +740,30 @@ xqunlock(fid); error(nil); } + if(fid->omode == -1) + error("fid not open"); p = fid->p; f = p->f[p->nf-1]; - rwlock(f, Wr); - f->mf->open--; - if((f->d.mode&DMUSERS) && (fid->omode&3) != OREAD) + adec(&f->mf->open); + if((f->d.mode&DMUSERS) && (fid->omode&3) != OREAD){ + xwlock(f->mf); rwusers(f); - rwunlock(f, Wr); + xwunlock(f->mf); + } fid->omode = -1; if(fid->rclose){ - lastpath(&fid->p, fid->p->nf); - p = meltedpath(&fid->p, fid->p->nf-1, 1); - fp = p->f[p->nf-2]; - rwlock(f, Wr); + fp = prenew(fid->p, fid->p->nf-1); + xwlock(f->mf); if(catcherror()){ - rwunlock(f, Wr); + xwunlock(f->mf); mbput(f); }else{ dfremove(fp, f); fid->p->nf--; noerror(); } - rwunlock(fp, Wr); - dfchanged(p, fid->uid); + xwunlock(fp->mf); + pchanged(p, p->nf, fid->uid); } putpath(fid->p); fid->p = nil; @@ -1023,35 +783,33 @@ xqunlock(fid); 
error(nil); } - if(fs->mode == Rd) - error("read only file system"); - if(writedenied(fid->uid)) - error("user can't write"); + fidcanwrite(fid); p = fid->p; f = p->f[p->nf-1]; - if(fid->archived || isro(f)) - error("can't remove archived or built-in files"); - lastpath(&fid->p, fid->p->nf); - p = meltedpath(&fid->p, fid->p->nf-1, 1); - fp = p->f[p->nf-2]; - f = p->f[p->nf-1]; - rwlock(f, Wr); + if(builtin(f)) + error("can't remove built-in files"); + if(f->d.mode&DMUSERS) + error("can't remove the users file"); + fp = prenew(fid->p, fid->p->nf-1); + if(fp->d.mode&DMAPPEND){ + xwunlock(fp->mf); + error("directory is append only"); + } + xwlock(f->mf); if(catcherror()){ - rwunlock(f, Wr); - rwunlock(fp, Wr); + xwunlock(f->mf); + xwunlock(fp->mf); error(nil); } - if(fp->d.mode&DMAPPEND) - error("directory is append only"); - if(f->d.mode&DMUSERS) - error("can't remove the users file"); - dfaccessok(fp, fid->uid, AWRITE); + fidaccessok(fid, fp, AWRITE); + if(fid->omode != -1) + adec(&f->mf->open); fid->omode = -1; dfremove(fp, f); fid->p->nf--; noerror(); - rwunlock(fp, Wr); - dfchanged(fid->p, fid->uid); + xwunlock(fp->mf); + pchanged(fid->p, fid->p->nf, fid->uid); putpath(fid->p); fid->p = nil; noerror(); @@ -1079,24 +837,22 @@ } /* - * Read ahead policy: to be called after replying to an ok. read RPC. - * - * We try to keep at least Nahead more bytes in the file if it seems - * that's ok. + * Read ahead policy: to be called after replying to a read request. + * We try to keep at least Nahead more bytes in the file. 
*/ void rahead(Memblk *f, uvlong offset) { Mfile *m; - rwlock(f, Rd); + xrlock(f->mf); m = f->mf; if(m->sequential == 0 || m->raoffset > offset + Nahead){ - rwunlock(f, Rd); + xrunlock(f->mf); return; } if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); warn("rahead: %r"); return; } @@ -1106,7 +862,7 @@ if(dfpread(f, nil, Maxmdata, offset) != Maxmdata) break; noerror(); - rwunlock(f, Rd); + xrunlock(f->mf); } static void @@ -1151,7 +907,7 @@ } void -srv9pix(char *srv, char* (*cliworker)(void *arg, void **aux)) +srv9pix(char *srv, int noauth, char* (*cliworker)(void *arg, void **aux)) { Cli *cli; int fd[2]; @@ -1161,9 +917,10 @@ if(pipe(fd) < 0) fatal("pipe: %r"); postfd(name, fd[0]); - warn("listen %s", srv); + warn("listen %s", name); consprint("listen %s\n", srv); cli = newcli(name, fd[1], -1); + cli->noauth = noauth; getworker(cliworker, cli, nil); } @@ -1172,7 +929,7 @@ { Cli *cli; Channel *c; - int data, nctl; + int data, nctl, noauth; char *dir, ndir[NETPATHLEN], *addr; char* (*cliworker)(void *arg, void **aux); @@ -1180,6 +937,7 @@ addr = recvp(c); dir = recvp(c); cliworker = recvp(c); + noauth = recvp(c) != nil; chanfree(c); threadsetname("listenproc %s", addr); for(;;){ @@ -1192,12 +950,13 @@ continue; } cli = newcli(getremotesys(ndir), data, nctl); + cli->noauth = noauth; getworker(cliworker, cli, nil); } } void -listen9pix(char *addr, char* (*cliworker)(void *arg, void **aux)) +listen9pix(char *addr, int noauth, char* (*cliworker)(void *arg, void **aux)) { char *dir; Channel *c; @@ -1214,4 +973,8 @@ sendp(c, addr); sendp(c, dir); sendp(c, cliworker); + if(noauth) + sendp(c, "noauth"); + else + sendp(c, nil); } --- /sys/src/cmd/creepy/fmt.c Thu Apr 26 16:52:03 2012 +++ /sys/src/cmd/creepy/fmt.c Wed May 9 16:17:51 2012 @@ -25,11 +25,6 @@ } void -meltfids(void) -{ -} - -void rwusers(Memblk*) { } @@ -46,29 +41,81 @@ return s; } -void -countfidrefs(void) +enum { + Fossiloff = 128*1024, + Fossilmagic = 0xffae7637, +}; + +static int +confirm(char *msg) +{ 
+ char buf[100]; + int n; + + fprint(2, "%s [y/n]: ", msg); + n = read(0, buf, sizeof buf - 1); + if(n <= 0) + return 0; + if(buf[0] == 'y') + return 1; + return 0; +} + +static void +dontoverwrite(char *dev) +{ + Dsuperdata d; + static char buf[BIT64SZ]; + int fd; + + fd = open(dev, ORDWR); + if(fd < 0) + fatal("%s: %r", dev); + werrstr("short disk"); + if(pread(fd, &d, sizeof d, Dsuperaddr) != sizeof d) + fatal("%s: read: %r", dev); + if(d.magic == MAGIC) + if(!confirm("disk has a creepy fs: continue?")){ + warn("aborting"); + threadexitsall("no"); + }else{ + close(fd); + return; + } + if(pread(fd, buf, sizeof buf, Fossiloff) != sizeof buf){ + close(fd); + return; + } + if(GBIT32(buf) == Fossilmagic) + if(!confirm("disk has a fossil fs: continue?")){ + warn("aborting"); + threadexitsall("no"); + }else{ + memset(buf, 0, sizeof buf); + pwrite(fd, buf, sizeof buf, Fossiloff); + } + close(fd); } static void usage(void) { - fprint(2, "usage: %s [-DFLAGS] [-vy] disk\n", argv0); - exits("usage"); + fprint(2, "usage: %s [-DFLAGS] [-wy] disk\n", argv0); + threadexits("usage"); } void threadmain(int argc, char *argv[]) { char *dev; - int verb, force; + int force, isworm; dev = nil; - verb = force = 0; + force = isworm = 0; ARGBEGIN{ - case 'v': - verb = 1; + case 'w': + isworm = 1; break; case 'y': force = 1; @@ -85,18 +132,22 @@ dev = argv[0]; else usage(); + threadsetname("fmt"); + fmtinstall('D', daddrfmt); fmtinstall('P', pathfmt); fmtinstall('H', mbfmt); fmtinstall('M', dirmodefmt); errinit(Errstack); if(catcherror()) fatal("error: %r"); - fsfmt(dev, force); - if(verb) - fsdump(0, Mem); - else - print("%lld %ldK blocks\n", fs->ndblk, Dblksz/1024); + if(!force) + dontoverwrite(dev); + fsfmt(dev, isworm); + warn("%lld %ldK %s blocks\n", fs->ndblk, Dblksz/1024, mname(fs->mode)); + if(dbg['d']){ + fsdump(1, Disk); + fscheck(); + } noerror(); - exits(nil); + threadexitsall(nil); } - --- /sys/src/cmd/creepy/fns.h Thu Apr 26 19:07:04 2012 +++ /sys/src/cmd/creepy/fns.h Fri 
May 11 15:47:51 2012 @@ -1,55 +1,45 @@ extern Path* addelem(Path **pp, Memblk *f); -extern daddrt addrofref(daddrt refaddr, int idx); +extern daddrt address(Memblk *b); +extern void addressdentries(void *p, int n); extern void afree(Alloc *a, void *nd); extern int allowed(int uid); extern int allowed(int); extern int allowed(int); +extern void allowuid(int uid); extern void* anew(Alloc *a); -extern void changed(Memblk *b); +extern ulong asize(Alloc *a); +extern int builtin(Memblk *f); extern void checkblk(Memblk *b); -extern void checktag(u64int tag, uint type, daddrt addr); +extern u64int chkusr(char *buf); extern char* cliworker9p(void *v, void**aux); extern Path* clonepath(Path *p); +extern long consctl(char *p, long count); extern void consinit(void); extern void consprint(char *fmt, ...); extern void consprintclients(void); extern long consread(char *buf, long count); extern long conswrite(char *ubuf, long count); -extern void countfidrefs(void); -extern void countfidrefs(void); -extern void countfidrefs(void); -extern Memblk* dballocz(uint type, int dbit, int zeroit); -extern void dbclear(u64int tag, daddrt addr); -extern void dbcopy(daddrt dst, daddrt src); -extern daddrt dbcounted(daddrt addr); -extern u64int dbcountref(daddrt addr); -extern Memblk* dbdup(Memblk *b); -extern Memblk* dbget(uint type, daddrt addr); -extern daddrt dbgetref(daddrt addr); -extern daddrt dbincref(daddrt addr); -extern long dbput(Memblk *b); -extern long dbread(Memblk *b); -extern void dbsetref(daddrt addr, int ref); +extern int daddrfmt(Fmt *fmt); +extern daddrt dbaddress(int type, daddrt addr); +extern Memblk* dbget(int type, daddrt addr); +extern Memblk* dbgetlocked(int type, daddrt addr); +extern u64int dbgettag(daddrt addr); +extern void dbmark(int type, daddrt addr, u64int e); +extern u64int dbsettag(daddrt addr, u64int e); extern long dbwrite(Memblk *b); extern void debug(void); -extern void dfaccessok(Memblk *f, int uid, int bits); -extern ulong dfbno(Memblk *f, uvlong 
off, ulong *boffp); +extern daddrt dfaddress(daddrt addr); extern void dfcattr(Memblk *f, int op, char *name, char *val); -extern void dfchanged(Path *p, int muid); -extern uvlong dfchdentry(Memblk *d, daddrt addr, daddrt naddr); +extern void dfchdentry(Memblk *d, daddrt addr, daddrt naddr); extern Memblk* dfcreate(Memblk *parent, char *name, int uid, ulong mode); -extern uvlong dfdirmap(Memblk *d, Dirf dirf, void *arg, int iswr); -extern void dfdropblks(Memblk *f, ulong bno, ulong bend); extern void dfdump(Memblk *f, int isdisk); -extern long dffreeze(Memblk *f); -extern void dflast(Memblk **fp, int iswr); -extern Memblk* dfmelt(Memblk *parent, Memblk **fp); +extern void dfmark(daddrt addr, u64int e); extern ulong dfpread(Memblk *f, void *a, ulong count, uvlong off); extern ulong dfpwrite(Memblk *f, void *a, ulong count, uvlong *off); extern long dfrattr(Memblk *f, char *name, char *val, long count); extern void dfremove(Memblk *p, Memblk *f); extern Blksl dfslice(Memblk *f, ulong len, uvlong off, int iswr); -extern void dfused(Path *p); +extern void dftruncate(Memblk *f); extern Memblk* dfwalk(Memblk *d, char *name); extern long dfwattr(Memblk *f, char *name, char *val); extern Path* dropelem(Path **pp); @@ -57,7 +47,9 @@ extern void dumplockstats(void); extern ulong embedattrsz(Memblk *f); extern void fatal(char *fmt, ...); +extern void fidaccessok(Fid *fid, Memblk *f, int bits); extern void fidattach(Fid *fid, char *aname, char *uname); +extern void fidcanwrite(Fid *fid); extern Fid* fidclone(Cli *cli, Fid *fid, int no); extern void fidclose(Fid *fid); extern void fidcreate(Fid *fid, char *name, int mode, ulong perm); @@ -68,25 +60,16 @@ extern void fidwalk(Fid *fid, char *wname); extern long fidwrite(Fid *fid, void *data, ulong count, uvlong *offset); extern void freerpc(Rpc *rpc); -extern int fscheck(void); -extern uvlong fsdiskfree(void); +extern void fsarchive(void); +extern long fscheck(void); extern void fsdump(int full, int disktoo); -extern void 
fsfmt(char *dev, int force); -extern int fsfull(void); -extern int fslru(void); -extern uvlong fsmemfree(void); -extern void fsopen(char *dev, int worm, int canwr); -extern void fspolicy(int when); -extern int fsreclaim(void); +extern void fsfmt(char *dev, int isworm); +extern void fsopen(char *dev, int mode, usize fsysmem); +extern void fspolicy(void); extern void fssync(void); -extern void fssyncproc(void*); extern uvlong fstime(uvlong t); -extern Fid* getfid(Cli* cli, int no); +extern Fid* getfid(Cli* cli, uint no); extern void gmeta(Memblk *f, void *buf, ulong nbuf); -extern void isfile(Memblk *f); -extern void ismelted(Memblk *b); -extern int isro(Memblk *f); -extern void isrwlocked(Memblk *f, int iswr); extern int ixcallfmt(Fmt *fmt); extern uint ixpack(IXcall *f, uchar *ap, uint nap); extern uint ixpackedsize(IXcall *f); @@ -94,59 +77,65 @@ extern char* ixstats(char *s, char*, int, int); extern char* ixstats(char *s, char*, int, int); extern uint ixunpack(uchar *ap, uint nap, IXcall *f); -extern Path* lastpath(Path **pp, int nth); extern int leader(int gid, int lead); -extern void listen9pix(char *addr, char* (*cliworker)(void *arg, void **aux)); +extern void listen9pix(char *addr, int noauth, char* (*cliworker)(void *arg, void **aux)); extern void lockstats(int on); -extern Memblk* mballocz(daddrt addr, int zeroit); -extern daddrt mbcountref(Memblk *b); +extern void markdentries(void *p, int n, u64int e); +extern Memblk* mballocz(int zeroit); extern int mbfmt(Fmt *fmt); -extern Memblk* mbget(int type, daddrt addr, int mkit); -extern Memblk* mbhash(Memblk *b); -extern int mbhashed(daddrt addr); +extern Memblk* mbget(daddrt addr); +extern Memblk* mbgetlocked(daddrt addr); +extern void mbhash(Memblk *b); +extern int mbhashed(Memblk *b); +extern Memblk* mbload(daddrt addr, int locked); +extern int mblru(Memblk *b); +extern Memblk* mbnew(int type); extern void mbput(Memblk *b); -extern int mbunhash(Memblk *b, int ispolicy); -extern void mbunused(Memblk *b); 
-extern Path* meltedpath(Path **pp, int nth, int user); -extern void meltedref(Memblk *rb); -extern void meltfids(void); -extern void meltfids(void); -extern void meltfids(void); +extern daddrt mbrenew(Memblk *b); +extern daddrt mbrenewlocked(Memblk *b); +extern void mbset(Memblk *b, int s); +extern int mbunhash(Memblk *b, int onlyidle); extern int member(int uid, int member); extern int member(int uid, int member); extern int member(int uid, int member); -extern List mfilter(List *bl, int(*f)(Memblk*)); +extern void mlink(List *l, Memblk *b); extern void mlistdump(char *tag, List *l); -extern void munlink(List *l, Memblk *b, int ispolicy); +extern char* mname(int m); +extern Memblk* munlink(List *l, Memblk *b); +extern void mused(List *l, Memblk *b); +extern int namefmt(Fmt *fmt); +extern daddrt newdaddr(void); extern Fid* newfid(Cli* cli, int no); extern Path* newpath(Memblk *root); extern Rpc* newrpc(void); +extern long nextime(long t, int hr); extern char* ninestats(char *s, char *e, int clr, int verb); extern char* ninestats(char *s, char*, int, int); extern char* ninestats(char *s, char*, int, int); extern void nodebug(void); +extern void okdiskaddr(daddrt addr); +extern vlong opend(Opstat *o, vlong t0); +extern vlong opstart(Opstat *o, vlong t); extern void ownpath(Path **pp); extern int pathfmt(Fmt *fmt); +extern void pchanged(Path *p, int n, int muid); extern ulong pmeta(void *buf, ulong nbuf, Memblk *f); -extern int ptrmap(daddrt addr, int nind, Blkf f, void *a, int isdisk); +extern Memblk* prenew(Path *p, int nth); +extern void pused(Path *p); extern void putcli(Cli *cli); extern void putfid(Fid *fid); extern void putpath(Path *p); -extern void quiescent(int y); extern void rahead(Memblk *f, uvlong offset); -extern daddrt refaddr(daddrt addr, int *idx); extern void replied(Rpc *rpc); extern void rlsedebug(int r); extern int rpcfmt(Fmt *fmt); -extern void rwlock(Memblk *f, int iswr); -extern void rwunlock(Memblk *f, int iswr); extern void rwusers(Memblk 
*uf); extern void rwusers(Memblk*); extern void rwusers(Memblk*); extern int setdebug(void); extern void setfiduid(Fid *fid, char *uname); -extern void srv9pix(char *srv, char* (*cliworker)(void *arg, void **aux)); -extern void timeproc(void*); +extern char* sname(int s); +extern void srv9pix(char *srv, int noauth, char* (*cliworker)(void *arg, void **aux)); extern char* tname(int t); extern char* updatestats(int clr, int verb); extern int usrfmt(Fmt *fmt); @@ -156,16 +145,18 @@ extern char* usrname(int uid); extern char* usrname(int); extern char* usrname(int); -extern Path* walkpath(Memblk *f, char *elems[], int nelems); extern Path* walkto(char *a, char **lastp); extern void warn(char *fmt, ...); extern void warnerror(char *fmt, ...); extern int writedenied(int uid); -extern void written(Memblk *b); extern void wstatint(Memblk *f, char *name, u64int v); extern int xcanqlock(QLock *q); +extern Memblk* xmbget(daddrt addr, int locked, int load); +extern void xmlinkhd(List *l, Memblk *b); +extern void xmunlink(List *l, Memblk *b); extern void xqlock(QLock *q); -extern void xqunlock(QLock *q); -extern void xrwlock(RWLock *rw, int iswr); -extern void xrwunlock(RWLock *rw, int iswr); +extern void xrlock(RWLock *rw); +extern void xrunlock(RWLock *rw); +extern void xwlock(RWLock *rw); +extern void xwunlock(RWLock *rw); extern long wname(Memblk *f, char *val); --- /sys/src/cmd/creepy/fsys.c Thu Apr 26 19:07:04 2012 +++ /sys/src/cmd/creepy/fsys.c Fri May 11 16:19:35 2012 @@ -1,183 +1,137 @@ #include "all.h" /* - * All the code assumes outofmemoryexits = 1. + * file system */ -enum -{ - Lru = 0, - Freeze, - Write, - Nfsops, -}; - Fsys *fs; -uvlong maxfsz; -vlong fsoptime[Nfsops]; -ulong nfsopcalls[Nfsops]; -static char* fsopname[] = +/* + * Caller wants to write the file represented by path[0:nth]. + * Make sure it's MBmem and wlocked for writing. + * We know renewed files won't be frozen again because the system is + * not quiescent. 
+ */ +Memblk* +prenew(Path *p, int nth) { -[Lru] "lru", -[Freeze] "freeze", -[Write] "write", -}; + int i; + Memblk *f; + daddrt oaddr, naddr; -char statstext[Statsbufsz], *statsp; + assert(nth >= 1 && p->nf >= nth && p->nf >= 2); + assert(p->f[0] == fs->root); + f = p->f[nth-1]; + + xwlock(f->mf); + if(f->state == MBmem) + return f; + xwunlock(f->mf); + + ainc(&fs->nrenewpath); + for(i = 0; i < nth; i++){ + if(p->f[i]->state == MBmem) + continue; + assert(i > 0); + xwlock(p->f[i-1]->mf); + xwlock(p->f[i]->mf); + oaddr = p->f[i]->addr; + naddr = mbrenew(p->f[i]); + if(naddr != 0) + dfchdentry(p->f[i-1], oaddr, naddr); + xwunlock(p->f[i-1]->mf); + xwunlock(p->f[i]->mf); + } + xwlock(f->mf); + return f; +} +/* + * Report that a file has been modified. + * Modification times propagate up to the root of the file tree. + */ void -quiescent(int y) +pchanged(Path *p, int n, int muid) { - if(y == No) - xrwlock(&fs->quiescence, Rd); - else - xrwunlock(&fs->quiescence, Rd); + Memblk *f; + u64int t; + int i; + + t = fstime(0); + for(i = 0; i < p->nf && i < n; i++){ + f = p->f[i]; + if(f->state == MBmem){ + xwlock(f->mf); + f->d.mtime = t; + f->d.atime = t; + f->d.muid = muid; + xwunlock(f->mf); + } + } } -uvlong -fsdiskfree(void) +void +pused(Path *p) { - uvlong nfree; + Memblk *f; - xqlock(fs); - nfree = fs->super->d.ndfree; - nfree += (fs->limit - fs->super->d.eaddr)/Dblksz; - xqunlock(fs); - return nfree; + f = p->f[p->nf-1]; + if(f->state == MBmem){ + xwlock(f->mf); + f->d.atime = fstime(0); + xwunlock(f->mf); + } } -static char* -fsstats(char *s, char *e, int clr, int verb) +static Path* +walkpath(Memblk *f, char *elems[], int nelems) { int i; + Memblk *nf; + Path *p; - s = seprint(s, e, "mblks:\t%4ulld nblk %4ulld nablk %4ulld mused %4ulld mfree\n", - fs->nblk, fs->nablk, fs->nmused, fs->nmfree); - s = seprint(s, e, "lists:\t%4uld clean %#4uld dirty %#4uld refs %4uld total\n", - fs->clean.n, fs->dirty.n, fs->refs.n, - fs->clean.n + fs->dirty.n + fs->refs.n); - s = 
seprint(s, e, "dblks:\t %4ulld nblk %4ulld nfree (%ulld list + %ulld rem)\n", - fs->limit/Dblksz - 1, fsdiskfree(), fs->super->d.ndfree, - (fs->limit - fs->super->d.eaddr)/Dblksz); - s = seprint(s, e, "paths:\t%4uld alloc %4uld free (%4uld bytes)\n", - pathalloc.nalloc, pathalloc.nfree, pathalloc.elsz); - s = seprint(s, e, "mfs:\t%4uld alloc %4uld free (%4uld bytes)\n", - mfalloc.nalloc, mfalloc.nfree, mfalloc.elsz); - - if(verb == 0) - return s; - s = seprint(s, e, "nmelts:\t%d\n", fs->nmelts); - s = seprint(s, e, "nindirs:\t"); - for(i = 0; i < nelem(fs->nindirs); i++){ - s = seprint(s, e, "%d ", fs->nindirs[i]); - if(clr) - fs->nindirs[i] = 0; + p = newpath(f); + if(catcherror()){ + putpath(p); + error(nil); } - s = seprint(s, e, "\n"); - s = seprint(s, e, "\n"); - s = seprint(s, e, "Fsysmem:\t%uld\n", Fsysmem); - s = seprint(s, e, "Mzerofree:\t%d\tMminfree:\t%d\tMmaxfree:\t%d\n", - Mzerofree, Mminfree, Mmaxfree); - s = seprint(s, e, "Dzerofree:\t%d\tDminfree:\t%d\tDmaxfree:\t%d\n", - Dzerofree, Dminfree, Dmaxfree); - s = seprint(s, e, "Mmaxdirtypcent:\t%d\n", Mmaxdirtypcent); - s = seprint(s, e, "Dblksz: \t%uld\n", Dblksz); - s = seprint(s, e, "Mblksz: \t%ud\n", sizeof(Memblk)); - s = seprint(s, e, "Dminattrsz:\t%uld\n", Dminattrsz); - s = seprint(s, e, "Nblkgrpsz:\t%uld\n", Nblkgrpsz); - s = seprint(s, e, "Dblkdatasz:\t%d\n", Dblkdatasz); - s = seprint(s, e, "Embedsz:\t%d\n", Embedsz); - s = seprint(s, e, "Dentryperblk:\t%d\n", Dblkdatasz/Daddrsz); - s = seprint(s, e, "Dptrperblk:\t%d\n\n", Dptrperblk); - - for(i = 0; i < nelem(nfsopcalls); i++){ - if(nfsopcalls[i] == 0) - s = seprint(s, e, "%s:\t0 calls\t0 µs\n", fsopname[i]); - else - s = seprint(s, e, "%s:\t%uld calls\t%ulld µs\n", fsopname[i], - nfsopcalls[i], (fsoptime[i]/nfsopcalls[i])/1000); - if(clr){ - nfsopcalls[i] = 0; - fsoptime[i] = 0; + for(i = 0; i < nelems; i++){ + if((f->d.mode&DMDIR) == 0) + error("not a directory"); + xrlock(f->mf); + if(catcherror()){ + xrunlock(f->mf); + error("walk: %r"); 
} + nf = dfwalk(f, elems[i]); + noerror(); + xrunlock(f->mf); + addelem(&p, nf); + mbput(nf); + f = nf; } - return s; + noerror(); + return p; } -char* -updatestats(int clr, int verb) +Path* +walkto(char *a, char **lastp) { - static QLock statslk; + char *els[Npathels]; + int nels, n; - if(clr) - warn("clearing stats"); - xqlock(&statslk); - statsp = statstext; - *statsp = 0; - statsp = fsstats(statsp, statstext+sizeof statstext, clr, verb); - statsp = ninestats(statsp, statstext+sizeof statstext, clr, verb); - statsp = ixstats(statsp, statstext+sizeof statstext, clr, verb); - xqunlock(&statslk); - return statstext; -} - -int -isro(Memblk *f) -{ - return f == fs->archive || f == fs->root || f == fs->cons || f == fs->stats; + n = strlen(a); + nels = gettokens(a, els, nelem(els), "/"); + if(nels < 1) + error("invalid path"); + if(lastp != nil){ + *lastp = a + n - strlen(els[nels-1]); + return walkpath(fs->root, els, nels-1); + }else + return walkpath(fs->root, els, nels); } -/* - * NO LOCKS. 
debug only - * - */ -void -fsdump(int full, int disktoo) -{ - int i, n, x; - Memblk *b; - daddrt a; - extern int fullfiledumps; - - x = fullfiledumps; - fullfiledumps = full; - nodebug(); - if(fs != nil){ - fprint(2, "\n\nfsys '%s' limit %#ullx super m%#p root m%#p:\n", - fs->dev, fs->limit, fs->super, fs->root); - fprint(2, "%H\n", fs->super); - dfdump(fs->root, disktoo); - mlistdump("refs", &fs->refs); - if(1){ - n = 0; - fprint(2, "hash:"); - for(i = 0; i < nelem(fs->fhash); i++) - for(b = fs->fhash[i].b; b != nil; b = b->next){ - if(n++ % 5 == 0) - fprint(2, "\n\t"); - fprint(2, "d%#010ullx ", EP(b->addr)); - } - fprint(2, "\n"); - } - } - if(fs->super->d.free != 0){ - fprint(2, "free:"); - i = 0; - for(a = fs->super->d.free; a != 0; a = dbgetref(a)){ - if(i++ % 5 == 0) - fprint(2, "\n\t"); - fprint(2, "d%#010ullx ", EP(a)); - } - fprint(2, "\n"); - } - mlistdump("mru", &fs->clean); - mlistdump("dirty", &fs->dirty); - fprint(2, "%s\n", updatestats(0, 1)); - fullfiledumps = x; - debug(); -} static daddrt disksize(int fd) @@ -193,1042 +147,730 @@ return sz; } -/* - * To preserve coherency, blocks written are always frozen. - * DBref blocks with RCs and the free block list require some care: - * - * On disk, the super block indicates that even (odd) DBref blocks are active. - * On memory, the super selects even (odd) refs (we read refs from there.) - * To sync... - * 1. we make a frozen super to indicate that odd (even) DBrefs are active. - * (the memory super is even (odd) and we fetch unloaded DBrefs from - * even (odd) blocks). - * 2. we write odd (even) DBref blocks. - * - * 3. the frozen super is written, indicating that odd (even) refs are in use. - * (The disk is coherent now, pretending to use odd (even) refs). - * 4. The memory super is udpated to select odd (even) DBref blocks. - * (from now on, we are fetch refs from odd (even) blocks. 
- * - * While we are writing odd (even) refs, we flag in the on-disk superblock that - * a new set of references is being written. The flag is cleared when the - * new frozen super block is written. - * At start time, if the flag is set, we must copy all odd (even) ref blocks - * from even (odd) ref blocks, so they are coherent and we can continue - * switching between even/odd blocks. - */ +uvlong +fstime(uvlong t) +{ + static Lock lk; + static uvlong last; -static void -freezesuperrefs(void) + lock(&lk); + if(t) + fs->atime = t; + t = fs->atime; + if(t == last) + fs->atime = ++t; + last = t; + unlock(&lk); + return t; +} + +int +builtin(Memblk *f) { - Memblk *b, *rb; + return f == fs->root || f == fs->cons || f == fs->stats; +} - b = mballocz(fs->super->addr, 0); - xqlock(fs); - b->type = fs->super->type; - b->d = fs->super->d; - b->d.oddrefs = !fs->super->d.oddrefs; - assert(fs->fzsuper == nil); - fs->fzsuper = b; - b->frozen = 1; - b->dirty = 1; /* so it's written */ - xqlock(&fs->refs); - for(rb = fs->refs.hd; rb != nil; rb = rb->lnext) - rb->frozen = 1; - xqunlock(&fs->refs); - xqunlock(fs); +void +fspolicy(void) +{ + long n; + + n = fs->mballoc.nfree; + if(n < Mzerofree) + sendul(fs->lruc, 0); + else if(n < Mminfree) + nbsendul(fs->lruc, 0); + n = fs->ndfree; + if(n < Dzerofree) + sendul(fs->sweepc, 0); + else if(n < Dminfree) + nbsendul(fs->sweepc, 0); } -static Memblk* -readsuper(void) +static void +fswritesuper(Channel *c) { + Memblk *b; Memblk *super; - Dsuperdata *d1, *d2; - if(catcherror()){ - error("not a creepy disk: %r"); - error(nil); - } - fs->super = dbget(DBsuper, Dblksz); super = fs->super; - if(super->d.magic != MAGIC) - error("bad magic number"); - d1 = &fs->super->d.Dsuperdata; - d2 = &fs->super->d.dup; - if(memcmp(d1, d2, sizeof(Dsuperdata)) != 0){ - warn("partially written superblock, using old."); - if(fs->super->d.dup.epoch < fs->super->d.epoch) - fs->super->d.Dsuperdata = fs->super->d.dup; - } - if(super->d.dblksz != Dblksz) - 
error("bad Dblksz"); - if(super->d.nblkgrpsz != Nblkgrpsz) - error("bad Nblkgrpsz"); - if(super->d.dminattrsz != Dminattrsz) - error("bad Dminattrsz"); - if(super->d.ndptr != Ndptr) - error("bad ndptr"); - if(super->d.niptr != Niptr) - error("bad niptr"); - if(super->d.embedsz != Embedsz) - error("bad Embedsz"); - if(super->d.dptrperblk != Dptrperblk) - error("bad Dptrperblk"); - - noerror(); - return super; + b = mballocz(1); + xqlock(&fs->superlk); + b->addr = fs->super->addr; + b->type = super->type; + b->addr = super->addr; + xqlock(&b->slk); + mbset(b, MBout); + b->d.Dsuperdata = fs->super->d.Dsuperdata; + b->d.dup = b->d.Dsuperdata; + b->wc = c; + xqunlock(&fs->superlk); + mlink(&fs->out, b); + xqunlock(&b->slk); + nbsendul(fs->syncc, 0); } /* - * Return /archive/yyyy/mmdd melted and wlocked, create it if needed. - * Clear the arch addr in the super if a new archive should be taken. + * The mark & sweep is dumb, slow, and reliable. + * It should happen only when the disk is almost full. + * fsmark and sweep may run concurrently. 
*/ -static Path* -currentarch(void) +static void +fswritetags(void) { - Path *p; - Memblk *f, *pf; - char yname[30], dname[30], *names[2]; - Tm *tm; - int i; + Memblk *b; - tm = localtime(time(nil)); - seprint(yname, yname+sizeof yname, "%04d", tm->year + 1900); - seprint(dname, dname+sizeof dname, "%02d%02d", tm->mon + 1, tm->mday); - names[0] = yname; - names[1] = dname; - - p = newpath(fs->root); - addelem(&p, fs->archive); - for(i = 0; i < nelem(names); i++){ - if(catcherror()) - break; - pf = p->f[p->nf-1]; - rwlock(pf, Rd); - if(catcherror()){ - rwunlock(pf, Rd); - error(nil); - } - f = dfwalk(pf, names[i]); - addelem(&p, f); - mbput(f); - noerror(); - rwunlock(pf, Rd); - noerror(); + while((b = munlink(&fs->dtags, nil)) != nil){ + xqlock(&b->slk); + mbset(b, MBout); + mlink(&fs->out, b); + xqunlock(&b->slk); } - meltedpath(&p, p->nf, 0); - if(catcherror()){ - rwunlock(p->f[p->nf-1], Wr); - error(nil); - } - /* 0:/ 1:archive 2:yyyy 3:mmdd */ - for(i = p->nf-1; i < 3; i++){ - f = dfcreate(p->f[i], names[i-1], p->f[i]->d.uid, p->f[i]->d.mode); - rwlock(f, Wr); - rwunlock(p->f[i], Wr); - addelem(&p, f); - mbput(f); - } - noerror(); - - return p; } +/* + * Freeze the fs state and schedule it for writing. + * A new super with the new / is written at the end. 
+ */ static void -updateroot(Memblk *nf) +fswrite(Channel *c) { - if(fs->super->d.root != nf->addr){ - fs->archive = nf; - incref(nf); - fs->super->d.root = nf->addr; - changed(fs->super); + vlong t; + daddrt addr, oaddr; + + dZprint("fswrite...\n"); + t = opstart(&fs->opstat[Opwriteq], 0); + xwlock(&fs->quiescence); + t = opend(&fs->opstat[Opwriteq], t); + t = opstart(&fs->opstat[Opwrite], t); + if(catcherror()){ + opend(&fs->opstat[Opwrite], t); + dZprint("fswrite: %r\n"); + xwunlock(&fs->quiescence); + error(nil); } -} -static void -flagdirtyrefs(void) -{ - Diskblk *db; - u64int dirty; - daddrt addr; + ainc(&fs->nwrite); + oaddr = fs->active->addr; + addr = dfaddress(oaddr); + dfchdentry(fs->root, oaddr, addr); - addr = fs->super->addr; + xqlock(&fs->superlk); + fs->super->d.root = addr; + xqunlock(&fs->superlk); + fswritetags(); + fswritesuper(c); - db = nil; - addr += (uintptr)&db->dirtyrefs; /* ugly, but portable */ - dirty = 1; - if(pwrite(fs->fd, &dirty, sizeof dirty, addr) != sizeof dirty) - error("flagdirtyrefs: %r"); + noerror(); + xwunlock(&fs->quiescence); + opend(&fs->opstat[Opwrite], t); + dZprint("fswrite: done\n"); } /* - * Freeze the file tree, keeping active as a new melted file - * that refers to frozen children now in the archive. - * returns the just frozen tree or nil - * - * NB: This may be called from fsfmt(), with a melted archive, - * which violates the invariant that archive is always frozen, leading - * to a violation on the expected number of references to it (fsfmt leaks it). + * Write blocks to disk, including copies for the super. */ -static Memblk* -fsfreeze(void) +static void +syncproc(void*) { - Path *p; - Memblk *na, *oa, *arch, *oarch; - char name[50]; - vlong t0; - - dZprint("freezing fs...\n"); - t0 = 0; - if(fs->profile) - t0 = fstime(nsec()); - xqlock(&fs->fzlk); - if(fs->fzsuper != nil){ - /* - * we did freeze/reclaim and are still writing, can't freeze now. 
- */ - xqunlock(&fs->fzlk); - return nil; - } - xrwlock(&fs->quiescence, Wr); /* not really required */ - nfsopcalls[Freeze]++; - if(catcherror()){ - /* - * There was an error during freeze. - * It's better not to continue to prevent disk corruption. - * The user is expected to restart from the last frozen - * version of the tree. - */ - fatal("freeze: %r"); - } + vlong t; + Memblk *b; + int n; - /* 1. Move active into /archive/yyyy/mmdd/. - * We must add an extra disk ref to keep archive alive after melting - * it within currentarch() because "/" is a fake and there's no old - * frozen copy for "/" (keeping such ref). - * - * Dbput will unlink the block from the hash and move its address - * into the free list. However, we still have a mem ref from fs->archive - * and perhaps more from user paths, which must be advanced, so we can't - * release the reference on archive just yet. - * We will do the mbput corresponding to fs->archive after - * advancing all fids, so their archive moves to the new one. - */ - arch = fs->archive; - dbincref(arch->addr); - p = currentarch(); - updateroot(p->f[1]); - oarch = arch; - dbput(arch); - - arch = p->f[p->nf-1]; - oa = fs->active; - rwlock(oa, Wr); - seprint(name, name+sizeof(name), "%ulld", oa->d.mtime); - wname(oa, name); - dfchdentry(arch, 0, oa->addr); + threadsetname("syncproc"); + errinit(Errstack); + if(catcherror()) + fatal("%s: uncatched: %r", threadgetname()); + for(;;){ + recvul(fs->syncc); + dZprint("sync...\n"); + t = opstart(&fs->opstat[Opsync], 0); + for(n = 0; (b = munlink(&fs->out, nil)) != nil; n++){ + if(catcherror()){ + warn("write %D: %r", b->addr); + if(b->type == DBsuper){ + if(b->wc != nil) + sendp(b->wc, smprint("%r")); + b->wc = nil; + mbput(b); + }else + mbset(b, MBerr); + continue; + } - /* 2. Freeze it, plus any melted blocks in /active due to - * the link of the new archived tree. 
- */ - oa->d.mtime = fstime(0); - oa->d.atime = fstime(0); - rwunlock(oa, Wr); - changed(oa); - dffreeze(oa); - rwunlock(arch, Wr); - dffreeze(fs->archive); - - /* 2. Freeze the on-disk reference counters - * and the state of the super-block. - * After doing so, the state to be written on the disk is - * coherent and corresponds to now. - * Flag the on-disk super block to report DBref blocks for the - * next epoch (even/odd) are being written. - */ - dprint("freezing refs...\n"); - flagdirtyrefs(); - freezesuperrefs(); - - /* 3. Make a new active and replace the old one. - * defer the release of the old active until all fids are melted - * (see similar discussion in 1). - */ - na = dbdup(oa); - rwlock(na, Wr); - na->d.id = fstime(0); - wname(na, "active"); - fs->active = na; - rwunlock(na, Wr); - rwlock(fs->root, Wr); - dfchdentry(fs->root, oa->addr, na->addr); - rwunlock(fs->root, Wr); - assert(oa->ref > 1); /* release fs->active */ - - /* 4. Advance pahts in fids to their most recent melted files, - * to release refs to old frozen files, and to the now gone old - * "/archive". - */ - meltfids(); - mbput(oa); - mbput(oarch); - if(fs->profile) - fsoptime[Freeze] += nsec() - t0; - noerror(); - xrwunlock(&fs->quiescence, Wr); - xqunlock(&fs->fzlk); - putpath(p); + dbwrite(b); - dZprint("fs frozen\n"); - return na; + noerror(); + if(b->type == DBsuper){ + if(b->wc != nil) + sendp(b->wc, nil); + b->wc = nil; + mbput(b); + }else{ + xqlock(&b->slk); + mbset(b, MBclean); + mlink(&fs->clean, b); + xqunlock(&b->slk); + } + } + opend(&fs->opstat[Opsync], t); + dZprint("sync: wrote %d blocks\n", n); + } } -static long -writerefs(void) +/* + * Move blocks out of memory to release some. + */ +static void +lruproc(void*) { - Memblk *rb; - long n; + vlong t; + int n, ign; + Memblk *b, *bnext; - n = 0; - xqlock(&fs->refs); - for(rb = fs->refs.hd; rb != nil; rb = rb->lnext){ - if((rb->addr - Dblk0addr)/Dblksz % Nblkgrpsz == 2){ - /* It's a fake DBref block used for checks: ignore. 
*/ - rb->frozen = rb->dirty = 0; - continue; + threadsetname("lruproc"); + errinit(Errstack); + if(catcherror()) + fatal("%s: uncatched: %r", threadgetname()); + for(;;){ + recvul(fs->lruc); + dZprint("lru...\n"); + xrlock(&fs->dquiescence); + t = opstart(&fs->opstat[Oplru], 0); + xqlock(&fs->clean); + n = ign = 0; + for(b = fs->clean.hd; b != nil; b = bnext){ + bnext = b->lnext; + if(b->state != MBclean) + fatal("fslru: %D: %s", b->addr, sname(b->state)); + if(b->ref > 1 || b->type == DBtag){ + ainc(&fs->nlruign); + ign++; + continue; + } + if(mblru(b) != -1) + n++; /* could move it out */ + if(fs->mballoc.nfree > Mmaxfree) + break; } - if(rb->dirty && rb->frozen) - n++; - meltedref(rb); + xqunlock(&fs->clean); + opend(&fs->opstat[Oplru], t); + dZprint("lru: %uld free %d out %d ign\n", fs->mballoc.nfree, n, ign); + xrunlock(&fs->dquiescence); } - xqunlock(&fs->refs); - return n; } -static int -mustwrite(Memblk *b) +/* + * Mark all blocks that could be reached with a new epoch, + * so blocks with lower epochs could be considered free. + */ +static void +fsmark(void) { - return b->frozen != 0; + u64int e; + vlong t; + Channel *c; + + c = chancreate(sizeof(char*), 0); + dZprint("fsmark...\n"); + t = opstart(&fs->opstat[Opmark], 0); + nbsendul(fs->lruc, 0); /* make some room */ + + xqlock(&fs->superlk); + e = ++fs->super->d.epoch; + xqunlock(&fs->superlk); + + dfmark(fs->super->d.root, e); + dfmark(fs->active->addr, e); + + xqlock(&fs->archlk); + xqlock(&fs->superlk); + fs->super->d.fepoch = e-1; + xqunlock(&fs->superlk); + xqunlock(&fs->archlk); + + fswritetags(); + fswritesuper(c); + recvp(c); + chanfree(c); + + opend(&fs->opstat[Opmark], t); + dZprint("fsmark: done\n"); } -/* - * Written blocks become mru, perhaps we should - * consider keeping their location in the clean list, at the - * expense of visiting them while scanning for blocks to move out. - * We write only (dirty) blocks that are frozen or part of the "/archive" file. 
- */ -static long -writedata(void) +static void +dropold(daddrt addr) { Memblk *b; - long nw; - List dl; - nw = 0; - dl = mfilter(&fs->dirty, mustwrite); - while((b = dl.hd) != nil){ - munlink(&dl, b, 1); - assert(b->dirty); - if((b->addr&Fakeaddr) != 0) - fatal("write data on fake address"); - dbwrite(b); - nw++; - } - return nw; + b = mbget(addr); + if(b == nil) + return; + dprint("dropold: reused %H\n", b); + assert(b->ref > 1); /* hash and b */ + mbput(b); /* b */ + xqlock(&fs->clean); + if(mblru(b) < 0) + fatal("dropold: busy %H", b); + xqunlock(&fs->clean); } -static void -writezsuper(void) +static daddrt +growworm(void) { - if(canqlock(&fs->fzlk)) - fatal("writezsuper: lock"); - assert(fs->fzsuper != nil); - fs->fzsuper->d.epoch = fstime(0); - fs->fzsuper->d.dup = fs->fzsuper->d.Dsuperdata; - dbwrite(fs->fzsuper); - dprint("writezsuper: %H\n", fs->fzsuper); - mbput(fs->fzsuper); - fs->fzsuper = nil; + daddrt addr; + + xqlock(&fs->superlk); + if(fs->super->d.eaddr + Dblksz > fs->limit){ + xqunlock(&fs->superlk); + warnerror("worm full"); + } + addr = fs->super->d.eaddr; + fs->super->d.eaddr += Dblksz; + fs->ndfree--; + xqunlock(&fs->superlk); + return MKDADDR(addr); +} + +daddrt +newdaddr(void) +{ + Memblk *tb; + daddrt addr0, eaddr, grpsz, a; + dtagt epoch; + int i, ntry; + + if(fs->mode == Worm) + return growworm(); + + grpsz = Dblksz*Dtagperblk; + for(ntry = 0; ntry < Nscanfree; ntry++){ + xqlock(&fs->freelk); + + xqlock(&fs->superlk); + eaddr = fs->super->d.eaddr; + epoch = fs->super->d.epoch; + xqunlock(&fs->superlk); + addr0 = fs->ifree; + do{ + if(catcherror()){ + warn("newdaddr: %r"); + continue; + } + tb = dbgetlocked(DBtag, MKDADDR(fs->ifree)); + for(i = 1; i < Dtagperblk; i++) + if(tb->d.tag[i] == 0) + break; + /* TODO: we can skip this one when tb->d.tag[0] == 0 */ + if(i == Dtagperblk) + goto next; + if(tb->state != MBmem){ + xqunlock(&tb->slk); + mbrenewlocked(tb); + } + assert(tb->state == MBmem); + for(i = 1; i < Dtagperblk; i++){ + 
if(tb->d.tag[i] == 0){ + a = fs->ifree+i*Dblksz; + assert(a < eaddr); + tb->d.tag[i] = epoch; + tb->d.tag[0]--; + xqlock(&fs->superlk); + if(fs->ndfree == 0) + fatal("newdaddr: ndfree == 0"); + if(fs->ndfree < 30 || (fs->ndfree%50)== 0) + dAprint("newdaddr: %ulld-1\n", fs->ndfree); + fs->ndfree--; + noerror(); + xqunlock(&fs->superlk); + xqunlock(&tb->slk); + mbput(tb); + xqunlock(&fs->freelk); + dropold(MKDADDR(a)); + return MKDADDR(a); + } + } + next: + fs->ifree += grpsz; + if(fs->ifree >= eaddr) + fs->ifree = Dblk0addr; + noerror(); + xqunlock(&tb->slk); + mbput(tb); + }while(fs->ifree != addr0); + xqunlock(&fs->freelk); + + warn("newdaddr: %ulld free; asking sweep for more.", fs->ndfree); + ainc(&fs->newdaddrw); + sendul(fs->sweepc, 0); + recvul(fs->sweepec); + if(fs->nlastfree > 0) /* got some, restart */ + ntry = 0; + } + warnerror("newdaddr: disk full and no luck"); + return Noaddr; } -/* - * We found that we crashed while writing a new set of DBref blocks. - * If the super is using even/odd refs, odd/even refs may be corrupted. - * Restore all of them for safety. 
- */ static void -cleanrefs(void) +sweepproc(void*) { - daddrt addr; + Memblk *tb; + daddrt addr0, eaddr, grpsz, a; + dtagt fepoch; + int i, n, nb; + vlong t; - warn("restoring DBref blocks..."); - for(addr = Dblk0addr; addr < fs->super->d.eaddr; addr += Dblksz*Nblkgrpsz) - if(fs->super->d.oddrefs != 0) - dbcopy(addr, addr+Dblksz); - else - dbcopy(addr+Dblksz, addr); - warn("DBref blocks restored"); + threadsetname("sweepproc"); + errinit(Errstack); + if(catcherror()) + fatal("%s: uncatched: %r", threadgetname()); + grpsz = Dblksz*Dtagperblk; + for(;;){ + recvul(fs->sweepc); + xrlock(&fs->dquiescence); + dZprint("sweep at %D with %ulld free\n", fs->isweep, fs->ndfree); + again: + t = opstart(&fs->opstat[Opsweep], 0); + xqlock(&fs->superlk); + eaddr = fs->super->d.eaddr; + fepoch = fs->super->d.fepoch; + xqunlock(&fs->superlk); + n = 0; + addr0 = fs->isweep; + do{ + nb = 0; + if(catcherror()){ + warn("sweep: %r"); + continue; + } + tb = dbgetlocked(DBtag, MKDADDR(fs->isweep)); + for(i = 1; i < Dtagperblk; i++) + if(tb->d.tag[i] != 0 && tb->d.tag[i] <= fepoch) + break; + if(i == Dtagperblk) + goto next; + if(tb->state != MBmem){ + xqunlock(&tb->slk); + mbrenewlocked(tb); + } + assert(tb->state == MBmem); + for(i = 1; i < Dtagperblk; i++) + if(tb->d.tag[i] != 0 && tb->d.tag[i] <= fepoch){ + a = fs->isweep+i*Dblksz; + assert(a < eaddr); + dAprint("sweep: free %D\n", MKDADDR(a)); + ainc(&fs->ngc); + nb++; + tb->d.tag[i] = 0; + } + tb->d.tag[0] += nb; + n += nb; + next: + fs->isweep += grpsz; + if(fs->isweep >= eaddr) + fs->isweep = Dblk0addr; + noerror(); + if(nb > 0) + dAprint("sweep: %D: +%d free\n", tb->addr, nb); + xqlock(&fs->superlk); + fs->ndfree += nb; + if(fs->ndfree > fs->ndblk) + fatal("ndfree out of range: %ulld > %ulld", + fs->ndfree, fs->ndblk); + xqunlock(&fs->superlk); + xqunlock(&tb->slk); + mbput(tb); + }while(fs->isweep != addr0 && fs->ndfree < Dmaxfree); + + opend(&fs->opstat[Opsweep], t); + dZprint("sweepproc: %d collected %ulld free\n", n, 
fs->ndfree); + fs->nlastfree = n; + if(n == 0 && fs->ndfree < Dminfree){ + fsmark(); + goto again; + } + xrunlock(&fs->dquiescence); + nbsendul(fs->sweepec, 0); /* for cmd.c */ + } } -/* - * Write any dirty frozen state after a freeze. - * Only this function and initialization routines (i.e., super, refs) - * may lead to writes. - */ static void -fswrite(void) +tmrproc(void*) { - vlong t0; - long nr, nb; + uint n; - dZprint("writing fs...\n"); - t0 = 0; - if(fs->profile) - t0 = fstime(nsec()); - xqlock(&fs->fzlk); - nfsopcalls[Write]++; - if(fs->fzsuper == nil) - fatal("can't fswrite if we didn't fsfreeze"); - if(catcherror()){ - if(fs->profile) - fsoptime[Write] += nsec() - t0; - xqunlock(&fs->fzlk); - error(nil); + threadsetname("tmrproc"); + n = 0; + for(;;){ + sleep(1000); + fstime(nsec()); + /* with just one dirty block (/root) nothing has changed */ + if(fs->nmblk > 1) + if(fs->nmblk > Mmaxdirty || (++n % Syncival) == 0){ + xrlock(&fs->dquiescence); + dprint("tmr: sync %ulld blocks...\n", fs->nmblk); + fswrite(nil); + xrunlock(&fs->dquiescence); + } + if(fs->mode != Worm) + if(fs->archt != 0 && time(nil) > fs->archt){ + fsarchive(); + fs->archt = nextime(time(nil), fs->archhour); + } } - nr = writerefs(); - nb = writedata(); - writezsuper(); - fs->super->d.oddrefs = !fs->super->d.oddrefs; - nb++; - noerror(); - if(fs->profile) - fsoptime[Write] += fstime(nsec()) - t0; - fs->wtime = fstime(nsec()); - dZprint("fs written (2*%ld refs %ld data)\n", nr, nb); - if(fs->halt) - fs->halt = Halted; - xqunlock(&fs->fzlk); } static void fsinit(char *dev, int nblk) { - uvlong fact, i; void *p; - char *c, *e; + Memblk *b, *e; + uchar *c; + uvlong nablk; /* this is an invariant that must hold for directories */ assert(Embedsz % Daddrsz == 0); - maxfsz = Ndptr*Dblkdatasz; - fact = 1; - for(i = 0; i < Niptr; i++){ - maxfsz += Dptrperblk * fact; - fact *= Dptrperblk; - } fs = mallocz(sizeof *fs, 1); fs->dev = strdup(dev); fs->fd = open(dev, ORDWR); if(fs->fd < 0) - 
fatal("can't open disk: %r"); + error("open %s: %r", dev); - fs->nablk = Fsysmem / sizeof(Memblk); - if(nblk > 0 && nblk < fs->nablk) - fs->nablk = nblk; + fs->lruc = chancreate(sizeof(ulong), 0); + fs->syncc = chancreate(sizeof(ulong), 0); + fs->sweepc = chancreate(sizeof(ulong), 0); + fs->sweepec = chancreate(sizeof(ulong), 0); + fs->consc = chancreate(sizeof(char*), 256); + fs->isweep = Dblk0addr; + fs->ifree = Dblk0addr; + nablk = Fsysmem / sizeof(Memblk); + if(nblk > 0 && nblk < nablk) + nablk = nblk; fs->limit = disksize(fs->fd); fs->ndblk = fs->limit/Dblksz; fs->limit = fs->ndblk*Dblksz; if(fs->limit < 10*Dblksz) fatal("buy a larger disk"); - if(fs->nablk > fs->ndblk){ + if(nablk > fs->ndblk){ warn("using %ulld blocks and not %ulld (small disk)", - fs->ndblk, fs->nablk); - fs->nablk = fs->ndblk; + fs->ndblk, nablk); + nablk = fs->ndblk; } - p = malloc(fs->nablk * sizeof fs->blk[0]); + p = malloc(nablk * sizeof fs->blk[0]); fs->blk = p; + fs->mballoc.fixedsz = 1; + fs->mballoc.nalloc = nablk; warn("prepaging..."); - c = p; - e = c + fs->nablk * sizeof fs->blk[0]; - for(; c < e; c += 4096) - *c = 0; /* prepage it */ + e = fs->blk + nablk; + for(b = fs->blk; b < e; b++){ + for(c = b->d.data + 4096; c < b->d.data+Dblksz; c += 4096) + *c = 0; + b->state = MBfree; + afree(&fs->mballoc, b); + } fstime(nsec()); - dprint("fsys '%s' init\n", fs->dev); } void fssync(void) { - while(fsfreeze() == nil) - sleep(1); - fswrite(); -} - -static int -confirm(char *msg) -{ - char buf[100]; - int n; + char *e; - fprint(2, "%s [y/n]: ", msg); - n = read(0, buf, sizeof buf - 1); - if(n <= 0) - return 0; - if(buf[0] == 'y') - return 1; - return 0; -} - -enum -{ - Fossiloff = 128*1024, - Fossilmagic = 0xffae7637, -}; - -static void -dontoverwrite(daddrt addr) -{ - Dsuperdata d; - static char buf[BIT64SZ]; - - if(pread(fs->fd, &d, sizeof d, addr + sizeof(Diskblkhdr)) != sizeof d) + if(fs->mode == Rdonly) return; - if(d.magic == MAGIC) - if(!confirm("disk has a creepy fs: 
continue?")){ - warn("aborting"); - threadexitsall("no"); - }else - return; - - if(pread(fs->fd, buf, sizeof buf, Fossiloff) != sizeof buf) - return; - if(GBIT32(buf) == Fossilmagic) - if(!confirm("disk has a fossil fs: continue?")){ - warn("aborting"); - threadexitsall("no"); - }else{ - memset(buf, 0, sizeof buf); - pwrite(fs->fd, buf, sizeof buf, Fossiloff); - } -} - -/* - * / is only in memory. It's `on-disk' address is Noaddr. - * - * /archive is the root on disk. - * /active is allocated on disk, but not on disk. It will be linked into - * /archive as a child in the future. - */ -void -fsfmt(char *dev, int force) -{ - Memblk *super; - int uid; - - fsinit(dev, Mmaxfree); /* enough # of blocks for fmt */ - - if(catcherror()) - fatal("fsfmt: error: %r"); - - fs->super = dballocz(DBsuper, DFreg, 1); - - if(!force) - dontoverwrite(fs->super->addr); - super = fs->super; - super->d.magic = MAGIC; - super->d.eaddr = fs->super->addr + Dblksz; - super->d.dblksz = Dblksz; - super->d.nblkgrpsz = Nblkgrpsz; - super->d.dminattrsz = Dminattrsz; - super->d.ndptr = Ndptr; - super->d.niptr = Niptr; - super->d.embedsz = Embedsz; - super->d.dptrperblk = Dptrperblk; - uid = usrid(getuser()); - fs->root = dfcreate(nil, "", uid, DMDIR|0555); - rwlock(fs->root, Wr); - fs->active = dfcreate(fs->root, "active", uid, DMDIR|0775); - fs->archive = dfcreate(fs->root, "archive", uid, DMDIR|0555); - rwunlock(fs->root, Wr); - super->d.root = fs->archive->addr; - fssync(); - - noerror(); -} - -void -timeproc(void*) -{ - threadsetname("timeproc"); - for(;;){ - sleep(1); - fstime(nsec()); - } -} -/* - * If there are dirty blocks, call the policy once per Syncival. 
- */ -void -fssyncproc(void*) -{ - threadsetname("syncer"); - errinit(Errstack); - for(;;){ - sleep(Syncival*1000); - if(fs->halt == 0 && fs->check == 0) - fspolicy(Post); - } + if(fs->syncwc == nil) + fs->syncwc = chancreate(sizeof(char*), 0); + fswrite(fs->syncwc); + e = recvp(fs->syncwc); + if(e != nil) + error("sync: %s", e); /* leaks e */ } -typedef struct Parg -{ - int last; - Memblk *which; -} Parg; -enum{First = 0, Last = 1}; - -static int -pickf(Memblk*, daddrt *de, void *a) +static void +inittags(void) { - Parg *pa; - Memblk *c; + static Diskblk d; + daddrt addr, n, nt; - pa = a; - if(*de == 0) - return 0; - c = dbget(DBfile, *de); - if(pa->which == nil || (pa->last && pa->which->d.mtime < c->d.mtime) || - (!pa->last && pa->which->d.mtime > c->d.mtime)){ - mbput(pa->which); - pa->which = c; - incref(c); + nt = 0; + for(addr = Dblk0addr; addr+Dblksz < fs->limit; addr += Dblksz*Dtagperblk){ + d.tag[0] = Dtagperblk - 1; + n = (fs->limit - addr)/Dblksz; + if(n < Dtagperblk){ + dprint("partial group with %ulld blocks\n", n); + d.tag[0] = n-1; + for(; n < Dtagperblk; n++) + d.tag[n] = ~0; + } + dWprint("inittags %D\n", addr); + if(pwrite(fs->fd, &d, sizeof d, DADDR(addr)) != Dblksz) + fatal("inittags: %D: %r", addr); + nt++; } - mbput(c); - return 0; + dprint("%ulld tag blocks\n", nt); } -/* - * Return the first or last children, for selecting archives. 
- */ -static Memblk* -pickchild(Memblk *f, int last) +static void +initsuper(Memblk *b, int isworm) { - Parg pa; - - pa.which = nil; - pa.last = last; - rwlock(f, Rd); - if(catcherror()){ - rwunlock(f, Rd); - mbput(pa.which); - error(nil); + b->addr = MKDADDR(Dsuperaddr); + b->d.magic = MAGIC; + b->d.dblksz = Dblksz; + b->d.dminattrsz = Dminattrsz; + b->d.ndptr = Ndptr; + b->d.niptr = Niptr; + b->d.embedsz = Embedsz; + b->d.dptrperblk = Dptrperblk; + b->d.dtagperblk = Dtagperblk; + b->d.epoch = 1; + if(isworm){ + b->d.eaddr = Dblk0addr; + b->d.fsmode = Worm; + }else{ + b->d.fsmode = Normal; + b->d.eaddr = fs->ndblk*Dblksz; } - dfdirmap(f, pickf, &pa, Rd); - noerror(); - rwunlock(f, Rd); - return pa.which; } -static Path* -pickvictim(void) +void +fsfmt(char *dev, int isworm) { - Path *p; - int i; - - if(fs->archive->d.ndents == 0) - return nil; - - p = newpath(fs->root); - if(catcherror()){ - putpath(p); - return nil; - } - addelem(&p, fs->archive); - - /* yyyy mmdd epoch */ - for(i = 0; i < 3 && p->f[p->nf-1]->d.ndents > 0; i++){ - addelem(&p, pickchild(p->f[p->nf-1], First)); - mbput(p->f[p->nf-1]); - } - for(i = 1; i < p->nf; i++) - if(p->f[i]->d.ndents > 1) - break; - - if(i == p->nf){ /* last snap; nothing to reclaim */ - putpath(p); - p = nil; - } - - noerror(); - return p; + fsinit(dev, Mmaxfree); /* more than enough blocks for fmt */ + if(isworm == 0) + inittags(); + else + fs->mode = Worm; + fs->super = mbnew(DBsuper); + initsuper(fs->super, isworm); + fs->root = dfcreate(nil, "", usrid("sys"), DMDIR|0555); + fs->active = dfcreate(fs->root, "root", usrid("sys"), DMDIR|0775); + fs->active->d.id = MAGIC; /* so we could locate root blocks */ + proccreate(syncproc, nil, Stack); + fssync(); } -/* - * One process per file system, so consume all the memory - * for the cache. - * To open more file systems, use more processes! 
- */ void -fsopen(char *dev, int worm, int canwr) +fsopen(char *dev, int mode, usize fsysmem) { - Memblk *arch, *last, *c; + Memblk *u; int uid; - - if(catcherror()) - fatal("fsopen: error: %r"); - - fsinit(dev, 0); - readsuper(); - fs->worm = worm; - fs->mode = canwr; - - if(!worm && fs->super->d.dirtyrefs){ - cleanrefs(); - fs->super->d.dirtyrefs = 0; + if(mode == Worm && fsysmem == 0) + fsysmem = Wormmem; + fsinit(dev, fsysmem/Dblksz); + fs->mode = mode; + fs->super = dbget(DBsuper, MKDADDR(Dsuperaddr)); + if(fs->super->d.fsmode == Worm){ + fs->mode = Worm; + fs->ndfree = (fs->limit - fs->super->d.eaddr)/Dblksz; + }else + if(mode == Worm) + sysfatal("not a worm"); + proccreate(lruproc, nil, Stack); + if(fs->mode == Normal) + proccreate(sweepproc, nil, Stack); + if(fs->mode != Rdonly){ + proccreate(syncproc, nil, Stack); + proccreate(tmrproc, nil, Stack); } + rwusers(nil); + uid = usrid("sys"); - xqlock(&fs->fzlk); fs->root = dfcreate(nil, "", uid, DMDIR|0555); - arch = dbget(DBfile, fs->super->d.root); - fs->archive = arch; - rwlock(fs->root, Wr); - rwlock(arch, Wr); - dfchdentry(fs->root, 0, arch->addr); - rwunlock(arch, Wr); - rwunlock(fs->root, Wr); - - last = pickchild(arch, Last); /* yyyy */ - if(last != nil){ - c = pickchild(last, Last); /* mmdd */ - mbput(last); - last = c; - } - if(last != nil){ - c = pickchild(last, Last); /* epoch */ - mbput(last); - last = c; - } - rwlock(fs->root, Wr); - if(last != nil){ - rwlock(last, Rd); - fs->active = dbdup(last); - rwunlock(last, Rd); - mbput(last->mf->melted); /* could keep it, but no need */ - last->mf->melted = nil; - wname(fs->active, "active"); - fs->active->d.id = fstime(nsec()); - rwlock(fs->active, Wr); - dfchdentry(fs->root, 0, fs->active->addr); - rwunlock(fs->active, Wr); - mbput(last); - }else - fs->active = dfcreate(fs->root, "active", uid, DMDIR|0775); - - fs->cons = dfcreate(nil, "cons", uid, DMEXCL|0660); - fs->cons->d.gid = usrid("adm"); - fs->cons->mf->gid = "adm"; - changed(fs->cons); - 
fs->stats = dfcreate(nil, "stats", uid, 0664); - rwunlock(fs->root, Wr); - fs->consc = chancreate(sizeof(char*), 256); - xqunlock(&fs->fzlk); - - noerror(); + fs->cons = dfcreate(fs->root, "cons", uid, DMEXCL|0660); + fs->stats = dfcreate(fs->root, "stats", uid, 0664); + fs->active = dbget(DBdir, fs->super->d.root); + dfchdentry(fs->root, 0, fs->active->addr); + warn("fsys open %s\n", mname(fs->mode)); /* * Try to load the /active/users file, if any, * but ignore errors. We already have a default table loaded * and may operate using it. */ - if(!catcherror()){ - c = dfwalk(fs->active, "users"); - rwlock(c, Wr); - if(catcherror()){ - rwunlock(c, Wr); - mbput(c); - error(nil); - } - rwusers(c); - noerror(); - rwunlock(c, Wr); - mbput(c); - noerror(); - fs->cons->d.uid = usrid(getuser()); - fs->cons->mf->uid = getuser(); - } - fs->wtime = fstime(nsec()); -} - -uvlong -fsmemfree(void) -{ - uvlong nfree; - - xqlock(fs); - nfree = fs->nablk - fs->nblk; - nfree += fs->nmfree; - xqunlock(fs); - return nfree; -} - -/* - * Check if we are low on memory and move some blocks out in that case. - * This does not acquire locks on blocks, so it's safe to call it while - * keeping some files/blocks locked. 
- */ -int -fslru(void) -{ - Memblk *b, *bprev; - vlong t0; - int x; - long target, tot, n, ign; + if(catcherror()) + return; - xqlock(&fs->lrulk); + u = dfwalk(fs->active, "users"); if(catcherror()){ - xqunlock(&fs->lrulk); - warn("fslru: %r"); - return -1; + mbput(u); + error(nil); } + rwusers(u); - x = setdebug(); - dZprint("fslru: low on memory %ulld free %d min\n", fsmemfree(), Mminfree); - tot = ign = 0; - do{ - target = Mmaxfree - fsmemfree(); - t0 = nsec(); - xqlock(&fs->clean); - nfsopcalls[Lru]++; - if(catcherror()){ - fsoptime[Lru] += t0 - nsec(); - xqunlock(&fs->clean); - warn("fslru: %r"); - break; - } - n = 0; - for(b = fs->clean.tl; b != nil && target > 0; b = bprev){ - bprev = b->lprev; - if(b->dirty) - fatal("fslru: dirty block on clean\n"); - switch(b->type){ - case DBfree: - /* can happen. but, does it? */ - fatal("fslru: DBfree on clean\n"); - case DBsuper: - case DBref: - fatal("fslru: type %d found on clean\n", b->type); - case DBfile: - if(b == fs->root || b == fs->active || b == fs->archive){ - ign++; - continue; - } - break; - } - if(b->ref > 1){ - ign++; - continue; - } - /* - * Blocks here have one ref because of the hash table, - * which means they are are not used. - * We release the hash ref to let them go. - * bprev can't move while we put b. 
- */ - dOprint("fslru: out: m%#p d%#010ullx\n", b, b->addr); - if(mbunhash(b, 1)){ - n++; - tot++; - target--; - } - } - noerror(); - fsoptime[Lru] += t0 - nsec(); - xqunlock(&fs->clean); - }while(n > 0 && target > 0); - if(tot == 0){ - warn("low on mem (0 out; %uld ignored)", ign); - tot = -1; - }else - dZprint("fslru: %uld out %uld ignored %ulld free %d min %d max\n", - tot, ign, fsmemfree(), Mminfree, Mmaxfree); - rlsedebug(x); noerror(); - xqunlock(&fs->lrulk); - return tot; -} - -int -fsfull(void) -{ - if(fsdiskfree() > Dzerofree) - return 0; - - if(1){ - warn("file system full"); - if(0)fsdump(0, Mem); - fatal("aborting"); - } - return 1; -} - -int -fsreclaim(void) -{ - Memblk *victim, *arch; - long n, tot; - Path *p; + mbput(u); - xqlock(&fs->fzlk); - if(catcherror()){ - warn("reclaim: %r"); - xqunlock(&fs->fzlk); - return 0; - } - warn("%ulld free: reclaiming...", fsdiskfree()); - if(fs->fzsuper != nil){ - /* - * we did freeze/reclaim and are still writing, can't reclaim now. - */ - noerror(); - xqunlock(&fs->fzlk); - warn("write in progress. refusing to reclaim"); - return 0; - } - - tot = 0; - for(;;){ - - /* - * The logic regarding references for reclaim is similar - * to that described in fsfreeze(). - * Read that comment before this code. 
- */ - dprint("fsreclaim: reclaiming\n"); - p = pickvictim(); - if(p == nil){ - dprint("nothing to reclaim\n"); - break; - } - if(catcherror()){ - putpath(p); - error(nil); - } - assert(p->nf > 2); - victim = p->f[p->nf-1]; - warn("reclaiming '%s'", victim->mf->name); - dprint("%H\n", victim); - arch = fs->archive; - dbincref(arch->addr); /* see comment in fsfreeze() */ - meltedpath(&p, p->nf-1, 0); - updateroot(p->f[1]); - if(catcherror()){ - rwunlock(p->f[p->nf-2], Wr); - error(nil); - } - dfchdentry(p->f[p->nf-2], victim->addr, 0); - noerror(); - rwunlock(p->f[p->nf-2], Wr); - n = dbput(victim); - dbput(arch); - mbput(arch); - noerror(); - putpath(p); - dffreeze(fs->archive); - dprint("fsreclaim: %uld file%s reclaimed\n", n, n?"s":""); - tot += n; - - if(fsdiskfree() > Dmaxfree){ - dprint("fsreclaim: %d free: done\n", Dmaxfree); - break; - } - } - if(tot == 0){ - warn("low on disk: 0 files reclaimed %ulld blocks free", - fsdiskfree()); - tot = -1; - }else - warn("%uld file%s reclaimed %ulld blocks free", - tot, tot?"s":"", fsdiskfree()); + fs->cons->d.uid = usrid(getuser()); + fs->cons->mf->uid = getuser(); noerror(); - xqunlock(&fs->fzlk); - return tot; } -static int -fsdirtypcent(void) -{ - long n, ndirty; - - n = fs->clean.n; - ndirty = fs->dirty.n; - - return (ndirty*100)/(n + ndirty); -} - -/* - * Policy for memory and and disk block reclaiming. - * Called from the sync proc from time to time and also before and - * after each rpc. 
- */ -void -fspolicy(int when) +static char* +fsstats(char *s, char *e, int clr, int verb) { - int longago; - - if(when == Pre){ - if(fsmemfree() < Mzerofree){ - dZprint("fspolicy: pre: low mem %ulld\n", fsmemfree()); - fslru(); - } - if(fsmemfree() > Mzerofree && fsdiskfree() > Dzerofree) - return; - dZprint("fspolicy: pre: low on mem or disk\n"); - } - - if(fsmemfree() < Mminfree){ - dZprint("fspolicy: low mem %ulld\n", fsmemfree()); - fslru(); - } - - if(fsdiskfree() < Dminfree){ - dZprint("fspolicy: low disk %ulld\n", fsdiskfree()); - fsreclaim(); - } + int i; - if(!xcanqlock(&fs->policy)) /* another is doing it */ - return; + s = seprint(s, e, "fsys %s epoch %ulld fepoch %ulld %s\n", + fs->dev, fs->super->d.epoch, fs->super->d.fepoch, mname(fs->mode)); + s = seprint(s, e, "mem:\t%8uld blocks %8uld alloc %8uld free\n", + fs->mballoc.nalloc, fs->mballoc.nalloc - fs->mballoc.nfree, + fs->mballoc.nfree); + s = seprint(s, e, "disk:\t%8ulld blocks %8ulld alloc %8ulld free\n", + fs->ndblk, fs->ndblk - fs->ndfree, fs->ndfree); + s = seprint(s, e, "blocks:\t%8ulld blocks %8ulld dirty %8uld clean" + " %#4uld out\n", + fs->nmblk + fs->clean.n + fs->out.n, + fs->nmblk, fs->clean.n, fs->out.n); + s = seprint(s, e, "paths:\t%8uld alloc %8uld free\n", + pathalloc.nalloc, pathalloc.nfree); + s = seprint(s, e, "mfs:\t%8uld alloc %8uld free\n", + mfalloc.nalloc, mfalloc.nfree); - if(catcherror()){ - xqunlock(&fs->policy); - return; - } - longago = (fstime(nsec()) - fs->wtime)/NSPERSEC > Syncival; - if(fs->dirty.n == 0) - longago = 0; - if(fsdirtypcent() > Mmaxdirtypcent || longago){ - dZprint("fspolicy: dirties %d%% %s\n", - fsdirtypcent(), longago ? 
"long ago" : ""); - fssync(); + if(verb == 0) + return s; + s = seprint(s, e, "\n"); + s = seprint(s, e, "Fsysmem:\t%uld\n", Fsysmem); + s = seprint(s, e, "Mzerofree:\t%d\tMminfree:\t%d\tMmaxfree:\t%d\n", + Mzerofree, Mminfree, Mmaxfree); + s = seprint(s, e, "Dzerofree:\t%d\tDminfree:\t%d\tDmaxfree:\t%d\n", + Dzerofree, Dminfree, Dmaxfree); + s = seprint(s, e, "Dblksz: \t%uld\n", Dblksz); + s = seprint(s, e, "Mblksz: \t%ud\n", sizeof(Memblk)); + s = seprint(s, e, "Dminattrsz:\t%uld\n", Dminattrsz); + s = seprint(s, e, "Embedsz:\t%uld\n", Embedsz); + s = seprint(s, e, "Dentryperblk:\t%uld\n", Dblksz/Daddrsz); + s = seprint(s, e, "Dptrperblk:\t%uld\n", Dptrperblk); + s = seprint(s, e, "Dtagperblk:\t%uld\n", Dtagperblk); + s = seprint(s, e, "pathsize:\t%uld\n", pathalloc.elsz); + s = seprint(s, e, "mfsize:\t%uld\n", mfalloc.elsz); + s = seprint(s, e, "nindirs:\t"); + for(i = 0; i < nelem(fs->nindirs); i++){ + s = seprint(s, e, "%d ", fs->nindirs[i]); + if(clr) + fs->nindirs[i] = 0; } - noerror(); - xqunlock(&fs->policy); + s = seprint(s, e, "\n"); + s = seprint(s, e, "\n"); + return s; } -uvlong -fstime(uvlong t) +char* +updatestats(int clr, int verb) { - static Lock lk; - static uvlong last; + static QLock statslk; + static char statstext[Statsbufsz], *statsp; - lock(&lk); - if(t) - fs->atime = t; - t = fs->atime; - if(t == last) - fs->atime = ++t; - last = t; - unlock(&lk); - return t; + if(clr) + warn("clearing stats"); + xqlock(&statslk); + statsp = statstext; + *statsp = 0; + statsp = fsstats(statsp, statstext+sizeof statstext, clr, verb); + statsp = ninestats(statsp, statstext+sizeof statstext, clr, verb); + statsp = ixstats(statsp, statstext+sizeof statstext, clr, verb); + xqunlock(&statslk); + return statstext; } --- /sys/src/cmd/creepy/ix.c Wed Apr 25 11:11:02 2012 +++ /sys/src/cmd/creepy/ix.c Tue May 8 19:51:30 2012 @@ -227,7 +227,7 @@ rpc->xr.data = rpc->data + ixrreadhdrsz; /* - * send all but the last reply, if we are given permissiong to + * send all but 
the last reply, if we are given permission to * send multiple replies back. * Errors, eof, and flush terminate the sequence. * As usual, the caller sends the last reply when we return. @@ -301,16 +301,16 @@ fid = rpc->rpc0->fid; if(fid == nil) error("fid not set"); - p = lastpath(&fid->p, fid->p->nf); + p = fid->p; f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); error(nil); } rpc->xr.value = (char*)rpc->data + ixrattrhdrsz; dfrattr(f, rpc->xt.attr, rpc->xr.value, Minmdata - ixrattrhdrsz); - rwunlock(f, Rd); + xrunlock(f->mf); noerror(); } @@ -330,21 +330,17 @@ error("fid not set"); p = fid->p; f = p->f[p->nf-1]; - if(fs->mode == Rd) - error("read only file system"); - if(writedenied(fid->uid)) - error("user can't write"); - if(isro(f) || fid->archived) + fidcanwrite(fid); + if(builtin(f)) error("can't wattr archived or built-in files"); - p = meltedpath(&fid->p, fid->p->nf, 1); - f = p->f[p->nf-1]; + f = prenew(fid->p, fid->p->nf); if(catcherror()){ - rwunlock(f, Wr); + xwunlock(f->mf); error(nil); } dfwattr(f, rpc->xt.attr, rpc->xt.value); noerror(); - rwunlock(f, Wr); + xwunlock(f->mf); } static void @@ -359,14 +355,14 @@ error("fid not set"); p = fid->p; f = p->f[p->nf-1]; - rwlock(f, Rd); + xrlock(f->mf); if(catcherror()){ - rwunlock(f, Rd); + xrunlock(f->mf); error(nil); } dfcattr(f, rpc->xt.op, rpc->xt.attr, rpc->xt.value); noerror(); - rwunlock(f, Rd); + xrunlock(f->mf); } static void @@ -516,14 +512,12 @@ dPprint("%s started\n", threadgetname()); do{ - fspolicy(Pre); - nerr = errstacksize(); rpc->xr.type = rpc->xt.type + 1; rpc->rpc0 = rpc0; - quiescent(No); + xrlock(&fs->quiescence); if(catcherror()){ - quiescent(Yes); + xrunlock(&fs->quiescence); rpc->xr.type = Rerror; rpc->xr.ename = err; rerrstr(err, sizeof err); @@ -533,7 +527,7 @@ if(fs->halt != 0) error("file system halted"); ixcalls[rpc->xt.type](rpc); - quiescent(Yes); + xrunlock(&fs->quiescence); noerror(); } @@ -567,8 +561,6 @@ 
freeixrpc(rpc); replied(rpc0); freeixrpc(rpc0); - - fspolicy(Post); dPprint("%s exiting\n", threadgetname()); threadsetname("rpcworkerix"); --- /sys/src/cmd/creepy/mblk.c Wed Apr 11 18:35:19 2012 +++ /sys/src/cmd/creepy/mblk.c Fri May 11 16:30:05 2012 @@ -2,12 +2,8 @@ /* * memory blocks. - * see dk.h - */ - -/* - * For simplicity, functions in mblk.c do not raise errors. - * (debug dump functions may be an exception). + * + * Functions in mblk.c do not raise errors. */ Alloc mfalloc = @@ -16,601 +12,365 @@ .zeroing = 1, }; -char* -tname(int t) -{ - static char*nms[] = { - [DBfree] "DBfree", - [DBsuper] "DBsuper", - [DBref] "DBref", - [DBdata] "DBdata", - [DBattr] "DBattr", - [DBfile] "DBfile", - [DBptr0] "DBptr0", - [DBptr0+1] "DBptr1", - [DBptr0+2] "DBptr2", - [DBptr0+3] "DBptr3", - [DBptr0+4] "DBptr4", - [DBptr0+5] "DBptr5", - [DBptr0+6] "DBptr6", - }; - - if(t < 0 || t >= nelem(nms)) - return "BADTYPE"; - return nms[t]; -} - -int fullfiledumps = 0; - -/* - * NO LOCKS. debug only - */ -static void -fmttab(Fmt *fmt, int t, int c) -{ - while(t-- > 0) - fmtprint(fmt, "%c ", c?'.':' '); -} -int mbtab; -static void -fmtptr(Fmt *fmt, int type, daddrt addr, char *tag, int n) -{ - Memblk *b; - - if(addr == 0) - return; - b = mbget(type, addr, Dontmk); - if(b == nil){ - fmttab(fmt, mbtab, 0); - fmtprint(fmt, "%s[%d] = d%#010ullx \n", tag, n, addr); - }else{ - fmtprint(fmt, "%H", b); - mbput(b); - } -} -static void -dumpdirdata(Fmt *fmt, Memblk *b) +void +mbset(Memblk *b, int s) { - long doff; - daddrt *p; - int i; - - if(b->d.length == 0 || DBDIR(b) == 0) - return; - doff = embedattrsz(b); - if(doff < Embedsz){ - fmttab(fmt, mbtab, 0); - p = (daddrt*)(b->d.embed+doff); - for(i = 0; i < 5 && (uchar*)p < b->d.embed+Embedsz - Daddrsz; i++) - fmtprint(fmt, "%sd%#010ullx", i?" 
":"data: ", EP(*p++)); - fmtprint(fmt, "\n"); + if(xcanqlock(&b->slk)) + fatal("mbset: unlocked"); + dSprint("%D: %s -> %s %D\n",b->addr, sname(b->state), sname(s), MKMADDR(b)); + if(b->state == MBmem){ + lock(&fs->nmblklk); + fs->nmblk--; + unlock(&fs->nmblklk); + } + if(s == MBmem){ + lock(&fs->nmblklk); + fs->nmblk++; + unlock(&fs->nmblklk); } + b->state = s; } -int -mbfmt(Fmt *fmt) +Memblk* +mballocz(int zeroit) { Memblk *b; - int i, n; + static int nwait; - b = va_arg(fmt->args, Memblk*); - if(b == nil) - return fmtprint(fmt, "\n"); - nodebug(); - fmttab(fmt, mbtab, b->type == DBfile); - - fmtprint(fmt, "%s", tname(b->type)); - if(b->type == DBfile && b->mf != nil) - fmtprint(fmt, " '%s'", b->mf->name); - if(b->frozen) - fmtprint(fmt, " FZ"); - if(b->dirty) - fmtprint(fmt, " DT"); - if(DBDIR(b)) - fmtprint(fmt, " DIR"); - fmtprint(fmt, " m%#p d%#010ullx", b, EP(b->addr)); - fmtprint(fmt, " r=%d", b->ref); - switch(b->type){ - case DBfree: - fmtprint(fmt, "\n"); - break; - case DBdata: - case DBattr: - fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); - break; - case DBref: - fmtprint(fmt, " next m%#p", b->lnext); - for(i = n = 0; i < Drefperblk; i++) - if(b->d.ref[i]){ - if(n++%3 == 0){ - fmtprint(fmt, "\n"); - fmttab(fmt, mbtab, 0); - } - fmtprint(fmt, " "); - fmtprint(fmt, "[%02d]d%#010ullx=%#ullx", - i, addrofref(b->addr, i), b->d.ref[i]); - } - if(n == 0 || --n%4 != 0) - fmtprint(fmt, "\n"); - break; - case DBfile: - fmtprint(fmt, " dr=%ulld", dbgetref(b->addr)); - if(b->mf == nil){ - fmtprint(fmt, " no mfile\n"); - break; - } - fmtprint(fmt, " nr%d nw%d\n", b->mf->readers, b->mf->writer); - if(0) - fmtprint(fmt, " asz %#ullx aptr %#ullx", - b->d.asize, b->d.aptr); - fmttab(fmt, mbtab, 0); - fmtprint(fmt, " %M '%s' len %ulld ndents %ulld melted m%#p\n", - (ulong)b->d.mode, usrname(b->d.uid), - b->d.length, b->d.ndents, b->mf->melted); - if(0){ - fmttab(fmt, mbtab, 0); - fmtprint(fmt, " id %#ullx mode %M mt %#ullx" - " '%s'\n", - EP(b->d.id), 
(ulong)b->d.mode, - EP(b->d.mtime), b->mf->uid); - } - mbtab++; - if(DBDIR(b)) - dumpdirdata(fmt, b); - for(i = 0; i < nelem(b->d.dptr); i++) - fmtptr(fmt, DBdata, b->d.dptr[i], "d", i); - for(i = 0; i < nelem(b->d.iptr); i++) - fmtptr(fmt, DBptr0+i, b->d.iptr[i], "i", i); - mbtab--; - break; - case DBsuper: - fmtprint(fmt, "\n"); - fmttab(fmt, mbtab, 0); - fmtprint(fmt, " free d%#010ullx eaddr d%#010ullx root d%#010ullx %s refs\n", - b->d.free, b->d.eaddr, b->d.root, - b->d.oddrefs?"odd":"even"); - break; - default: - if(b->type < DBptr0 || b->type >= DBptr0+Niptr){ - fmtprint(fmt, "", b->type); + for(;;){ + b = anew(&fs->mballoc); + if(b != nil) break; - } - fmtprint(fmt, " dr=%ulld\n", dbgetref(b->addr)); - mbtab++; - if(fullfiledumps) - for(i = 0; i < Dptrperblk; i++) - fmtptr(fmt, b->type-1, b->d.ptr[i], "p", i); - mbtab--; - break; + ainc(&fs->nmballocw); + if((nwait++ % 60) == 0) + warn("out of memory blocks. waiting"); + sendul(fs->lruc, 0); } - debug(); - return 0; -} - -/* - * Blocks are kept in a hash while loaded, to look them up. - * When in the hash, they fall into exactly one of this cases: - * - a super block or a fake mem block (e.g., cons, /), unlinked - * - a ref block, linked in the fs->refs list - * - a clean block, linked in the fs clean list - * - a dirty block, linked in the fs dirty list. - * - * The policy function (eg fslru) keeps the lock on the list while - * releasing blocks from the hash. This implies locking in the wrong order. - * The "ispolicy" argument in some functions here indicates that the - * call is from the policy function. 
- */ - -void -ismelted(Memblk *b) -{ - if(b->frozen) - fatal("frozen at pc %#p", getcallerpc(&b)); -} - -void -munlink(List *l, Memblk *b, int ispolicy) -{ - if(!ispolicy) - xqlock(l); - if(b->lprev != nil) - b->lprev->lnext = b->lnext; - else - l->hd = b->lnext; - if(b->lnext != nil) - b->lnext->lprev = b->lprev; - else - l->tl = b->lprev; - b->lnext = nil; - b->lprev = nil; - l->n--; - if(!ispolicy) - xqunlock(l); - b->unlinkpc = getcallerpc(&l); -} - -static void -mlink(List *l, Memblk *b) -{ - assert(b->lnext == nil && b->lprev == nil); - xqlock(l); - b->lnext = l->hd; - if(l->hd != nil) - l->hd->lprev = b; - else - l->tl = b; - l->hd = b; - l->n++; - xqunlock(l); -} -static void -mlinklast(List *l, Memblk *b) -{ - xqlock(l); - b->lprev = l->tl; - if(l->tl != nil) - l->tl->lnext = b; + if(zeroit) + memset(b, 0, sizeof *b); else - l->hd = b; - l->tl = b; - l->n++; - xqunlock(l); -} - -List -mfilter(List *bl, int(*f)(Memblk*)) -{ - Memblk *b, *bnext; - List wl; - - memset(&wl, 0, sizeof wl); - xqlock(bl); - for(b = bl->hd; b != nil; b = bnext){ - bnext = b->lnext; - if(f(b)){ - munlink(bl, b, 1); - mlinklast(&wl, b); - } - } - xqunlock(bl); - return wl; -} - -void -mlistdump(char *tag, List *l) -{ - Memblk *b; - int i; - - fprint(2, "%s:", tag); - i = 0; - for(b = l->hd; b != nil; b = b->lnext){ - if(i++ % 5 == 0) - fprint(2, "\n\t"); - fprint(2, "d%#010ullx ", EP(b->addr)); - } - fprint(2, "\n"); + memset(&b->Meminfo, 0, sizeof b->Meminfo); + b->ref = 1; + xqlock(&b->slk); + mbset(b, MBmem); + b->type = DBfree; + b->addr = MKMADDR(b); + b->next = nil; + xqunlock(&b->slk); + dNprint("mballocz %H", b); + return b; } static void -mbused(Memblk *b) -{ - if(b->dirty != 0 || (b->addr&Fakeaddr) != 0) - return; - switch(b->type){ - case DBref: - case DBsuper: - break; - default: - munlink(&fs->clean, b, 0); - mlink(&fs->clean, b); - } -} - -void -mbunused(Memblk *b) +mbfree(Memblk *b) { - if(b->dirty || (b->addr&Fakeaddr) != 0) /* not on the clean list */ - return; - 
if(b->type == DBsuper || b->type == DBref) /* idem */ - return; - munlink(&fs->clean, b, 0); - mlinklast(&fs->clean, b); -} + Mfile *mf; -void -changed(Memblk *b) -{ - if(b->type != DBsuper) - ismelted(b); - if(b->dirty || (b->addr&Fakeaddr) != 0) - return; - lock(&b->dirtylk); - if(b->dirty){ - unlock(&b->dirtylk); + if(b == nil) return; - } - switch(b->type){ - case DBsuper: - case DBref: - b->dirty = 1; - break; - default: - assert(b->dirty == 0); - munlink(&fs->clean, b, 0); - b->dirty = 1; - mlink(&fs->dirty, b); - } - unlock(&b->dirtylk); -} + dNprint("mbfree m%#p %D\n", b, b->addr); + if(b->ref > 0) + fatal("mbfree: %D: %d refs", b->addr, b->ref); + if(b->state == MBfree) + fatal("mbfree: %D: double free\n", b->addr); + if(b->next != nil) + fatal("mbfree: %D: next", b->addr); + if(b->lnext != nil || b->lprev != nil) + fatal("mbfree: %D: linked", b->addr); -void -written(Memblk *b) -{ - lock(&b->dirtylk); - assert(b->dirty != 0); - switch(b->type){ - case DBsuper: - case DBref: - b->dirty = 0; - unlock(&b->dirtylk); - break; - default: - /* - * data blocks are removed from the dirty list, - * then written. They are not on the list while - * being written. - */ - assert(b->lprev == nil && b->lnext == nil); - b->dirty = 0; - unlock(&b->dirtylk); - - /* - * heuristic: frozen files that have a melted version - * are usually no longer useful. 
- */ - if(b->type == DBfile && b->mf->melted != nil) - mlinklast(&fs->clean, b); - else - mlink(&fs->clean, b); + if(b->mf != nil){ + mf = b->mf; + b->mf = nil; + if(mf->writer != 0 || mf->readers != 0) + fatal("mbfree: %D: %d writer %d readers", + b->addr, mf->writer, mf->readers); + afree(&mfalloc, mf); } + xqlock(&b->slk); + mbset(b, MBfree); + xqunlock(&b->slk); + afree(&fs->mballoc, b); } -static void -linkblock(Memblk *b) +int +mbhashed(Memblk *b) { - if((b->addr&Fakeaddr) != 0 || b->type == DBsuper) - return; - if(b->type == DBref) - mlink(&fs->refs, b); - else{ - assert(b->dirty == 0); - mlink(&fs->clean, b); - } + return b->next != nil; } -static void -unlinkblock(Memblk *b, int ispolicy) +void +mbhash(Memblk *b) { - if((b->addr&Fakeaddr) != 0) - return; - switch(b->type){ - case DBref: - fatal("unlinkblock: DBref"); - case DBsuper: - fatal("unlinkblock: DBsuper"); - } + uint hv; - if(b->dirty){ - assert(!ispolicy); - munlink(&fs->dirty, b, 0); - }else - munlink(&fs->clean, b, ispolicy); - b->unlinkpc = getcallerpc(&b); + hv = b->addr%nelem(fs->fhash); + xqlock(&fs->fhash[hv]); + b->next = fs->fhash[hv].b; + fs->fhash[hv].b = b; + xqunlock(&fs->fhash[hv]); } -/* - * hashing a block also implies placing it in the refs/clean/dirty lists. - * mbget has also the guts of mbhash, for new blocks. 
- */ -Memblk* -mbhash(Memblk *b) +int +mbunhash(Memblk *b, int onlyidle) { - Memblk **h; uint hv; + Memblk **h; + int r; + r = -1; hv = b->addr%nelem(fs->fhash); xqlock(&fs->fhash[hv]); for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) if((*h)->addr == b->addr){ - warn("mbhash: dup blocks:"); - warn("b=> %H*h=> %H", b, *h); - fatal("mbhash: dup"); + if(*h != b) + fatal("mbunhash: dup"); + break; } - *h = b; - if(b->next != nil) - fatal("mbhash: next"); - incref(b); - linkblock(b); + if(*h == nil) + fatal("mbunhash: %D: not found", b->addr); + if(onlyidle == 0 || b->ref == 1){ + *h = b->next; + b->next = nil; + r = 0; + }else + ainc(&fs->nmbunhashbusy); xqunlock(&fs->fhash[hv]); - return b; + return r; } /* - * unhashing a block also implies removing it from the refs/clean/dirty lists. - * + * clean -> out + * called with the clean list locked. + * will move the block out only if idle (ref == 1). */ int -mbunhash(Memblk *b, int ispolicy) +mblru(Memblk *b) { - Memblk **h; - uint hv; + if(b->type == DBsuper) + return -1; - if(b->type == DBref) - fatal("mbunhash: DBref"); - - hv = b->addr%nelem(fs->fhash); - if(ispolicy){ - if(!xcanqlock(&fs->fhash[hv])) - return 0; - }else - xqlock(&fs->fhash[hv]); - for(h = &fs->fhash[hv].b; *h != nil; h = &(*h)->next) - if((*h)->addr == b->addr){ - if(*h != b) - fatal("mbunhash: dup"); - *h = b->next; - b->next = nil; - unlinkblock(b, ispolicy); - b->unlinkpc = getcallerpc(&b); - xqunlock(&fs->fhash[hv]); - mbput(b); - return 1; - } - fatal("mbunhash: not found"); + if(xcanqlock(&b->slk) == 0){ /* busy; try later... 
*/ + dprint("mblru: %D lock busy\n", b->addr); + return -1; + } + if(b->state != MBclean) + fatal("mblru: %D: not clean: %s", b->addr, sname(b->state)); + if(xcanqlock(&fs->clean)) + fatal("mblru: clean not locked"); + if(mbunhash(b, 1) < 0){ + xqunlock(&b->slk); + dprint("mblru: %D busy\n", b->addr); + return -1; + } + xmunlink(&fs->clean, b); + mbset(b, MBlru); + xqunlock(&b->slk); + dOprint("mblru: %D\n", b->addr); + if(b->type == DBtag){ + dprint("lru: %D DBtag out: -%ud ndfree\n", b->addr, b->d.tag[0]); + xqlock(&fs->superlk); + assert(fs->ndfree > b->d.tag[0]); + fs->ndfree -= b->d.tag[0]; + xqunlock(&fs->superlk); + } + mbput(b); /* hash and list ref */ return 0; } +/* + * If b is still on the out queue, move it to the head so + * syncproc finds it soon. We are waiting for it. + */ static void -mbfree(Memblk *b) +wahead(Memblk *b) { - Mfile *mf; - - if(b == nil) - return; - dNprint("mbfree m%#p d%#010ullx\n", b, b->addr); - if(b->ref > 0) - fatal("mbfree: d%#010ullx has %d refs\n", b->addr, b->ref); - if(b->type == DBfree) - fatal("mbfree: d%#010ullx double free:\n", b->addr); - if(b->next != nil) - fatal("mbfree: d%#010ullx has next\n", b->addr); - if(b->lnext != nil || b->lprev != nil) - fatal("mbfree: d%#010ullx has lnext/lprev\n", b->addr); + Memblk *bl; - /* this could panic, but errors reading a block might cause it */ - if(b->type == DBref) - warn("free of DBref. 
i/o errors?"); - - if(b->mf != nil){ - mf = b->mf; - b->mf = nil; - mbput(mf->melted); - assert(mf->writer == 0 && mf->readers == 0); - afree(&mfalloc, mf); + xqlock(&fs->out); + for(bl = fs->out.hd; bl != nil; bl = bl->lnext) + if(bl == b) + break; + if(bl != nil){ + xmunlink(&fs->out, b); + xmlinkhd(&fs->out, b); } - - xqlock(fs); - fs->nmused--; - fs->nmfree++; - b->next = fs->free; - fs->free = b; - xqunlock(fs); + xqunlock(&fs->out); } -Memblk* -mballocz(daddrt addr, int zeroit) +/* + * clean -> mem + * The block is returned unlocked, but can move from mem to out + * only while the system is quiescent. + */ +daddrt +mbrenew(Memblk *b) { - Memblk *b; - static int nwait; + int nwait; + daddrt oaddr; + + if(b->type == DBsuper) + return 0; + ainc(&fs->nmbrenew); + nwait = 0; for(;;){ - xqlock(fs); - if(fs->free != nil){ - b = fs->free; - fs->free = b->next; - fs->nmfree--; - b->next = nil; - break; - } - if(fs->nblk < fs->nablk){ - b = &fs->blk[fs->nblk++]; + xqlock(&b->slk); + if(b->state == MBout){ + wahead(b); + ainc(&fs->mbreneww); + }else{ + if(nwait > 0) + dSprint("mbrenew: waited %d for %D", nwait, b->addr); break; } - xqunlock(fs); - if((nwait++ % 60) == 0) - warn("out of memory blocks. waiting"); - sleep(1000); + if((++nwait % 1000) == 0) + warn("mbrenew: block %D is dirty. 
waited %d", b->addr, nwait); + xqunlock(&b->slk); + nbsendul(fs->syncc, 0); /* ask for writing */ + if(nwait < 100) /* but don't wait for it, more blocks */ + sleep(0); /* may be added while we wait and we prefer */ + else if(nwait < 1000) /* to wahead the block we are waiting for */ + sleep(60); + else + sleep(1000); } - fs->nmused++; - xqunlock(fs); + if(b->state == MBmem || b->state == MBerr){ + xqunlock(&b->slk); + return 0; + } + if(b->state != MBclean) + fatal("mbrenew: %D: old %s", b->addr, sname(b->state)); + munlink(&fs->clean, b); + if(b->type == DBtag) + mlink(&fs->dtags, b); + else{ + mbunhash(b, 0); + oaddr = b->addr; + b->addr = MKMADDR(b); + if(b->type == DBfile || b->type == DBdir) + b->d.prev = oaddr; + mbhash(b); + } + mbset(b, MBmem); + xqunlock(&b->slk); + return b->addr; +} - if(zeroit) - memset(b, 0, sizeof *b); - else - memset(&b->Meminfo, 0, sizeof b->Meminfo); +/* + * Tags can be frozen (written) even if the fs is quiescent, + * this is to make sure a tag block does not leave MBmem after + * being renewed and before being locked. + */ +daddrt +mbrenewlocked(Memblk *b) +{ + daddrt addr; - b->addr = addr; - b->ref = 1; - dNprint("mballocz %H", b); - return b; + for(;;){ + addr = mbrenew(b); + xqlock(&b->slk); + if(b->state == MBmem) + return addr; + xqunlock(&b->slk); + } } -int -mbhashed(daddrt addr) +/* + * Make a new mem block of the given type. + */ +Memblk* +mbnew(int type) { Memblk *b; - uint hv; - hv = addr%nelem(fs->fhash); - xqlock(&fs->fhash[hv]); - for(b = fs->fhash[hv].b; b != nil; b = b->next) - if(b->addr == addr) - break; - xqunlock(&fs->fhash[hv]); - return b != nil; + if(fs->mode == Worm && type == DBtag) + fatal("mbnew: DBtag in worm"); + b = mballocz(1); + b->type = type; + incref(b); /* hash and list */ + if(b->type == DBfile || b->type == DBdir){ + assert(b->mf == nil); + b->mf = anew(&mfalloc); + } + mbhash(b); + return b; } +/* + * Return block for the given address. 
+ * It might be unloaded, in which case the caller is responsible + * for reading it. + * If locked, the block is returned locked so its state + * does not change before used. + */ Memblk* -mbget(int type, daddrt addr, int mkit) +xmbget(daddrt addr, int locked, int load) { Memblk *b; uint hv; - if(catcherror()) - fatal("mbget: %r"); +again: hv = addr%nelem(fs->fhash); xqlock(&fs->fhash[hv]); for(b = fs->fhash[hv].b; b != nil; b = b->next) if(b->addr == addr){ - checktag(b->d.tag, type, addr); incref(b); break; } - if(mkit) - if(b == nil){ - b = mballocz(addr, 0); - b->loading = 1; - b->type = type; - b->d.tag = TAG(type, 0, addr); - /* mbhash() it, without releasing the locks */ - b->next = fs->fhash[hv].b; - fs->fhash[hv].b = b; - incref(b); - linkblock(b); - xqlock(&b->newlk); /* make others wait for it */ - }else if(b->loading){ + if(b == nil){ + if(load == 0){ xqunlock(&fs->fhash[hv]); - xqlock(&b->newlk); /* wait for it */ - xqunlock(&b->newlk); - if(b->loading){ - mbput(b); - dprint("mbget %#ullx -> i/o error\n", addr); - return nil; /* i/o error reading it */ - } - dMprint("mbget %#010ullx -> waited for m%#p\n", addr, b); - noerror(); - return b; + return nil; } - xqunlock(&fs->fhash[hv]); - if(b != nil) - mbused(b); - dMprint("mbget %#010ullx -> m%#p\n", addr, b); - noerror(); + b = mballocz(0); + b->addr = addr; + xqlock(&b->slk); + mbset(b, MBin); + incref(b); /* hash (& future list) */ + b->next = fs->fhash[hv].b; + fs->fhash[hv].b = b; + xqlock(&b->ldlk); /* make others wait for it */ + xqunlock(&b->slk); + xqunlock(&fs->fhash[hv]); + }else{ + xqunlock(&fs->fhash[hv]); + xqlock(&b->ldlk); /* make sure it's loaded */ + xqunlock(&b->ldlk); + } + + dMprint("mbload %D -> m%#p\n", addr, b); + if(locked){ + xqlock(&b->slk); + if(b->addr != addr){ + ainc(&fs->mbgetrace); + warn("mbload: %D renewed", b->addr); /* should not happen */ + xqunlock(&b->slk); + mbput(b); + goto again; + } + } return b; } +Memblk* +mbload(daddrt addr, int locked) +{ + return 
xmbget(addr, locked, 1); +} + +Memblk* +mbget(daddrt addr) +{ + return xmbget(addr, Unlocked, 0); +} + +Memblk* +mbgetlocked(daddrt addr) +{ + return xmbget(addr, Locked, 0); +} + void mbput(Memblk *b) { @@ -619,5 +379,13 @@ dMprint("mbput m%#p d%#010ullx pc=%#p\n", b, b->addr, getcallerpc(&b)); if(decref(b) == 0) mbfree(b); + else if(b->type != DBsuper){ + xqlock(&b->slk); + if(b->state == MBclean){ + /* dont "use" DBtag blocks with no free blocks */ + if(b->type != DBtag || b->d.tag[0] > 0) + mused(&fs->clean, b); /* lru */ + } + xqunlock(&b->slk); + } } - --- /sys/src/cmd/creepy/mkfile Thu Apr 26 19:07:04 2012 +++ /sys/src/cmd/creepy/mkfile Thu May 10 12:53:10 2012 @@ -2,10 +2,9 @@ TARG=\ - archer\ - cmd\ - fmt\ 9pix\ + fmt\ + cmd\ rip\ OFILES=\ @@ -13,10 +12,10 @@ mblk.$O\ dblk.$O\ fblk.$O\ + tools.$O\ attr.$O\ fsys.$O\ - tools.$O\ - check.$O\ + arch.$O\ IXOFILES=\ ixcall.$O\ @@ -44,8 +43,19 @@ $LD $LDFLAGS -o $target $prereq $O.rip: rip.$O $OFILES $IXOFILES $LIB $LD $LDFLAGS -o $target $prereq -$O.archer: archer.$O - $LD $LDFLAGS -o $target $prereq fns:V: c/f2p *.c >fns.h +wc:QV: + # FS + CFILES=`{echo $OFILES|sed 's/\.'^$O^'/.c/g'} + echo fs: + wc -l 9pix.c $CFILES dk.h dbg.h conf.h all.h + # Net + CFILES=`{echo $IXOFILES|sed 's/\.'^$O^'/.c/g'} + echo net: + wc -l $CFILES ix.h net.h + echo cmd: + wc -l cmd.c fmt.c + echo total: + cat *.[ch] | wc -l --- /sys/src/cmd/creepy/net.h Mon Mar 26 19:36:10 2012 +++ /sys/src/cmd/creepy/net.h Wed May 9 15:42:58 2012 @@ -27,7 +27,6 @@ Path* p; int omode; /* -1 if closed */ int rclose; - int archived; int cflags; /* OCERR|OCEND */ int consopen; /* for flush. has /cons open? 
*/ int uid; @@ -36,6 +35,8 @@ int lidx; /* next dir entry index to read */ char* buf; /* for statsread() */ + int wormwr; /* fid can write in Worm mode */ + int afd; /* for afids */ int authok; /* for afids */ AuthRpc *rpc; /* for afids */ @@ -93,6 +94,7 @@ char *addr; int uid; ulong msize; + int noauth; QLock wlk; /* lock for writing replies to the client */ uchar wdata[IOHDRSZ+Maxmdata]; @@ -111,4 +113,3 @@ extern ulong ncalls[]; extern char *callname[]; extern Alloc fidalloc, rpcalloc, clialloc; -extern int noauth; --- /sys/src/cmd/creepy/rip.c Fri Mar 30 18:30:42 2012 +++ /sys/src/cmd/creepy/rip.c Thu May 10 12:55:32 2012 @@ -1,44 +1,13 @@ #include "all.h" - /* - * Requiescat in pace to all old files. - * They deserve it. - * - * This is a WORM archive for creepy files. - * It is actually a creepy 9pix that operates in worm mode: - * - only "main" and "archive" are valid attach specs. - * - only the owner can attach to "main" - * - dbrefs are not used (all blocks are kept forever). - * - there is no automatic sync proc. - * - * This is meant to be used by an archiving program that determines - * which files changed from day to day and writes them to the archive - * using the active tree of the worm, and calling sync() after that. - * - * XXX: modify sync so that in worm mode only ddmm[.n] dirs are kept - * if the sync is due to a console request. (other syncs are ok to flush - * changes to disk). - * - * XXX: The plan is that the archive can use an index file - * /active/idx/n0/.../n15 to map sha1 -> address, such that - * the archival program computes the index, looks if the file is already - * kept here, and uses a new "link new old" ctl in that case, and - * copies the file (and updates the index) otherwise. - * - * This permits the archival process to operate with multiple concurrent - * processes archiving files in parallel (and computing hashes in parallel). - * It's likely that's going to outperform fossil+venti. 
- * - * XXX: The owner must be always in allow mode for the active file tree. - * - * XXX: Change worm mode so that file->id value is the disk address. + * Creepy worm 9P and IX service. */ static void usage(void) { - fprint(2, "usage: %s [-DFLAGS] [-a] [-A addr] [-S srv] disk\n", argv0); - exits("usage"); + fprint(2, "usage: %s [-DFLAGS] [-a] [-c n] [-A addr] [-S srv] disk\n", argv0); + threadexits("usage"); } int mainstacksize = Stack; @@ -47,9 +16,12 @@ threadmain(int argc, char *argv[]) { char *addr, *dev, *srv; + int noauth, fsysmem; - addr = "tcp!*!dump"; - srv = "dump"; + noauth = 0; + fsysmem = 0; + srv = nil; + addr = nil; ARGBEGIN{ case 'A': addr = EARGF(usage()); @@ -60,6 +32,12 @@ case 'a': noauth = 1; break; + case 'c': + fsysmem = strtoul(EARGF(usage()), 0, 0); + if(fsysmem < 2 || fsysmem > Fsysmem/MiB) + sysfatal("mem size is too small or too large"); + fsysmem *= MiB; + break; default: if(ARGC() >= 'A' && ARGC() <= 'Z' || ARGC() == '9'){ dbg[ARGC()] = 1; @@ -69,10 +47,13 @@ }ARGEND; if(argc != 1) usage(); + if(srv == nil || addr == nil) + srv = "rip"; dev = argv[0]; if(dbg['d']) dbg['Z'] = 1; + threadsetname("rip %s", dev); outofmemoryexits(1); workerthreadcreate = proccreate; fmtinstall('H', mbfmt); @@ -83,19 +64,19 @@ fmtinstall('R', rpcfmt); fmtinstall('A', usrfmt); fmtinstall('P', pathfmt); + fmtinstall('D', daddrfmt); errinit(Errstack); if(catcherror()) fatal("uncatched error: %r"); rfork(RFNAMEG|RFNOTEG); - rwusers(nil); - fsopen(dev, Worm, Wr); + fsopen(dev, Worm, fsysmem); if(srv != nil) - srv9pix(srv, cliworker9p); + srv9pix(srv, noauth, cliworker9p); if(addr != nil) - listen9pix(addr, cliworker9p); + listen9pix(addr, noauth, cliworker9p); + consinit(); - proccreate(timeproc, nil, Stack); noerror(); threadexits(nil); } --- /sys/src/cmd/creepy/tools.c Thu Apr 26 19:07:04 2012 +++ /sys/src/cmd/creepy/tools.c Wed May 9 17:50:32 2012 @@ -8,7 +8,6 @@ static Lstat none; static Lstat *lstats; static int lstatson; -int fatalaborts; Alloc pathalloc 
= { @@ -16,56 +15,31 @@ .zeroing = 0, }; -void -fatal(char *fmt, ...) -{ - va_list arg; - char *s; - - va_start(arg, fmt); - s = vsmprint(fmt, arg); - vfprint(2, fmt, arg); - va_end(arg); - if(fs != nil && fs->dev != nil) - fprint(2, "%s: %s: fatal: %s\n", argv0, fs->dev, s); - else - fprint(2, "%s: fatal: %s\n", argv0, s); - free(s); - if(fatalaborts) - abort(); - threadexitsall("fatal"); -} - -void -warn(char *fmt, ...) -{ - va_list arg; - char *s; - - va_start(arg, fmt); - s = vsmprint(fmt, arg); - va_end(arg); - if(fs != nil && fs->dev != nil) - fprint(2, "%s: %s: %s\n", argv0, fs->dev, s); - else - fprint(2, "%s: %s\n", argv0, s); - free(s); -} - -void -warnerror(char *fmt, ...) +vlong +opstart(Opstat *o, vlong t) { - va_list arg; - char err[128]; + vlong t0; - va_start(arg, fmt); - vseprint(err, err+sizeof err, fmt, arg); - va_end(arg); - if(fs != nil && fs->dev != nil) - fprint(2, "%s: %s: %s\n", argv0, fs->dev, err); - else - fprint(2, "%s: %s\n", argv0, err); - error(err); + ainc(&o->ntimes); + t0 = t; + if(t0 == 0) + t0 = nsec(); + return t0; +} + +vlong +opend(Opstat *o, vlong t0) +{ + vlong t1, delta; + + t1 = nsec(); + delta = t1 - t0; + lock(o); + o->tot += delta; + if(o->max < delta) + o->max = delta; + unlock(o); + return t1; } void @@ -144,12 +118,6 @@ } } -void -xqunlock(QLock *q) -{ - qunlock(q); -} - int xcanqlock(QLock *q) { @@ -166,7 +134,7 @@ return canqlock(q); } -void +static void xrwlock(RWLock *rw, int iswr) { vlong t; @@ -199,12 +167,27 @@ } void -xrwunlock(RWLock *rw, int iswr) +xrlock(RWLock *rw) { - if(iswr) - wunlock(rw); - else - runlock(rw); + xrwlock(rw, Rd); +} + +void +xwlock(RWLock *rw) +{ + xrwlock(rw, Wr); +} + +void +xrunlock(RWLock *rw) +{ + runlock(rw); +} + +void +xwunlock(RWLock *rw) +{ + wunlock(rw); } void* @@ -212,18 +195,18 @@ { Next *n; - assert(a->elsz > 0); xqlock(a); n = a->free; if(n != nil){ a->free = n->next; a->nfree--; - }else{ + }else if(a->fixedsz == 0){ + assert(a->elsz > 0); a->nalloc++; n = 
mallocz(a->elsz, !a->zeroing); } xqunlock(a); - if(a->zeroing) + if(n != nil && a->zeroing) memset(n, 0, a->elsz); return n; @@ -244,6 +227,17 @@ xqunlock(a); } +ulong +asize(Alloc *a) +{ + ulong n; + + xqlock(a); + n = a->nfree; + xqunlock(a); + return n; +} + static void xaddelem(Path *p, Memblk *f) { @@ -347,3 +341,133 @@ fmtprint(fmt, "p[%d] = %H", i, p->f[i]); return 0; } + +void +xmunlink(List *l, Memblk *b) +{ + if(b->lprev == nil && b->lnext == nil && l->hd != b) + fatal("xmunlink: unlinked"); + if(b->lprev != nil) + b->lprev->lnext = b->lnext; + else + l->hd = b->lnext; + if(b->lnext != nil) + b->lnext->lprev = b->lprev; + else + l->tl = b->lprev; + b->lnext = nil; + b->lprev = nil; + l->n--; +} + +void +xmlinkhd(List *l, Memblk *b) +{ + if(b->lprev != nil || b->lnext != nil) + fatal("xmlinkhd: linked"); + b->lnext = l->hd; + if(l->hd != nil) + l->hd->lprev = b; + else + l->tl = b; + l->hd = b; + l->n++; +} + +static void +xmlinktl(List *l, Memblk *b) +{ + if(b->lprev != nil || b->lnext != nil) + fatal("xmlinkhd: linked"); + b->lprev = l->tl; + if(l->tl != nil) + l->tl->lnext = b; + else + l->hd = b; + l->tl = b; + l->n++; +} + +Memblk* +munlink(List *l, Memblk *b) +{ + xqlock(l); + if(b == nil) + b = l->hd; + if(b != nil) + xmunlink(l, b); + xqunlock(l); + return b; +} + +void +mlink(List *l, Memblk *b) +{ + assert(b->lnext == nil && b->lprev == nil); + xqlock(l); + xmlinktl(l, b); + xqunlock(l); +} + +void +mused(List *l, Memblk *b) +{ + xqlock(l); + xmunlink(l, b); + xmlinktl(l, b); + xqunlock(l); +} + +void +mlistdump(char *tag, List *l) +{ + Memblk *b; + int i; + + fprint(2, "%s:\t", tag); + i = 0; + if(l->hd == nil) + fprint(2, "none"); + for(b = l->hd; b != nil; b = b->lnext){ + if(++i % 5 == 0) + fprint(2, "\n\t"); + fprint(2, "%D ", b->addr); + } + fprint(2, "\n"); +} + +/* + * Adapted from cwfs(4). 
+ * compute the next time after t + * that has hour hr (no day + * bitpattern is checked here) -- + * for automatic dumps + */ +long +nextime(long t, int hr) +{ + int nhr; + Tm *tm; + + if(hr < 0 || hr >= 24) + hr = 5; + for (;;){ + tm = localtime(t); + t -= tm->sec; + t -= tm->min*60; + nhr = tm->hour; + do{ + t += 60*60; + nhr++; + }while(nhr%24 != hr); + tm = localtime(t); + if(tm->hour != hr){ + t += 60*60; + tm = localtime(t); + if(tm->hour != hr) + t -= 60*60; + } + return t; + } +} +