More tiny fixes, plus a huge one for a bug that could leave the disk inconsistent after a previous failure. Initial version of the man page. Beware: we don't trust this program yet, although it's in use here now. Don't put it in production. Reference: /n/patches.lsub.org/patch/morecreeps Date: Thu Apr 26 17:22:44 CES 2012 Signed-off-by: nemo@lsub.org # rm /sys/src/cmd/creepy/fscmd.c # rm /sys/src/cmd/creepy/fsfmt.c # rm /sys/man/4/creepy --- /sys/src/cmd/creepy/cmd.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy/cmd.c Fri Apr 13 11:47:45 2012 @@ -0,0 +1,474 @@ +#include "all.h" + +/* + * HUGE warning: + * these commands do not perform checks at all. + * that means you know what you are doing if you use them. + * e.g., you can create multiple files with the same name + * in the same directory. + * + * This tool is only an aid for testing and debugging. + */ + +enum +{ + Nels = 64 +}; + +static char *fsdir; +static int verb; + +int +member(int uid, int member) +{ + return uid == member; +} + +int +allowed(int) +{ + return 1; +} + +int +usrid(char*) +{ + return 0; +} + +char* +usrname(int) +{ + return getuser(); +} + +void +meltfids(void) +{ +} + +void +rwusers(Memblk*) +{ +} + +char* +ninestats(char *s, char*, int, int) +{ + return s; +} + +char* +ixstats(char *s, char*, int, int) +{ + return s; +} + +void +countfidrefs(void) +{ +} + +/* + * Returns a newly allocated name for p: a copy of p if it starts + * with '/' or no fsdir is set, otherwise fsdir/p. + */ +static char* +fsname(char *p) +{ + if(p[0] == '/') + return strdup(p); + if(fsdir) + return smprint("%s/%s", fsdir, p); + return strdup(p); +} + +static void +fscd(int, char *argv[]) +{ + free(fsdir); + fsdir = strdup(argv[1]); +} + +/* + * This is unrealistic in that it keeps the file locked + * during the entire put. This means that we can only give + * fslru() a chance before each put, and not before each + * write, because everything is going to be in use and dirty if + * we run out of memory.
+ */ +static void +fsput(int, char *argv[]) +{ + int fd; + char *fn; + Memblk *m, *f; + Dir *d; + char buf[4096]; + uvlong off; + long nw, nr; + Path *p; + char *nm; + + fd = open(argv[1], OREAD); + if(fd < 0) + error("open: %r\n"); + d = dirfstat(fd); + if(d == nil){ + error("dirfstat: %r\n"); + } + nm = fsname(argv[2]); + if(catcherror()){ + free(nm); + close(fd); + free(d); + error(nil); + } + p = walkto(nm, &fn); + if(catcherror()){ + putpath(p); + error(nil); + } + meltedpath(&p, p->nf, 1); + m = p->f[p->nf-1]; + if(catcherror()){ + rwunlock(m, Wr); + error(nil); + } + f = dfcreate(m, fn, usrid(d->uid), d->mode&(DMDIR|0777)); + noerror(); + addelem(&p, f); + decref(f); /* kept now in p */ + rwlock(f, Wr); + rwunlock(m, Wr); + if(catcherror()){ + rwunlock(f, Wr); + error(nil); + } + if((d->mode&DMDIR) == 0){ + off = 0; + for(;;){ + if(fsmemfree() < Mminfree) + fslru(); + nr = read(fd, buf, sizeof buf); + if(nr <= 0) + break; + nw = dfpwrite(f, buf, nr, &off); + dprint("wrote %ld of %ld bytes\n", nw, nr); + off += nr; + } + } + noerror(); + noerror(); + noerror(); + if(verb) + print("created %H\nat %H\n", f, m); + rwunlock(f, Wr); + free(nm); + putpath(p); + close(fd); + free(d); +} + +static void +fscat(int, char *argv[]) +{ + Memblk *f; + Mfile *m; + char buf[4096], *nm; + uvlong off; + long nr; + Path *p; + + nm = fsname(argv[2]); + if(catcherror()){ + free(nm); + error(nil); + } + p = walkto(nm, nil); + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + putpath(p); + error(nil); + } + m = f->mf; + print("cat %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)f->d.mode, f->d.length, m->uid, dbgetref(f->addr)); + if((f->d.mode&DMDIR) == 0){ + off = 0; + for(;;){ + if(fsmemfree() < Mminfree) + fslru(); + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + write(1, buf, nr); + off += nr; + } + } + noerror(); + noerror(); + rwunlock(f, Rd); + putpath(p); + free(nm); +} + +static void +fsget(int, char *argv[]) +{ + Memblk 
*f; + Mfile *m; + char buf[4096], *nm; + uvlong off; + long nr; + int fd; + Path *p; + + fd = create(argv[1], OWRITE, 0664); + if(fd < 0) + error("create: %r\n"); + nm = fsname(argv[2]); + if(catcherror()){ + free(nm); + close(fd); + error(nil); + } + p = walkto(nm, nil); + f = p->f[p->nf-1]; + rwlock(f, Rd); + if(catcherror()){ + rwunlock(f, Rd); + putpath(p); + error(nil); + } + m = f->mf; + print("get %-30s\t%M\t%5ulld\t%s %ulld refs\n", + m->name, (ulong)f->d.mode, f->d.length, m->uid, dbgetref(f->addr)); + if((f->d.mode&DMDIR) == 0){ + off = 0; + for(;;){ + if(fsmemfree() < Mminfree) + fslru(); + nr = dfpread(f, buf, sizeof buf, off); + if(nr <= 0) + break; + if(write(fd, buf, nr) != nr){ + fprint(2, "%s: error: %r\n", argv[0]); + break; + } + off += nr; + } + } + close(fd); + noerror(); + noerror(); + rwunlock(f, Rd); + putpath(p); + free(nm); +} + +static void +fssnap(int, char**) +{ + fssync(); +} + +static void +fsrcl(int, char**) +{ + fsreclaim(); + fssync(); /* commit changes to disk */ +} + +static void +fsdmp(int, char *argv[]) +{ + fsdump(*argv[0] == 'l', strstr(argv[0], "all") != 0); +} + +static void +fsdbg(int, char *argv[]) +{ + char *s; + + memset(dbg, 0, sizeof dbg); + for(s = argv[1]; *s; s++) + dbg['D'] = dbg[*s] = 1; +} + +static void +fsout(int, char*[]) +{ + fslru(); +} + +static void +fsrm(int, char *argv[]) +{ + Memblk *f, *pf; + Path *p; + char *nm; + + nm = fsname(argv[1]); + if(catcherror()){ + free(nm); + error(nil); + } + p = walkto(nm, nil); + if(catcherror()){ + putpath(p); + error(nil); + } + if(p->nf < 2) + error("short path for rm"); + meltedpath(&p, p->nf-1, 1); + f = p->f[p->nf-1]; + pf = p->f[p->nf-2]; + rwlock(f, Wr); + if(catcherror()){ + rwunlock(f, Wr); + rwunlock(pf, Wr); + error(nil); + } + dfremove(pf, f); + p->f[p->nf-1] = nil; + noerror(); + noerror(); + noerror(); + rwunlock(pf, Wr); + putpath(p); + free(nm); +} + +static void +fsst(int, char**) +{ + fprint(2, "%s\n", updatestats(0, 1)); +} + +static void 
+fschk(int, char**) +{ + if(fscheck() != 0) + error("check failed"); +} + +static void +fserr(int, char *argv[]) +{ + if(*argv[0] == 'r'){ + swreaderr = atoi(argv[1]); + print("sw read err count = %d\n", swreaderr); + }else{ + swwriteerr = atoi(argv[1]); + print("sw write err count = %d\n", swwriteerr); + } +} + +static void +fspol(int, char**) +{ + fspolicy(Post); +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-dv] [-f disk] cmd...\n", argv0); + exits("usage"); +} + +static Cmd cmds[] = +{ + {"cd", fscd, 2, "cd!where"}, + {"put", fsput, 3, "put!src!dst"}, + {"get", fsget, 3, "get!dst!src"}, + {"cat", fscat, 3, "cat!what"}, + {"dump", fsdmp, 1, "dump"}, + {"dumpall", fsdmp, 1, "dumpall"}, + {"ldump", fsdmp, 1, "ldump"}, + {"ldumpall", fsdmp, 1, "ldumpall"}, + {"sync", fssnap, 1, "sync"}, + {"snap", fssnap, 1, "snap"}, + {"rcl", fsrcl, 1, "rcl"}, + {"dbg", fsdbg, 2, "dbg!n"}, + {"out", fsout, 1, "out"}, + {"rm", fsrm, 2, "rm!what"}, + {"stats", fsst, 1, "stats"}, + {"check", fschk, 1, "check"}, + {"rerr", fserr, 2, "rerr!n"}, + {"werr", fserr, 2, "werr!n"}, + {"pol", fspol, 1, "pol"}, +}; + +void +threadmain(int argc, char *argv[]) +{ + char *dev; + char *args[Nels]; + int i, j, nargs, check; + + dev = "disk"; + check = 0; + ARGBEGIN{ + case 'c': + check++; + break; + case 'v': + verb++; + break; + case 'f': + dev = EARGF(usage()); + break; + default: + if(ARGC() >= 'A' && ARGC() <= 'Z'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; + }else + usage(); + }ARGEND; + if(argc == 0) + usage(); + fatalaborts = 1; + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + fmtinstall('P', pathfmt); + errinit(Errstack); + if(catcherror()){ + fprint(2, "cmd failed: %r\n"); + threadexitsall("failed"); + } + fsopen(dev, Normal, Wr); + for(i = 0; i < argc; i++){ + if(verb>1) + fsdump(0, Mem); + print("%% %s\n", argv[i]); + nargs = gettokens(argv[i], args, Nels, "!"); + for(j = 0; j < nelem(cmds); j++){ + if(strcmp(cmds[j].name, argv[i]) != 0) + continue; + 
if(cmds[j].nargs != 0 && cmds[j].nargs != nargs) + fprint(2, "usage: %s\n", cmds[j].usage); + else + cmds[j].f(nargs, args); + break; + } + if(j == nelem(cmds)){ + fprint(2, "no such command\n"); + for(j = 0; j < nelem(cmds); j++) + fprint(2, "\t%s\n", cmds[j].usage); + break; + } + if(check){ + print("%% check\n"); + fscheck(); + } + } + if(verb>1) + fsdump(0, Mem); + noerror(); + threadexitsall(nil); +} + --- /sys/src/cmd/creepy/dblk.c Thu Mar 29 22:37:54 2012 +++ /sys/src/cmd/creepy/dblk.c Wed Apr 25 18:23:00 2012 @@ -22,12 +22,31 @@ } void +dbcopy(daddrt dst, daddrt src) +{ + static Diskblk d; + static QLock lk; + + xqlock(&lk); + dWprint("dbcopy d%#010ullx -> d%#010ullx\n", src, dst); + if(pread(fs->fd, &d, sizeof d, src) != Dblksz){ + xqunlock(&lk); + warnerror("dbcopy: read: d%#ullx: %r", src); + } + if(pwrite(fs->fd, &d, sizeof d, dst) != Dblksz){ + xqunlock(&lk); + warnerror("dbcopy: write: d%#ullx: %r", dst); + } + xqunlock(&lk); +} + +void dbclear(u64int tag, daddrt addr) { static Diskblk d; static QLock lk; - dWprint("dbclear d%#ullx\n", addr); + dWprint("dbclear d%#010ullx\n", addr); xqlock(&lk); d.tag = tag; if(pwrite(fs->fd, &d, sizeof d, addr) != Dblksz){ @@ -484,6 +503,7 @@ * Write the block a b->addr. * DBrefs are written at even (b->addr) or odd (b->addr+DBlksz) * reference blocks as indicated by the frozen super block to be written. + * See comment in fsys.c:/^freezesuperrefs */ long dbwrite(Memblk *b) @@ -524,6 +544,12 @@ return Dblksz; } +/* + * Read a block from b->addr. + * DBrefs are written at even (b->addr) or odd (b->addr+DBlksz) + * reference blocks as indicated by the super block in use.
+ * See comment in fsys.c:/^freezesuperrefs + */ long dbread(Memblk *b) { Binary files disk.orig and disk differ --- /sys/src/cmd/creepy/dk.h Wed Apr 25 11:11:02 2012 +++ /sys/src/cmd/creepy/dk.h Wed Apr 25 18:41:20 2012 @@ -273,7 +273,7 @@ u64int dminattrsz; /* only for checking */ u64int ndptr; /* only for checking */ u64int niptr; /* only for checking */ - u64int _avail_; /* now unused */ + u64int dirtyrefs; /* 0 || we were writing ref blocks and failed */ u64int embedsz; /* only for checking */ u64int dptrperblk; /* only for checking */ }; @@ -405,7 +405,6 @@ int dirty; /* must be written */ int frozen; /* is frozen */ int loading; /* block is being read */ - int changed; /* for freerefs/writerefs */ QLock newlk; /* only to wait on DBnew blocks */ uintptr unlinkpc; --- /sys/src/cmd/creepy/fblk.c Fri Mar 30 13:45:56 2012 +++ /sys/src/cmd/creepy/fblk.c Wed Apr 25 15:42:15 2012 @@ -684,7 +684,6 @@ void dfremove(Memblk *p, Memblk *f) { - vlong n; /* funny as it seems, we may need extra blocks to melt */ if(fsfull()) @@ -711,7 +710,7 @@ noerror(); rwunlock(f, Wr); if(!catcherror()){ - n = dbput(f); + dbput(f); noerror(); } mbput(f); --- /sys/src/cmd/creepy/fmt.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/creepy/fmt.c Thu Apr 26 16:52:03 2012 @@ -0,0 +1,102 @@ +#include "all.h" + +int +usrid(char*) +{ + return 3; +} + +char* +usrname(int) +{ + return "sys"; +} + +int +member(int uid, int member) +{ + return uid == member; +} + +int +allowed(int) +{ + return 1; +} + +void +meltfids(void) +{ +} + +void +rwusers(Memblk*) +{ +} + +char* +ninestats(char *s, char*, int, int) +{ + return s; +} + +char* +ixstats(char *s, char*, int, int) +{ + return s; +} + +void +countfidrefs(void) +{ +} + +static void +usage(void) +{ + fprint(2, "usage: %s [-DFLAGS] [-vy] disk\n", argv0); + exits("usage"); +} + +void +threadmain(int argc, char *argv[]) +{ + char *dev; + int verb, force; + + dev = nil; + verb = force = 0; + ARGBEGIN{ + case 'v': + verb = 1; + break; + case 'y': + force = 1; + 
break; + default: + if((ARGC() >= 'A' && ARGC() <= 'Z') || ARGC() == '9'){ + dbg['d'] = 1; + dbg[ARGC()] = 1; + fatalaborts = 1; + }else + usage(); + }ARGEND; + if(argc == 1) + dev = argv[0]; + else + usage(); + fmtinstall('P', pathfmt); + fmtinstall('H', mbfmt); + fmtinstall('M', dirmodefmt); + errinit(Errstack); + if(catcherror()) + fatal("error: %r"); + fsfmt(dev, force); + if(verb) + fsdump(0, Mem); + else + print("%lld %ldK blocks\n", fs->ndblk, Dblksz/1024); + noerror(); + exits(nil); +} + --- /sys/src/cmd/creepy/fns.h Wed Apr 11 18:42:07 2012 +++ /sys/src/cmd/creepy/fns.h Wed Apr 25 18:37:52 2012 @@ -20,6 +20,7 @@ extern void countfidrefs(void); extern Memblk* dballocz(uint type, int dbit, int zeroit); extern void dbclear(u64int tag, daddrt addr); +extern void dbcopy(daddrt dst, daddrt src); extern daddrt dbcounted(daddrt addr); extern u64int dbcountref(daddrt addr); extern Memblk* dbdup(Memblk *b); @@ -75,7 +76,7 @@ extern int fslru(void); extern uvlong fsmemfree(void); extern void fsopen(char *dev, int worm, int canwr); -extern void fspolicy(int); +extern void fspolicy(int when); extern int fsreclaim(void); extern void fssync(void); extern void fssyncproc(void*); @@ -104,7 +105,7 @@ extern Memblk* mbhash(Memblk *b); extern int mbhashed(daddrt addr); extern void mbput(Memblk *b); -extern int mbunhash(Memblk *b, int isreclaim); +extern int mbunhash(Memblk *b, int ispolicy); extern void mbunused(Memblk *b); extern Path* meltedpath(Path **pp, int nth, int user); extern void meltedref(Memblk *rb); @@ -116,7 +117,7 @@ extern int member(int uid, int member); extern List mfilter(List *bl, int(*f)(Memblk*)); extern void mlistdump(char *tag, List *l); -extern void munlink(List *l, Memblk *b, int isreclaim); +extern void munlink(List *l, Memblk *b, int ispolicy); extern Fid* newfid(Cli* cli, int no); extern Path* newpath(Memblk *root); extern Rpc* newrpc(void); --- /sys/src/cmd/creepy/fsys.c Wed Apr 25 11:11:02 2012 +++ /sys/src/cmd/creepy/fsys.c Thu Apr 26 16:40:24 
2012 @@ -201,14 +201,21 @@ * On memory, the super selects even (odd) refs (we read refs from there.) * To sync... * 1. we make a frozen super to indicate that odd (even) DBrefs are active. + * (the memory super is even (odd) and we fetch unloaded DBrefs from + * even (odd) blocks). * 2. we write odd (even) DBref blocks. + * * 3. the frozen super is written, indicating that odd (even) refs are in use. * (The disk is coherent now, pretending to use odd (even) refs). * 4. The memory super is udpated to select odd (even) DBref blocks. - * (from now on, we are loading refs from odd (even) blocks. - * 5. we update even (odd) DBref blocks, so we can get back to 1. - * with even/odd swapped. + * (from now on, we fetch refs from odd (even) blocks). * + * While we are writing odd (even) refs, we flag in the on-disk superblock that + * a new set of references is being written. The flag is cleared when the + * new frozen super block is written. + * At start time, if the flag is set, we must copy all odd (even) ref blocks + * from even (odd) ref blocks, so they are coherent and we can continue + * switching between even/odd blocks. */ static void @@ -226,10 +233,8 @@ b->frozen = 1; b->dirty = 1; /* so it's written */ xqlock(&fs->refs); - for(rb = fs->refs.hd; rb != nil; rb = rb->lnext){ + for(rb = fs->refs.hd; rb != nil; rb = rb->lnext) rb->frozen = 1; - rb->changed = rb->dirty; - } xqunlock(&fs->refs); xqunlock(fs); } @@ -340,6 +345,22 @@ } } +static void +flagdirtyrefs(void) +{ + Diskblk *db; + u64int dirty; + daddrt addr; + + addr = fs->super->addr; + + db = nil; + addr += (uintptr)&db->dirtyrefs; /* ugly, but portable */ + dirty = 1; + if(pwrite(fs->fd, &dirty, sizeof dirty, addr) != sizeof dirty) + error("flagdirtyrefs: %r"); +} + /* * Freeze the file tree, keeping active as a new melted file * that refers to frozen children now in the archive.
@@ -358,6 +379,7 @@ vlong t0; dZprint("freezing fs...\n"); + t0 = 0; if(fs->profile) t0 = fstime(nsec()); xqlock(&fs->fzlk); @@ -421,8 +443,11 @@ * and the state of the super-block. * After doing so, the state to be written on the disk is * coherent and corresponds to now. + * Flag the on-disk super block to report DBref blocks for the + * next epoch (even/odd) are being written. */ dprint("freezing refs...\n"); + flagdirtyrefs(); freezesuperrefs(); /* 3. Make a new active and replace the old one. @@ -526,35 +551,23 @@ fs->fzsuper = nil; } +/* + * We found that we crashed while writing a new set of DBref blocks. + * If the super is using even/odd refs, odd/even refs may be corrupted. + * Restore all of them for safety. + */ static void -syncref(daddrt addr) -{ - static Memblk b; - - b.addr = addr; - b.type = DBref; - dbread(&b); - if(fs->super->d.oddrefs == 0) /* then the old ones are odd */ - addr += Dblksz; - dWprint("syncref d%#010ullx at d%#010ullx\n", b.addr, addr); - if(pwrite(fs->fd, &b.d, sizeof b.d, addr) != sizeof b.d) - error("syncref: write: %r"); -} - -static void -syncrefs(void) +cleanrefs(void) { - Memblk *rb; + daddrt addr; - fs->super->d.oddrefs = !fs->super->d.oddrefs; - xqlock(&fs->refs); - rb = fs->refs.hd; - xqunlock(&fs->refs); - for(; rb != nil; rb = rb->lnext){ - if(rb->changed) - syncref(rb->addr); - rb->changed = 0; - } + warn("restoring DBref blocks..."); + for(addr = Dblk0addr; addr < fs->super->d.eaddr; addr += Dblksz*Nblkgrpsz) + if(fs->super->d.oddrefs != 0) + dbcopy(addr, addr+Dblksz); + else + dbcopy(addr+Dblksz, addr); + warn("DBref blocks restored"); } /* @@ -569,6 +582,7 @@ long nr, nb; dZprint("writing fs...\n"); + t0 = 0; if(fs->profile) t0 = fstime(nsec()); xqlock(&fs->fzlk); @@ -576,15 +590,16 @@ if(fs->fzsuper == nil) fatal("can't fswrite if we didn't fsfreeze"); if(catcherror()){ - fsoptime[Write] += nsec() - t0; + if(fs->profile) + fsoptime[Write] += nsec() - t0; xqunlock(&fs->fzlk); error(nil); } nr = writerefs(); nb = 
writedata(); writezsuper(); + fs->super->d.oddrefs = !fs->super->d.oddrefs; nb++; - syncrefs(); noerror(); if(fs->profile) fsoptime[Write] += fstime(nsec()) - t0; @@ -867,6 +882,12 @@ readsuper(); fs->worm = worm; fs->mode = canwr; + + if(!worm && fs->super->d.dirtyrefs){ + cleanrefs(); + fs->super->d.dirtyrefs = 0; + } + uid = usrid("sys"); xqlock(&fs->fzlk); fs->root = dfcreate(nil, "", uid, DMDIR|0555); @@ -1178,6 +1199,7 @@ if(!xcanqlock(&fs->policy)) /* another is doing it */ return; + if(catcherror()){ xqunlock(&fs->policy); return; @@ -1190,6 +1212,7 @@ fsdirtypcent(), longago ? "long ago" : ""); fssync(); } + noerror(); xqunlock(&fs->policy); } --- /sys/src/cmd/creepy/mkfile Fri Mar 30 18:17:46 2012 +++ /sys/src/cmd/creepy/mkfile Thu Apr 26 16:47:47 2012 @@ -3,8 +3,8 @@ TARG=\ archer\ - fscmd\ - fsfmt\ + cmd\ + fmt\ 9pix\ rip\ --- /sys/src/cmd/creepy/tools.c Wed Apr 11 19:19:04 2012 +++ /sys/src/cmd/creepy/tools.c Wed Apr 25 15:44:42 2012 @@ -153,10 +153,8 @@ int xcanqlock(QLock *q) { - vlong t; Lstat *lst; - t = 0; if(lstats != nil){ lst = getlstat(getcallerpc(&q), Tqlock); ainc(&lst->ntimes);