update ζfs → θfs sadly, we're keeping with naming the fs after its version number, rather than sticking with ζ and using successive values of the ζ function. and i still don't know of a program with an irrational version number. Reference: /n/atom/patch/applied/thetafs Date: Thu Feb 20 02:20:18 CET 2014 Signed-off-by: quanstro@quanstro.net # rm /sys/src/cmd/ζfs/aoe.c # rm /sys/src/cmd/ζfs/aoe.h # rm /sys/src/cmd/ζfs/cache.c # rm /sys/src/cmd/ζfs/cons.c # rm /sys/src/cmd/ζfs/dat.h # rm /sys/src/cmd/ζfs/fis.h # rm /sys/src/cmd/ζfs/free.c # rm /sys/src/cmd/ζfs/fs.c # rm /sys/src/cmd/ζfs/hash.c # rm /sys/src/cmd/ζfs/macosx.c # rm /sys/src/cmd/ζfs/meta.c # rm /sys/src/cmd/ζfs/mkfile # rm /sys/src/cmd/ζfs/nfs.c # rm /sys/src/cmd/ζfs/plan9.c # rm /sys/src/cmd/ζfs/sunos.c # rm /sys/src/cmd/ζfs/super.c # rm /sys/src/cmd/ζfs/uid.c # rm /sys/src/cmd/ζfs/util.c --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:23 2014 @@ -0,0 +1,1136 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Adapted by Brian L. Stuart from: + * + * vblade — virtual aoe target + * copyright © 2007—2013 erik quanstrom + */ + +#include +#include +#include +#include /* irony */ +#include +#include +#include <9p.h> + +enum { + Eaddrlen = 6, /* only defined in kernel */ +}; +#include "aoe.h" + +#include "dat.h" + +enum { + Fclone, + Fdata, + Flast, + + Nether = 8, + Nvblade = 16, + Nmask = 10, + Nmaxout= 128, + Maxpkt = 10000, + Conflen = 1024, +}; + +typedef struct Vblade Vblade; + +struct Vblade { + vlong maxlba; + uint nmask; + Lock mlk; + uchar *mask; + int shelf; + int slot; + int clen; + char *config; +}; + +static Vblade vblade[Nvblade]; +static int nblade; + +static char *ethertab[Nether] = { + "#l0/ether0", +}; +static int etheridx = 1; +static int efdtab[Nether*Flast]; +static uchar pkttab[Nether][Maxpkt]; +static uchar bctab[Nether][Maxpkt]; +static int mtutab[Nether]; +static Ioproc *ioprocs[Nether]; + +static int +getmtu(char *p) +{ + char buf[50]; + int fd, mtu; + + snprint(buf, sizeof buf, "%s/mtu", p); + if((fd = open(buf, OREAD)) == -1) + return 2; + if(read(fd, buf, 36) < 0) + return 2; + close(fd); + buf[36] = 0; + mtu = strtoul(buf+12, 0, 0)-Aoehsz-Aoeatasz; + return mtu>>9; +} + +static int +aoeopen(Ioproc *io, char *e, int fds[]) +{ + char buf[128], ctl[13]; + int n; + + snprint(buf, sizeof buf, "%s/clone", e); + if((fds[Fclone] = ioopen(io, buf, ORDWR)) == -1) + return -1; + memset(ctl, 0, sizeof ctl); + if(ioread(io, fds[Fclone], ctl, 
sizeof ctl - 1) < 0) + return -1; + n = atoi(ctl); + snprint(buf, sizeof buf, "connect %d", Aoetype); + if(iowrite(io, fds[Fclone], buf, strlen(buf)) != strlen(buf)) + return -1; + snprint(buf, sizeof buf, "%s/%d/data", e, n); + fds[Fdata] = ioopen(io, buf, ORDWR); + return fds[Fdata]; +} + +static void +replyhdr(Aoehdr *h, Vblade *vblade) +{ + uchar ea[Eaddrlen]; + + memmove(ea, h->dst, Eaddrlen); + memmove(h->dst, h->src, Eaddrlen); + memmove(h->src, ea, Eaddrlen); + + hnputs(h->major, vblade->shelf); + h->minor = vblade->slot; + h->verflag |= AFrsp; +} + +static int +servebad(uchar *pkt, Vblade*, int) +{ + Aoehdr *h; + + h = (Aoehdr*)pkt; + h->verflag |= AFerr; + h->error = AEcmd; + + return Aoehsz; +} + +static uchar nilea[Eaddrlen]; + +/* +static void +savemask(Vblade *vb) +{ + uvlong qpath, meta; + + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + meta = q2m(-1, qpath, 0); + if(meta == 0) + return; + setmetaint(meta, "nmask", nil, vb->nmask); + if(vb->mask) + setmetablob(meta, "mask", nil, vb->mask, vb->nmask * Eaddrlen, 0); + else + setmetastr(meta, "mask", nil, "", 0); +} +*/ + +static int +servemask(uchar *pkt, Vblade *vb, int mtu) +{ + int i, j, r, e; + uchar mx[Nmask*Eaddrlen], *mtab[Nmask], *p; + Aoem *m; + Aoemd *d; + + m = (Aoem*)(pkt + Aoehsz); + if(m->mcnt > (mtu - Aoehsz - Aoemsz)/Aoemdsz) + return -1; + + if(!canlock(&vb->mlk)) + return -1; /* drop */ + + switch(m->mcmd){ + default: + unlock(&vb->mlk); + return servebad(pkt, vb, mtu); + case Medit: + memcpy(mx, vb->mask, vb->nmask*Eaddrlen); + j = 0; + for(i = 0; i < vb->nmask; i++){ + p = mx + i*Eaddrlen; + if(memcmp(p, nilea, Eaddrlen) != 0) + mtab[j++] = p; + } + e = 0; + p = pkt + Aoehsz + Aoemsz; + for(i = 0; i < m->mcnt && e == 0; i++){ + d = (Aoemd*)(p + i*Aoemdsz); + switch(d->dcmd){ + default: + e = MEunk; + break; + case MDnop: + break; + case MDadd: + for(i = 0; i < j; i++) + if(memcmp(d->ea, mtab[j], Eaddrlen) == 0) + continue; + if(j == Nmask) + e = MEfull; + else + 
memcpy(mtab[j++], d->ea, Eaddrlen); + break; + case MDdel: + for(i = 0; i < j; i++) + if(memcmp(d->ea, mtab[j], Eaddrlen) == 0) + break; + if(i < j){ + for(; i < j; i++) + mtab[i] = mtab[i+1]; + j--; + } + break; + } + } + + if(e != 0){ + m->merr = e; + r = Aoehsz + Aoemsz; + break; + } + + p = malloc(j*Eaddrlen); + if(p == nil){ + r = -1; + break; + } + + for(i = 0; i < j; i++) + memcpy(p+i*Eaddrlen, mtab[i], Eaddrlen); + free(vb->mask); + vb->nmask = j; + vb->mask = p; + case Mread: + m->mcnt = vb->nmask; + m->merr = 0; + p = pkt + Aoehsz + Aoemsz; + for(i = 0; i < m->mcnt; i++){ + d = (Aoemd*)(p + i*Aoemdsz); + d->dres = 0; + d->dcmd = MDnop; + memcpy(d->ea, vb->mask + i*Eaddrlen, Eaddrlen); + } + r = Aoehsz + Aoemsz + m->mcnt * Aoemdsz; + break; + } + + unlock(&vb->mlk); + return r; +} + +static void +saveconfig(Vblade *vb) +{ + uvlong qpath, meta; + + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + meta = q2m(-1, qpath, 0); + if(meta == 0) + return; + if(vb->config) + setmetastr(meta, "config", nil, vb->config, 0); + else + setmetastr(meta, "config", nil, "", 0); +} + +static int +serveconfig(uchar *pkt, Vblade *vb, int mtu) +{ + char *cfg; + int cmd, reqlen, len; + Aoehdr *h; + Aoecfg *q; + + h = (Aoehdr*)pkt; + q = (Aoecfg*)(pkt + Aoehsz); + + if(memcmp(h->src, h->dst, Eaddrlen) == 0) + return -1; + + reqlen = nhgets(q->cslen); + len = vb->clen; + cmd = q->verccmd&0xf; + cfg = (char*)(pkt + Aoehsz + Aoecfgsz); + + switch(cmd){ + case AQCtest: + if(reqlen != len) + return -1; + case AQCprefix: + if(reqlen > len) + return -1; + if(memcmp(vb->config, cfg, reqlen) != 0) + return -1; + case AQCread: + break; + case AQCset: + if(len && len != reqlen || memcmp(vb->config, cfg, reqlen) != 0){ + h->verflag |= AFerr; + h->error = AEcfg; + break; + } + case AQCfset: + if(reqlen > Conflen){ + h->verflag |= AFerr; + h->error = AEarg; + break; + } + free(vb->config); + vb->config = θmalloc(reqlen + 1); + memmove(vb->config, cfg, reqlen); + vb->clen = len = 
reqlen; + saveconfig(vb); + break; + default: + h->verflag |= AFerr; + h->error = AEarg; + break; + } + + if(vb->config) + memmove(cfg, vb->config, len); + hnputs(q->cslen, len); + hnputs(q->bufcnt, Nmaxout); + q->scnt = mtu; + hnputs(q->fwver, 2323); + q->verccmd = Aoever<<4 | cmd; + + return len; +} + +static ushort ident[256] = { + [47] 0x8000, + [49] 0x0200, + [50] 0x4000, + [83] 0x5400, + [84] 0x4000, + [86] 0x1400, + [87] 0x4000, + [93] 0x400b, +}; + +static void +idmoveto(char *a, int idx, int len, char *s) +{ + char *p; + + p = a+idx*2; + for(; len > 0; len -= 2) { + if(*s == 0) + p[1] = ' '; + else + p[1] = *s++; + if (*s == 0) + p[0] = ' '; + else + p[0] = *s++; + p += 2; + } +} + +static void +lbamoveto(char *p, int idx, int n, vlong lba) +{ + int i; + + p += idx*2; + for(i = 0; i < n; i++) + *p++ = lba>>i*8; +} + +enum { + Crd = 0x20, + Crdext = 0x24, + Cwr = 0x30, + Cwrext = 0x34, + Cid = 0xec, +}; + +static uvlong +getlba(uchar *p) +{ + uvlong v; + + v = p[0]; + v |= p[1]<<8; + v |= p[2]<<16; + v |= p[3]<<24; + v |= (uvlong)p[4]<<32; + v |= (uvlong)p[5]<<40; + return v; +} + +static void +putlba(uchar *p, vlong lba) +{ + p[0] = lba; + p[1] = lba>>8; + p[2] = lba>>16; + p[3] = lba>>24; + p[4] = lba>>32; + p[5] = lba>>40; +} + +static int +serveata(uchar *pkt, Vblade *vb, int mtu) +{ + char *buf; + int rbytes, bytes, len; + vlong lba, off, qpath; + Aoehdr *h; + Aoeata *a; + + h = (Aoehdr*)pkt; + a = (Aoeata*)(pkt + Aoehsz); + + buf = (char*)(pkt + Aoehsz + Aoeatasz); + lba = getlba(a->lba); + len = a->scnt<<9; + off = lba<<9; + + if(a->scnt > mtu || a->scnt == 0){ + h->verflag |= AFerr; + h->error = AEarg; + a->cmdstat = ASdrdy|ASerr; + return 0; + } + + if(a->cmdstat != Cid) + if(lba+a->scnt > vb->maxlba){ + a->errfeat = Eidnf; + a->cmdstat = ASdrdy|ASerr; + return 0; + } + + if(a->cmdstat&0xf0 == 0x20) + lba &= 0xfffffff; + switch(a->cmdstat){ + default: + a->errfeat = Eabrt; + a->cmdstat = ASdrdy|ASerr; + return 0; + case Cid: + memmove(buf, ident, 
sizeof ident); + idmoveto(buf, 27, 40, "Plan 9 Vblade"); + idmoveto(buf, 10, 20, "serial#"); + idmoveto(buf, 23, 8, "2"); + lbamoveto(buf, 60, 4, vb->maxlba); + lbamoveto(buf, 100, 8, vb->maxlba); + a->cmdstat = ASdrdy; + return 512; + break; + case Crd: + case Crdext: + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + bytes = θpread(-1, qpath, buf, len, off); + rbytes = bytes; + break; + case Cwr: + case Cwrext: + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + bytes = θpwrite(qpath, buf, len, off, 0); + rbytes = 0; + break; + } + if(bytes != len){ + a->errfeat = Eabrt; + a->cmdstat = ASdf|ASerr; + putlba(a->lba, lba+(len-bytes)>>9); + return 0; + } + + putlba(a->lba, lba+a->scnt); + a->scnt = 0; + a->errfeat = 0; + a->cmdstat = ASdrdy; + + return rbytes; +} + +static int +myea(Ioproc *io, uchar ea[6], char *p) +{ + char buf[50]; + int fd; + + snprint(buf, sizeof buf, "%s/addr", p); + if((fd = ioopen(io, buf, OREAD)) == -1) + return -1; + if(ioread(io, fd, buf, 12) < 12) + return -1; + ioclose(io, fd); + return parseether(ea, buf); +} + +static int +bcastpkt(uchar *pkt, uint shelf, uint slot, int i) +{ + Aoehdr *h; + + h = (Aoehdr*)pkt; + myea(ioprocs[i], h->dst, ethertab[i]); + memset(h->src, 0xff, Eaddrlen); + hnputs(h->type, Aoetype); + hnputs(h->major, shelf); + h->minor = slot; + h->cmd = ACconfig; + *(u32int*)h->tag = 0; + return Aoehsz + Aoecfgsz; +} + +static int +osdgetattr(Aoeosd *o, int len, uvlong pid, uvlong oid) +{ + MVal x; + uchar *inbuf, *outbuf, *end; + char *name, *strval; + uvlong meta; + int n, nn, typ, tot; + + name = smprint("%016ullx:%016ullx", pid, oid); + meta = q2m(-1, p2q(-1, name, 0), 0); + free(name); + if(meta == 0) { + o->oflag = 0x40; + return 0; + } + len -= 20; + inbuf = θmalloc(len); + memmove(inbuf, o->oaddr, len); + end = inbuf + len; + outbuf = o->opid; + tot = 0; + while(inbuf < end) { + name = (char *)inbuf; /* the compiler's obsession with signed and unsigned is annoying */ + nn = strlen(name); + 
inbuf += nn + 1; + typ = getmeta(-1, meta, name, &x); + switch(typ) { +/* + case MTshort: + if(tot + nn + 4 >= 8192) + goto done; + tot += nn + 4; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'h'; + hnputs(outbuf, *((ushort *)x)); + outbuf += 2; + break; + case MTlong: + if(tot + nn + 6 >= 8192) + goto done; + tot += nn + 6; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'l'; + hnputl(outbuf, *((ulong *)x)); + outbuf += 4; + break; +*/ + case MTint: + if(tot + nn + 10 >= 8192) + goto done; + tot += nn + 10; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'v'; + hnputv(outbuf, x.val); + outbuf += 8; + break; + case MTistring: + n = strlen(x.str) + 1; + if(tot + nn + n + 3 >= 8192) + goto done; + tot += nn + n + 3; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 's'; + strcpy((char *)outbuf, x.str); + outbuf += n; + break; + case MTstring: + strval = getblob(-1, x.val, &n); + if(tot + nn + n + 3 >= 8192) + goto done; + tot += nn + n + 3; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 's'; + strcpy((char *)outbuf, strval); + free(strval); + outbuf += n; + break; + case MTblob: + strval = getblob(-1, x.val, &n); + if(tot + nn + n + 4 >= 8192) + goto done; + tot += nn + n + 4; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'b'; + hnputs(outbuf, n); + outbuf += 2; + memmove(outbuf, strval, n); + free(strval); + outbuf += n; + break; + } + } +done: + brelease(meta); + return tot; +} + +static int +osdsetattr(Aoeosd *o, int len, uvlong pid, uvlong oid) +{ + uchar *buf, *end; + char *name; + uvlong meta; + int n; + + name = smprint("%016ullx:%016ullx", pid, oid); + meta = q2m(-1, p2q(-1, name, 0), 0); + free(name); + if(meta == 0) { + o->oflag = 0x40; + return 0; + } + buf = o->oaddr; + end = buf + len; + while(buf < end) { + name = (char *)buf; /* the compiler's obsession with signed and unsigned is annoying */ + buf += strlen(name) + 1; + switch(*buf) { + case 
'h': + setmetaint(meta, name, nil, nhgets(buf + 1)); + buf += 3; + break; + case 'l': + setmetaint(meta, name, nil, nhgetl(buf + 1)); + buf += 5; + break; + case 'v': + setmetaint(meta, name, nil, nhgetv(buf + 1)); + buf += 9; + break; + case 's': + setmetastr(meta, name, nil, (char *)(buf + 1), 0); + buf += strlen((char *)(buf + 1)) + 2; + break; + case 'b': + n = *((ushort *)(buf + 1)); + setmetablob(meta, name, nil, buf + 3, n, 0); + buf += n + 1; + break; + } + } + return 0; +} + +static int +serveosd(Ioproc *io, uchar *pkt, int fd, int) +{ + Qid nqid; + Aoehdr *ah; + Aoeosd *o; + uchar *buf; + char *name; + uvlong x; + uvlong pid, oid, addr, meta, pmeta, dirblk, pqpath; + int n, len, rlen; + + ah = (Aoehdr *)pkt; + o = (Aoeosd *)(pkt + Aoehsz); + len = nhgets(o->olen); + /* for some commands, the pid, oid, or addr may be junk */ + pid = nhgetv(o->opid); + oid = nhgetv(o->ooid); + addr = nhgetv(o->oaddr); + + rlen = 0; + o->oflag = 0; +fprint(2, "OSD request: %016ullx:%016ullx len:%d cmd:%x addr:%ulld\n", pid, oid, len, o->ocmd, addr); + switch(o->ocmd) { + case AOCformat: + name = smprint("0000000000000000:0000000000000000"); + nqid.path = p2q(-1, name, 1); + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "pid", nil, 0); + setmetaint(meta, "oid", nil, 0); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "child", nil, 0); + setqhash(nqid.path, meta); + free(name); + savesuper(); + break; + case AOCcreate: + name = smprint("%016ullx:0000000000000000", pid); + pqpath = p2q(-1, name, 0); + pmeta = q2m(-1, pqpath, 0); + free(name); + if(pmeta == 0) { + o->oflag = 0x40; + break; + } + name = smprint("%016ullx:%016ullx", pid, oid); + nqid.path = p2q(-1, name, 1); + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + o->oflag = 0x40; + free(name); 
+ break; + } + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "pid", nil, pid); + setmetaint(meta, "oid", nil, oid); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "length", nil, 0); + setmetaint(meta, "parent", nil, pqpath); + getmetaint(-1, pmeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(pmeta, "child", nil, nqid.path); + dirblk = allocblock(); + if(dirblk != 0) { + cbclean(dirblk); + cbwrite(dirblk); + brelease(dirblk); + } + setmetaint(meta, "index", nil, dirblk); + setqhash(nqid.path, meta); + free(name); + savesuper(); + break; + case AOClist: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + meta = q2m(-1, pqpath, 0); + getmetaint(-1, meta, "child", &pqpath); + buf = o->opid; + while(len > 0 && pqpath != 0) { + meta = q2m(-1, pqpath, 0); + if(meta == 0) + break; + if(pid == 0) + getmetaint(-1, meta, "pid", &x); + else + getmetaint(-1, meta, "oid", &x); + hnputv(buf, x); + buf += 8; + len -= 8; + getmetaint(-1, meta, "sib", &pqpath); + } + rlen = len = buf - o->opid; + hnputs(o->olen, len); + break; + case AOCread: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + buf = o->opid; + len = θpread(-1, pqpath, buf, len, addr); + rlen = len; + free(name); + break; + case AOCwrite: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + buf = o->oaddr + 8; + len = θpwrite(pqpath, buf, len, addr, 1); + free(name); + break; + case AOCappend: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + buf = o->oaddr + 8; + len = θpwrite(pqpath, buf, len, 0, 2); + free(name); + break; + case AOCflush: + resetmeta(); + csync(); + break; + case AOCremove: + name = smprint("%016ullx:%016ullx", pid, oid); + meta = q2m(-1, p2q(-1, name, 0), 0); + if(meta == 0) { + o->oflag = 0x40; + free(name); + break; + } + getmetaint(-1, meta, 
"qpath", &x); + rmdlist(meta, x); + rmq(x, meta); + rmp(name); + free(name); + break; + case AOCpcreate: + name = smprint("0000000000000000:0000000000000000"); + pqpath = p2q(-1, name, 0); + pmeta = q2m(-1, pqpath, 0); + free(name); + if(pmeta == 0) { + o->oflag = 0x40; + break; + } + name = smprint("%016ullx:0000000000000000", pid); + nqid.path = p2q(-1, name, 1); + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + o->oflag = 0x40; + free(name); + break; + } + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "pid", nil, pid); + setmetaint(meta, "oid", nil, 0); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "parent", nil, pqpath); + setmetaint(meta, "child", nil, 0); + getmetaint(-1, pmeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(pmeta, "child", nil, nqid.path); + setqhash(nqid.path, meta); + free(name); + savesuper(); + break; + case AOCpremove: + name = smprint("%016ullx:0000000000000000", pid); + meta = q2m(-1, p2q(-1, name, 0), 0); + if(meta == 0) { + o->oflag = 0x40; + free(name); + break; + } + getmetaint(-1, meta, "child", &x); + if(x == 0) { + rmdlist(meta, x); + rmq(x, meta); + freeblock(meta); + rmp(name); + } + else { + o->oflag |= 0x40; + } + free(name); + break; + case AOCgetattr: + rlen = len = osdgetattr(o, len, pid, oid); + break; + case AOCsetattr: + rlen = len = osdsetattr(o, len, pid, oid); + break; + case AOCccreate: + case AOCcremove: + case AOCclist: + default: + o->oflag = 0x40; + break; + } + memmove(ah->dst, ah->src, Eaddrlen); + ah->verflag |= AFrsp; + if(o->oflag & 0x40) { + ah->verflag |= AFerr; + ah->error = AEarg; + } + o->oflag |= 0x80; + hnputs(o->olen, len); + n = rlen + 4 + sizeof(Aoehdr); + if(n < 60) + n = 60; + if(iowrite(io, fd, pkt, n) != n) { + fprint(2, "response write failed: %r\n"); + return -1; + } + return 0; +} + +static int +bladereply(Vblade 
*v, int i, int fd, uchar *pkt) +{ + int n; + Aoehdr *h; + + h = (Aoehdr*)pkt; + switch(h->cmd){ + case ACata: + n = serveata(pkt, v, mtutab[i]); + n += Aoehsz+Aoeatasz; + break; + case ACconfig: + n = serveconfig(pkt, v, mtutab[i]); + if(n >= 0) + n += Aoehsz+Aoecfgsz; + break; + case ACmask: + n = servemask(pkt, v, mtutab[i]); + break; + case ACosd: + if(v == vblade) + return serveosd(ioprocs[i], pkt, fd, mtutab[i]); + else + return 0; + break; + default: + n = servebad(pkt, v, mtutab[i]); + break; + } + if(n == -1) + return -1; + replyhdr(h, v); + if(n < 60){ + memset(pkt+n, 0, 60-n); + n = 60; + } + if(iowrite(ioprocs[i], fd, h, n) != n){ + fprint(2, "vblade: write to %s failed: %r\n", ethertab[i]); + return -1; + } + return 0; +} + +static int +filter(Vblade *v, uchar *ea) +{ + int i; + uchar *u; + + if(v->nmask == 0) + return 0; + + u = v->mask; + for(i = 0; i < v->nmask; i++) + if(memcmp(u + i*Eaddrlen, ea, Eaddrlen) == 0) + return 0; + return -1; +} + +static void +serve(void *a) +{ + int i, j, popcnt, vec, n, s, efd; + uchar *pkt, *bcpkt; + Aoehdr *h; + Vblade *v; + + i = (int)(uintptr)a; + + efd = efdtab[i*Flast+Fdata]; + pkt = pkttab[i]; + bcpkt = bctab[i]; + + n = 60; + h = (Aoehdr*)pkt; + bcastpkt(pkt, 0xffff, 0xff, i); + goto start; + + for(;;){ + n = ioread(ioprocs[i], efd, pkt, Maxpkt); + start: + if(shutdown) + threadexits(nil); + if(n < 60 || h->verflag & AFrsp) + continue; + s = nhgets(h->major); + popcnt = 0; + vec = 0; + for(j = 0; j < nblade; j++){ + v = vblade+j; + if(v->shelf == s || s == 0xffff) + if(v->slot == h->minor || h->minor == 0xff) + if(v->nmask == 0 || filter(v, h->src) == 0){ + popcnt++; + vec |= 1<0){ + memcpy(bcpkt, pkt, n); + bladereply(vblade + j, i, efd, bcpkt); + }else{ + bladereply(vblade + j, i, efd, pkt); + break; + } + } + } +} + +static void +aoeannounce(Vblade *vb) +{ + uchar *pkt; + int i; + + pkt = θmalloc(Maxpkt); + for(i = 0; i < etheridx; ++i) { + bcastpkt(pkt, 0xffff, 0xff, i); + bladereply(vb, i, 
efdtab[i*Flast+Fdata], pkt); + } +} + +void +starttarget(int major, int minor, uvlong nsect) +{ + Vblade *vp; + + vp = vblade + nblade; + vp->maxlba = nsect; + vp->nmask = 0; + vp->mask = nil; + vp->shelf = major; + vp->slot = minor; + vp->clen = 0; + ++nblade; + aoeannounce(vp); +} + +void +rmtarget(int major, int minor) +{ + int i; + + for(i = 0; i < nblade && (vblade[i].shelf != major || vblade[i].slot != minor); ++i) ; + if(i >= nblade) + return; + for(; i < nblade - 1; ++i) + vblade[i] = vblade[i+1]; + memset(vblade + i, 0, sizeof (Vblade)); + --nblade; +} + +static void +scanluns(void) +{ + uvlong x; + uvlong qpath, meta; + + for(qpath = super.firstlun; qpath; ) { + meta = q2m(-1, qpath, 0); + if(meta == 0) { + fprint(2, "No metadata for %ulld\n", qpath); + break; + } + getmetaint(-1, meta, "length", &x); + vblade[nblade].maxlba = x >> 9; + if(getmetaint(-1, meta, "nmask", &x) == MTnone) + vblade[nblade].nmask = 0; + else + vblade[nblade].nmask = x; + if(vblade[nblade].nmask != 0) { + if(getmeta(-1, meta, "masks", (MVal *)&x) == MTnone) + vblade[nblade].nmask = 0; + else + vblade[nblade].mask = getblob(-1, x, nil); + } + getmetaint(-1, meta, "aoemajor", &x); + vblade[nblade].shelf = x; + getmetaint(-1, meta, "aoeminor", &x); + vblade[nblade].slot = x; + if(vblade[nblade].config = getmetastr(-1, meta, "config")) + vblade[nblade].clen = strlen(vblade[nblade].config); + else + vblade[nblade].clen = 0; + ++nblade; + getmetaint(-1, meta, "nextlun", &qpath); + } +} + +static void +launch(char *tab[], int fdtab[]) +{ + int i; + + for(i = 0; tab[i]; i++){ + ioprocs[i] = ioproc(); + if(aoeopen(ioprocs[i], tab[i], fdtab+Flast*i) < 0) + sysfatal("network open: %r"); + threadcreate(serve, (void*)(uintptr)i, 32*1024); + } +} + +void +initaoe(void) +{ + int i; + + for(i = 0; i < etheridx; i++) + mtutab[i] = getmtu(ethertab[i]); + scanluns(); + launch(ethertab, efdtab); +} + +void +haltaoe(void) +{ + int i; + + for(i = 0; ethertab[i]; ++i) { + if(ioprocs[i]) { + 
iointerrupt(ioprocs[i]); + closeioproc(ioprocs[i]); + } + } +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:34 2014 @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +enum { + ACata, + ACconfig, + ACmask, + ACres, + ACkey, + ACosd, +}; + +enum { + AQCread, + AQCtest, + AQCprefix, + AQCset, + AQCfset, +}; + +enum { + AOCformat = 0x01, + AOCcreate, + AOClist, + AOCread = 0x05, + AOCwrite, + AOCappend, + AOCflush, + AOCremove = 0x0a, + AOCpcreate, + AOCpremove, + AOCgetattr = 0x0e, + AOCsetattr, + AOCccreate = 0x15, + AOCcremove, + AOCclist, +}; + +enum { + AEunk, + AEcmd, /* bad command */ + AEarg, /* bad argument */ + AEoff, /* device offline */ + AEcfg, /* config string already set */ + AEver, /* unsupported version */ + AEres, /* target reserved */ +}; + +enum { + /* mask commands */ + Mread = 0, + Medit, + + /* mask directives */ + MDnop = 0, + MDadd, + MDdel, + + /* mask errors */ + MEunk = 1, + MEbad, + MEfull, + + /* reserve / release */ + Rrread = 0, + Rrset, + Rrforce, +}; + +enum { + Aoetype = 0x88a2, + Aoesectsz = 512, + Aoemaxcfg = 1024, + + Aoehsz = 24, + Aoeatasz = 12, + Aoecfgsz = 8, + Aoerrsz = 2, + Aoemsz = 4, + Aoemdsz = 8, + + Aoever = 1, + + AFerr = 1<<2, + AFrsp = 1<<3, + + AAFwrite = 1, + AAFext = 1<<6, +}; + +typedef struct Aoehdr Aoehdr; +typedef struct Aoeata Aoeata; +typedef struct Aoecfg Aoecfg; +typedef struct Aoemd Aoemd; +typedef struct Aoem Aoem; +typedef struct Aoerr Aoerr; +typedef struct Aoeosd Aoeosd; + +struct Aoehdr { + uchar dst[Eaddrlen]; + uchar src[Eaddrlen]; + uchar type[2]; + uchar verflag; + uchar error; + uchar major[2]; + uchar minor; + uchar cmd; + uchar tag[4]; +}; + +struct Aoeata { + uchar aflag; + uchar errfeat; + uchar scnt; + uchar cmdstat; + uchar lba[6]; + uchar res[2]; +}; + +struct Aoecfg { + uchar bufcnt[2]; + uchar fwver[2]; + uchar scnt; + uchar verccmd; + uchar cslen[2]; +}; + +struct Aoemd { + uchar dres; + uchar dcmd; + uchar ea[Eaddrlen]; +}; + +struct Aoem { + uchar mres; + uchar mcmd; + uchar merr; + uchar mcnt; +}; + +struct Aoerr { + uchar rcmd; + uchar nea; + uchar ea0[]; +}; + +struct Aoeosd { + uchar ocmd; + uchar oflag; + uchar olen[2]; + uchar 
opid[8]; + uchar ooid[8]; + uchar oaddr[8]; +}; --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:34 2014 @@ -0,0 +1,862 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +enum { + CDirty = 1, + CFlushing = 2, + CFree = 4, + + CBrelease = 1, + CBclean, + CBread, + CBwrite, + CCanfree, + CReset, + + Ncht = 4001, + + Nreaders = 4, + + Ridle = 0, + Rloading = 1, +}; + +typedef struct CBlock CBlock; +typedef struct Cachereq Cachereq; +typedef struct Cacheresp Cacheresp; +typedef struct Reader Reader; + +struct CBlock { + Ref ref; + uvlong blkno; + uchar buf[BlkSize]; + int flags; + CBlock *next, *prev; + CBlock *htnext, *htprev; + CBlock *wnext, *wprev; +}; + +struct Cachereq { + int req; + uvlong blk; + Channel *resp; +}; + +struct Cacheresp { + int res; + void *p; +}; + +struct Reader { + char *dev; + Channel *rdchan; + int state; + uvlong loading; +}; + +static int mypid; +static Channel *wbtrigger; +static CBlock *ht[Ncht]; +static CBlock *chd, *ctl; +static CBlock *whd, *wtl; +static CBlock *freehd; +static int maxcache; +static Ref ncache; +static Ref ndirty; +static Ref nwlist; +static int timertid; +static uvlong nmiss; +static uvlong nread; +static uvlong nwrite; +static ulong hrate; +static int syncing; +static Reader rds[Nreaders]; +static Lock calock; + +Channel *cachechan; + +/* + * Because all the allocs and frees are done in threads of the + * same process, we shouldn't need any locks + */ +static CBlock * +cballoc(void) +{ + CBlock *p; + + if(freehd == nil) + return θmalloc(sizeof(CBlock)); + lock(&calock); + p = freehd; + freehd = p->next; + if(!(p->flags & CFree)) + fprint(2, "Internal error: non-free block on free list\n"); + p->flags &= ~CFree; + p->next = nil; + unlock(&calock); + return p; +} + +static void +cbfree(CBlock *p) +{ + if(p->flags & CFree) + fprint(2, "Freeing already free block?!?!?\n"); + else if(p->ref.ref != 0) + fprint(2, "Freeing in use block\n"); + else if(p->next || p->prev || p->htnext || p->htprev || p->wnext || p->wprev) + fprint(2, "Freeing block in data structures\n"); + else { + lock(&calock); + p->flags |= 
CFree; + p->next = freehd; + freehd = p; + unlock(&calock); + } +} + +static CBlock * +lookup(uvlong blk) +{ + CBlock *p; + int idx; + + idx = blk % Ncht; + for(p = ht[idx]; p && p->blkno != blk; p = p->htnext) ; + return p; +} + +static void +updatestats(int hit) +{ + if(!hit) { + ++nmiss; + hrate = (999 * hrate + 500) / 1000; + } + else + hrate = (999 * hrate + 500) / 1000 + 1000; +} + +static void +insht(CBlock *p) +{ + int idx; + + idx = p->blkno % Ncht; + if(ht[idx]) + ht[idx]->htprev = p; + p->htnext = ht[idx]; + ht[idx] = p; +} + +static void +rmht(CBlock *p) +{ + CBlock *nxt, *prv; + int idx; + + idx = p->blkno % Ncht; + nxt = p->htnext; + prv = p->htprev; + if(nxt) + nxt->htprev = prv; + if(prv) + prv->htnext = nxt; + if(ht[idx] == p) + ht[idx] = nxt; + p->htnext = nil; + p->htprev = nil; +} + +static void +inslru(CBlock *p) +{ + if(chd == nil) + chd = p; + else + ctl->next = p; + p->prev = ctl; + p->next = nil; + ctl = p; + incref(&ncache); +} + +static void +rmlru(CBlock *p) +{ + if(p->next) + p->next->prev = p->prev; + else + ctl = p->prev; + if(p->prev) + p->prev->next = p->next; + else + chd = p->next; + p->next = nil; + p->prev = nil; + decref(&ncache); +} + +static void +insw(CBlock *p) +{ + if(whd == nil) + whd = p; + else + wtl->wnext = p; + p->wprev = wtl; + p->wnext = nil; + wtl = p; + incref(&nwlist); +} + +static void +rmw(CBlock *p) +{ + if(p->wnext) + p->wnext->wprev = p->wprev; + else + wtl = p->wprev; + if(p->wprev) + p->wprev->wnext = p->wnext; + else + whd = p->wnext; + p->wnext = nil; + p->wprev = nil; + decref(&nwlist); +} + +static void +mvlru(CBlock *p) +{ + if(p != ctl) { + rmlru(p); + inslru(p); + } +} + +static void +dolru(void) +{ + CBlock *p; + int i; + + if(ncache.ref < maxcache) + return; + for(p = chd, i = 0; p && (p->ref.ref > 0 || (p->flags & (CDirty | CFlushing))); p = p->next, ++i) + if(i > ncache.ref) { + fprint(2, "cycle in LRU list? 
n:%ld d:%ld\n", ncache.ref, ndirty.ref); + chd->prev = nil; + ctl->next = nil; + return; + } + if(p) { + rmht(p); + rmlru(p); + if(p->flags & CDirty) { + decref(&ndirty); + p->flags &= ~CDirty; + rmw(p); + } + cbfree(p); + } +} + +static long +_iopread(va_list *arg) +{ + void *a; + vlong off; + long n; + int fd; + + fd = va_arg(*arg, int); + a = va_arg(*arg, void*); + n = va_arg(*arg, long); + off = va_arg(*arg, vlong); + return pread(fd, a, n, off); +} + +static long +_iopwrite(va_list *arg) +{ + void *a; + vlong off; + long n; + int fd; + + fd = va_arg(*arg, int); + a = va_arg(*arg, void*); + n = va_arg(*arg, long); + off = va_arg(*arg, vlong); + return pwrite(fd, a, n, off); +} + +static void +wbtimer(void *) +{ + while(!shutdown) { + sleep(15000); + if(syncing != 1) + sendul(wbtrigger, 1); + } + sendul(wbtrigger, 1); +} + +static void +wbthread(void *d) +{ + Ioproc *wbio; + CBlock *p; + char *dev; + int fd; + + dev = d; + wbio = ioproc(); + fd = ioopen(wbio, dev, ORDWR); + if(fd < 0) + sysfatal("wb open: %r"); + while(!shutdown) { + recvul(wbtrigger); + syncing = 1; + do { + for(p = whd; p && p->ref.ref > 0 && p->blkno >= super.firstdat; p = p->wnext) ; + if(p) { + p->flags |= CFlushing; + p->flags &= ~CDirty; + decref(&ndirty); + rmw(p); + ++nwrite; + iocall(wbio, _iopwrite, fd, p->buf, BlkSize, p->blkno * BlkSize); + p->flags &= ~CFlushing; + } + } while(p); + syncing = 0; + } + ioclose(wbio, fd); + closeioproc(wbio); +} + +static int +_brelease(uvlong blk) +{ + CBlock *p; + int rv; + + rv = 0; + p = lookup(blk); + if(p) { + if(p->ref.ref == 0) { + fprint(2, "trying to decrement below 0: blk %ulld\n", blk); + rv = -1; + } + else + decref(&p->ref); + } + else + rv = -1; + dolru(); + return rv; +} + +static void * +_cbclean(uvlong blk) +{ + CBlock *p; + + p = lookup(blk); + if(p) { + memset(p->buf, 0, BlkSize); + mvlru(p); + incref(&p->ref); + updatestats(1); + return p->buf; + } + updatestats(0); + dolru(); + p = cballoc(); + memset(p->buf, 0, BlkSize); + 
p->blkno = blk; + incref(&p->ref); + insht(p); + inslru(p); + return p->buf; +} + +/* + * Reader thread: takes block-load requests from rp->rdchan and + * answers each one on the request's resp channel. rsp.p is the + * block's buffer with a reference held for the requester, or nil + * if the device read failed. + */ +static void +reader(void *a) +{ + Cachereq r; + Cacheresp rsp; + Ioproc *cio; + Reader *rp; + CBlock *p; + int cfd, i; + + rp = a; + cio = ioproc(); + cfd = ioopen(cio, rp->dev, ORDWR); + if(cfd < 0) + sysfatal("Couldn't open device: %r"); + while(1) { + if(recv(rp->rdchan, &r) == 0) { + if(shutdown) { + closeioproc(cio); + threadexits(nil); + } + continue; + } + /* + * See if it got loaded while it was in the channel queue + */ + p = lookup(r.blk); + if(p) { + mvlru(p); + incref(&p->ref); + updatestats(1); + rsp.p = p->buf; + send(r.resp, &rsp); + continue; + } + /* + * If another reader is already loading this block, pass off the request + * to that reader. That way, by the time this request gets looked at + * again, the block will already be loaded. + */ + for(i = 0; i < Nreaders && (rds[i].state != Rloading || rds[i].loading != r.blk); ++i) ; + if(i < Nreaders) { + send(rds[i].rdchan, &r); + continue; + } + rp->state = Rloading; + rp->loading = r.blk; + dolru(); + p = cballoc(); + p->blkno = r.blk; + ++nread; + if(iocall(cio, _iopread, cfd, p->buf, BlkSize, r.blk * BlkSize) != BlkSize) { + /* + * p is in neither the hash table nor the LRU list and + * holds no reference yet, so cbfree accepts it. + */ + rp->state = Ridle; + cbfree(p); + rsp.p = nil; + send(r.resp, &rsp); + continue; + } + /* take the requester's reference only once the read succeeded */ + incref(&p->ref); + insht(p); + inslru(p); + rp->state = Ridle; + rsp.p = p->buf; + send(r.resp, &rsp); + } +} + +static void +_cbread(Cachereq *r) +{ + Cacheresp rsp; + CBlock *p; + static int rr; + + p = lookup(r->blk); + if(p) { + mvlru(p); + incref(&p->ref); + updatestats(1); + rsp.p = p->buf; + send(r->resp, &rsp); + return; + } + updatestats(0); + send(rds[rr].rdchan, r); + ++rr; + if(rr >= Nreaders) + rr = 0; +} + +static void +_cbwrite(uvlong blk) +{ + CBlock *p; + + p = lookup(blk); + if(p) { + mvlru(p); + if(!(p->flags & CDirty)) { + p->flags |= CDirty; + incref(&ndirty); + insw(p); + } + } + if(ndirty.ref > ncache.ref / 10 && !syncing) + nbsendul(wbtrigger, 1); +} + +static int +_ccanfree(uvlong
blk) +{ + CBlock *p; + + p = lookup(blk); + if(p) { + if(p->ref.ref > 0 /* || (p->flags & (CDirty | CFlushing)) */ ) { + fprint(2, "Wanting to free block %ulld with ref %ld and flags %x\n", blk, p->ref.ref, p->flags); + return 0; + } + if(p->flags & CDirty) { + decref(&ndirty); + rmw(p); + p->flags &= ~CDirty; + } + rmht(p); + rmlru(p); + cbfree(p); + } + return 1; +} + +/* + * Drop every cached block that has no references. Blocks still + * referenced stay in the LRU list and are reported with a warning. + */ +static void +_resetcache(void) +{ + CBlock *p; + + while(1) { + for(p = chd; p && p->ref.ref > 0; p = p->next) ; + if(p == nil) + break; + /* + * A dirty block is still linked on the write list; take it + * off first, as _ccanfree does, so cbfree never sees a block + * that is still in a list. + * NOTE(review): this discards the pending write; assumed to + * be what a reset (e.g. after revert) wants -- confirm. + */ + if(p->flags & CDirty) { + decref(&ndirty); + rmw(p); + p->flags &= ~CDirty; + } + rmht(p); + rmlru(p); + cbfree(p); + } + if(chd) + fprint(2, "warning: active blocks during reset\n"); +} + +static void +handler(void *) +{ + Cacheresp rsp; + Cachereq r; + + mypid = threadpid(threadid()); + while(1) { + if(recv(cachechan, &r) == 0) { + if(shutdown) + threadexits(nil); + continue; + } + switch(r.req) { + case CBrelease: + rsp.res = _brelease(r.blk); + if(r.resp) + send(r.resp, &rsp); + break; + case CBclean: + rsp.p = _cbclean(r.blk); + send(r.resp, &rsp); + break; + case CBread: + _cbread(&r); + break; + case CBwrite: + _cbwrite(r.blk); + send(r.resp, &rsp); + break; + case CCanfree: + rsp.res = _ccanfree(r.blk); + send(r.resp, &rsp); + break; + case CReset: + _resetcache(); + send(r.resp, &rsp); + break; + } + } +} + +void +initcache(char *dev, int m) +{ + int i; + + maxcache = m; + for(i = 0; i < Nreaders; ++i) { + rds[i].dev = dev; + rds[i].rdchan = chancreate(sizeof(Cachereq), 10); + threadcreate(reader, &rds[i], 8192); + } + cachechan = chancreate(sizeof(Cachereq), 2); + threadcreate(handler, nil, 8192); + wbtrigger = chancreate(sizeof(ulong), 2); + threadcreate(wbthread, dev, 8192); + timertid = proccreate(wbtimer, nil, 1024); +} + +void +haltcache(void) +{ + int i; + + for(i = 0; i < Nreaders; ++i) + chanclose(rds[i].rdchan); + chanclose(cachechan); + threadkill(timertid); + sendul(wbtrigger, 1); + for(i = 0; i < 30 && whd; ++i) { + fprint(2, "."); + sleep(1000); + } +} + +int +brelease(uvlong blk) +{ + Cachereq r; + + if(mypid ==
threadpid(threadid())) +// return _brelease(blk); +{ +int n; +n=_brelease(blk); +if(n==-1) fprint(2, "brelease error called from %p\n", getcallerpc(&blk)); +return n; +} + r.req = CBrelease; + r.blk = blk; + r.resp = nil; + send(cachechan, &r); + return 0; +} + +void * +cbclean(uvlong blk) +{ + Cachereq r; + Cacheresp rsp; + + if(mypid == threadpid(threadid())) + return _cbclean(blk); + r.req = CBclean; + r.blk = blk; + r.resp = chancreate(sizeof(Cacheresp), 0); + send(cachechan, &r); + recv(r.resp, &rsp); + chanfree(r.resp); + return rsp.p; +} + +void * +cbread(uvlong blk) +{ + Cachereq r; + Cacheresp rsp; + CBlock *p; + + if(mypid == threadpid(threadid())) { + p = lookup(blk); + if(p) { + mvlru(p); + incref(&p->ref); + updatestats(1); + return p->buf; + } + } + r.req = CBread; + r.blk = blk; + r.resp = chancreate(sizeof(Cacheresp), 0); + send(cachechan, &r); + recv(r.resp, &rsp); + chanfree(r.resp); + return rsp.p; +} + +void +cbwrite(uvlong blk) +{ + Cachereq r; + Cacheresp rsp; + + if(mypid == threadpid(threadid())) { + _cbwrite(blk); + return; + } + r.req = CBwrite; + r.blk = blk; + r.resp = chancreate(sizeof(Cacheresp), 0); + send(cachechan, &r); + recv(r.resp, &rsp); + chanfree(r.resp); +} + +int +ccanfree(uvlong blk) +{ + Cachereq r; + Cacheresp rsp; + + if(mypid == threadpid(threadid())) + return _ccanfree(blk); + r.req = CCanfree; + r.blk = blk; + r.resp = chancreate(sizeof(Cacheresp), 0); + send(cachechan, &r); + recv(r.resp, &rsp); + chanfree(r.resp); + return rsp.res; +} + +int +cread(void *a, int n, uvlong off) +{ + uchar *p; + uvlong blk; + ulong boff; + + blk = off / BlkSize; + boff = off % BlkSize; + if(boff + n > BlkSize) { + fprint(2, "invalid block crossing\n"); + return -1; + } + p = cbread(blk); + if(p == nil) + return -1; + memmove(a, p + boff, n); + brelease(blk); + return n; +} + +int +cwrite(void *a, int n, uvlong off) +{ + uchar *p; + uvlong blk; + ulong boff; + + blk = off / BlkSize; + if(blk == 0) + return -1; + boff = off % BlkSize; + 
if(boff + n > BlkSize) { + fprint(2, "invalid block crossing\n"); + return -1; + } + p = cbread(blk); + if(p == nil) + return -1; + memmove(p + boff, a, n); + cbwrite(blk); + brelease(blk); + return n; +} + +void +csync(void) +{ + syncing = 2; + threadint(timertid); + while(syncing != 0) + yield(); +} + +static char cstatbuf[1024]; + +char * +prcstat(void) +{ + CBlock *cb; + char *p, *e; + int ldirty, i, nhash; + int refhist[10]; +int saidit = 0; + + ldirty = 0; + p = cstatbuf; + e = p + nelem(cstatbuf); + memset(refhist, 0, 10 * sizeof(int)); + p = seprint(p, e, "Cache stats:\n"); + p = seprint(p, e, "ncache: %ld\n", ncache.ref); + p = seprint(p, e, "nwlist: %ld\n", nwlist.ref); + p = seprint(p, e, "ndirty: %ld\n", ndirty.ref); + for(cb = chd; cb; cb = cb->next) { + if(cb->flags & CDirty) +{ +if(!saidit) {p = seprint(p, e, "dirty block ref:%ld blk:%ulld\n", cb->ref.ref, cb->blkno); ++saidit;} + ++ldirty; +} + if(cb->ref.ref < 0) { + p = seprint(p, e, "bad ref count: %ld on block %ulld; setting to 0\n", cb->ref.ref, cb->blkno); + cb->ref.ref = 0; + } + else if(cb->ref.ref >= 9) + ++refhist[9]; + else + ++refhist[cb->ref.ref]; + if(cb->ref.ref > 0) + p = seprint(p, e, "In use block: %ulld flags %ux\n", cb->blkno, cb->flags); + } + nhash = 0; + for(i = 0; i < Ncht; ++i) { + for(cb = ht[i]; cb; cb = cb->htnext) + ++nhash; + } + p = seprint(p, e, "nhash: %d\n", nhash); + p = seprint(p, e, "ldirty: %d\n", ldirty); + p = seprint(p, e, "nread: %ulld\n", nread); + p = seprint(p, e, "nwrite: %ulld\n", nwrite); + p = seprint(p, e, "nmiss: %ulld\n", nmiss); + p = seprint(p, e, "hit rate: %uld%%\n", (hrate + 5000) / 10000); + p = seprint(p, e, "ref count histogram:\n"); + p = seprint(p, e, " 0 1 2 3 4 5 6 7 8 >8\n"); + for(i = 0; i < 10; ++i) + p = seprint(p, e, "%4d ", refhist[i]); + seprint(p, e, "\n"); + return cstatbuf; +} + +void +resetcache(void) +{ + Cachereq r; + Cacheresp rsp; + + if(mypid == threadpid(threadid())) { + _resetcache(); + return; + } + r.req = CReset; + 
r.resp = chancreate(sizeof(Cacheresp), 0); + send(cachechan, &r); + recv(r.resp, &rsp); + chanfree(r.resp); +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:27 2014 @@ -0,0 +1,789 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <u.h> +#include <libc.h> +#include <fcall.h> +#include <thread.h> +#include <9p.h> +#include <stdio.h> /* sscanf */ +#include "dat.h" + +enum { + CMallow, + CMblockuse, + CMcheckalloc, + CMcstat, + CMdisallow, + CMfixfamilies, + CMfixpaths, + CMhalt, + CMhelp, + CMhstat, + CMlcreate, + CMlls, + CMlmeta, + CMlrm, + CMmpred, + CMmprint, + CMmstat, + CMnewroot, + CMnfsdebug, + CMp2q, + CMp9debug, + CMphash, + CMpmeta, + CMq2m, + CMqmeta, + CMrecovermeta, + CMrevert, + CMrmp, + CMrootallow, + CMrootdisallow, + CMsetmeta, + CMsetmstruct, + CMsetqhash, + CMsnap, + CMsuper, + CMsync, +}; + +enum { + SecPerDay = 24 * 60 * 60, +}; + +static void θconsread(Req *); +static void θconswrite(Req *); + +static Srv θconssrv = { + .start = θstart, + .read = θconsread, + .write = θconswrite, +}; + +static int snapid; +static Channel *snaptrigger; +static Ioproc *consio; +static int pfd[2]; +static Cmdtab ctab[] = { + {CMallow, "allow", 1}, + {CMblockuse, "blockuse", 2}, + {CMcheckalloc, "checkalloc", 1}, + {CMcstat, "cstat", 1}, + {CMdisallow, "disallow", 1}, + {CMfixfamilies, "fixfamilies", 1}, + {CMfixpaths, "fixpaths", 1}, + {CMhalt, "halt", 1}, + {CMhelp, "help", 1}, + {CMhstat, "hstat", 1}, + {CMlcreate, "lcreate", 3}, + {CMlls, "lls", 1}, + {CMlmeta, "lmeta", 2}, + {CMlrm, "lrm", 2}, + {CMmpred, "mpred", 2}, + {CMmprint, "mprint", 2}, + {CMmstat, "mstat", 1}, + {CMnewroot, "newroot", 2}, + {CMnfsdebug, "nfsdebug", 0}, + {CMp2q, "p2q", 2}, + {CMp9debug, "p9debug", 2}, + {CMphash, "phash", 2}, + {CMpmeta, "pmeta", 2}, + {CMq2m, "q2m", 2}, + {CMqmeta, "qmeta", 2}, + {CMrecovermeta, "recovermeta", 1}, + {CMrevert, "revert", 2}, + {CMrmp, "rmp", 2}, + {CMrootallow, "rootallow", 1}, + {CMrootdisallow, "rootdisallow", 1}, + {CMsetmeta, "setmeta", 5}, + {CMsetmstruct, "setmstruct", 6}, + {CMsetqhash, "setqhash", 3}, + {CMsnap, "snap", 1}, + {CMsuper, "super", 1}, + {CMsync, "sync", 1}, +}; + +extern int chatty9p; + +int allow; +int rootallow; + +static void +showhelp(void) +{ + int i; + + for(i = 0; i < nelem(ctab); ++i) +
fprint(pfd[1], "%-15s %d\n", ctab[i].cmd, ctab[i].narg); +} + +/* + * Create the LUN named by aoeid ("major.minor") holding size 512-byte + * sectors: allocate its metadata and top-level index block, link it at + * the head of the super.firstlun list and start the target. + * Progress and errors are reported on pfd[1]. + */ +static void +lcreate(char *aoeid, uvlong size) +{ + Qid nqid; + uvlong meta, dirblk, now, nblk, pperb; + int sperb; + int aoemajor, aoeminor; + + sperb = BlkSize / 512; + pperb = BlkSize / 8; + nblk = (size + sperb - 1) / sperb; + if(nblk + 3 >= super.nfree) { + fprint(pfd[1], "Not enough space\n"); + return; + } + /* + * Both numbers must parse, and must fit the qid packing below + * (minor in the low 8 bits) and the 16-bit AoE major. + */ + if(sscanf(aoeid, "%d.%d", &aoemajor, &aoeminor) != 2 + || aoemajor < 0 || aoemajor > 0xffff || aoeminor < 0 || aoeminor > 0xff) { + fprint(pfd[1], "Bad target %s: want major.minor\n", aoeid); + return; + } + nqid.path = ((uvlong)TLun << 60) | (aoemajor << 8) | aoeminor; + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + fprint(pfd[1], "Creation failure\n"); + return; + } + setmetaint(meta, "aoemajor", nil, aoemajor); + setmetaint(meta, "aoeminor", nil, aoeminor); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + now = nsec(); + setmetaint(meta, "ctime", nil, now); + setmetaint(meta, "length", nil, size << 9); + dirblk = allocblock(); + if(dirblk != 0) { + cbclean(dirblk); + cbwrite(dirblk); + brelease(dirblk); + } + if(nblk <= pperb) + setmetaint(meta, "index", nil, dirblk); + else if(nblk <= pperb * pperb) + setmetaint(meta, "indirect", nil, dirblk); + else + setmetaint(meta, "dblindir", nil, dirblk); + setmetaint(meta, "nextlun", nil, super.firstlun); + setqhash(nqid.path, meta); + super.firstlun = nqid.path; + savesuper(); + starttarget(aoemajor, aoeminor, size); + resetmeta(); + csync(); + fprint(pfd[1], "Created %d.%d with qid %ulld\n", aoemajor, aoeminor, nqid.path); +} + +static char llsbuf[1024]; + +static char * +lls(void) +{ + char *p, *e; + uvlong x; + uvlong qpath, meta, length; + int aoemajor, aoeminor; + + p = llsbuf; + e = llsbuf + nelem(llsbuf); + p = seprint(p, e, "Luns:\n"); + for(qpath = super.firstlun; qpath; ) { + meta = q2m(-1, qpath, 0); + if(meta == 0) { + seprint(p, e, "no metadata for %ulld\n", qpath); + return llsbuf; + } + getmetaint(-1, meta, "aoemajor", &x); + aoemajor = x; + getmetaint(-1,
meta, "aoeminor", &x); + aoeminor = x; + getmetaint(-1, meta, "length", &x); + length = x; + p = seprint(p, e, "%d.%d %ulld\n", aoemajor, aoeminor, length); + getmetaint(-1, meta, "nextlun", &qpath); + } + return llsbuf; +} + +/* + * Print the metadata of the LUN named by aoeid ("major.minor") on pfd[1]. + */ +static void +lmeta(char *aoeid) +{ + uvlong qpath; + int aoemajor, aoeminor; + + /* without this check a malformed id leaves both numbers unset */ + if(sscanf(aoeid, "%d.%d", &aoemajor, &aoeminor) != 2) { + fprint(pfd[1], "Bad target %s: want major.minor\n", aoeid); + return; + } + qpath = ((uvlong)TLun << 60) | (aoemajor << 8) | aoeminor; + fprint(pfd[1], "metadata for %d.%d:\n", aoemajor, aoeminor); + prmeta(pfd[1], qpath); +} + +/* + * Remove the LUN named by aoeid ("major.minor"): free its data, unlink + * it from the list that starts at super.firstlun, drop its metadata + * and remove the target. + */ +static void +lrm(char *aoeid) +{ + uvlong qpath, meta, nextlun, qt, mt; + int aoemajor, aoeminor; + + /* never act on unparsed (uninitialised) major/minor values */ + if(sscanf(aoeid, "%d.%d", &aoemajor, &aoeminor) != 2) { + fprint(pfd[1], "Bad target %s: want major.minor\n", aoeid); + return; + } + qpath = ((uvlong)TLun << 60) | (aoemajor << 8) | aoeminor; + meta = q2m(-1, qpath, 0); + if(meta == 0) { + fprint(pfd[1], "Not found\n"); + return; + } + freedata(meta); + getmetaint(-1, meta, "nextlun", &nextlun); + if(super.firstlun == qpath) { + super.firstlun = nextlun; + savesuper(); + } + else { + /* walk the list to find the entry whose nextlun is qpath */ + qt = super.firstlun; + while(1) { + mt = q2m(-1, qt, 0); + if(mt == 0) { + fprint(pfd[1], "Missing metadata in LUN set\n"); + goto bail; + } + getmetaint(-1, mt, "nextlun", &qt); + if(qt == qpath) + break; + } + setmetaint(mt, "nextlun", nil, nextlun); + } +bail: + rmq(qpath, meta); + freeblock(meta); + rmtarget(aoemajor, aoeminor); + resetmeta(); + csync(); +} + +static void +newroot(char *name) +{ + Qid rootqid; + char *me, *path; + uvlong meta; + vlong now; + + path = smprint("/%s", name); + rootqid.path = p2q(-1, path, 1); + meta = q2m(-1, rootqid.path, 1); + setmetastr(meta, "name", nil, path, 0); + rootqid.vers = 0; + rootqid.type = QTDIR; + setmetaint(meta, "qpath", nil, rootqid.path); + setmetaint(meta, "qvers", nil, rootqid.vers); + setmetaint(meta, "qtype", nil, rootqid.type); + setmetaint(meta, "mode", nil, DMDIR | 0775); + now = nsec(); + setmetaint(meta, "atime", nil, now); + setmetaint(meta, "mtime", nil, now); + setmetaint(meta, "length", nil, 0); + me = getuser(); + setmetastr(meta, "uid", nil, me, 0); +
setmetastr(meta, "gid", nil, me, 0); + setmetastr(meta, "muid", nil, me, 0); + setmetaint(meta, "child", nil, 0); + setqhash(rootqid.path, meta); + savesuper(); + free(path); +} + +static char * +dosnap(void) +{ + Qid qid; + Tm *today; + char *me; + uvlong meta, now, dqid, dmeta, yqid, ymeta, x; + int fd, seq, n; + char path[128], sname[32]; + + dqid = p2q(-1, "/dump", 0); + if(dqid == 0) + return "no dump"; + dmeta = q2m(-1, dqid, 0); + snprint(path, 127, "%s/ctl", ddir); + fd = open(path, ORDWR); + if(fd < 0) + return "no snap"; + today = localtime(time(0)); + snprint(path, 127, "/dump/%04d", today->year + 1900); + seq = 0; + yqid = p2q(-1, path, 0); + if(yqid == 0) { + qid.path = p2q(-1, path, 1); + yqid = qid.path; + qid.vers = 0; + qid.type = QTDIR; + ymeta = q2m(-1, qid.path, 1); + snprint(path, 127, "%04d", today->year + 1900); + setmetastr(ymeta, "name", nil, path, 0); + setmetaint(ymeta, "qpath", nil, qid.path); + setmetaint(ymeta, "qvers", nil, qid.vers); + setmetaint(ymeta, "qtype", nil, qid.type); + setmetaint(ymeta, "mode", nil, DMDIR | 0775); + setmetaint(ymeta, "parent", nil, dqid); + now = nsec(); + setmetaint(ymeta, "atime", nil, now); + setmetaint(ymeta, "mtime", nil, now); + setmetaint(ymeta, "length", nil, 0); + me = getuser(); + setmetastr(ymeta, "uid", nil, me, 0); + setmetastr(ymeta, "gid", nil, me, 0); + setmetastr(ymeta, "muid", nil, me, 0); + getmetaint(-1, dmeta, "child", &x); + setmetaint(ymeta, "sib", nil, x); + setmetaint(dmeta, "child", nil, yqid); + setmetaint(ymeta, "child", nil, 0); + setqhash(qid.path, ymeta); + savesuper(); + snprint(path, 127, "/dump/%04d/%02d%02d", today->year + 1900, today->mon+1, today->mday); + } + else { + snprint(path, 127, "/dump/%04d/%02d%02d", today->year + 1900, today->mon+1, today->mday); + if(p2q(-1, path, 0) != 0) { + for(seq = 1; seq < 10; ++seq) { + snprint(path, 127, "/dump/%04d/%02d%02d%d", + today->year + 1900, today->mon+1, today->mday, seq); + if(p2q(-1, path, 0) == 0) + break; + } + if(seq 
>= 10) { + close(fd); + return "too many snaps"; + } + } + ymeta = q2m(-1, yqid, 0); + } + qid.path = p2q(-1, path, 1); + qid.vers = 0; + qid.type = QTDIR; + meta = q2m(-1, qid.path, 1); + if(seq == 0) { + snprint(path, 127, "%02d%02d", today->mon+1, today->mday); + snprint(sname, 31, "%s.%04d%02d%02d", + dname, today->year + 1900, today->mon+1, today->mday); + } + else { + snprint(path, 127, "%02d%02d%d", today->mon+1, today->mday, seq); + snprint(sname, 31, "%s.%04d%02d%02d%d", + dname, today->year + 1900, today->mon+1, today->mday, seq); + } + resetmeta(); + csync(); + n = fprint(fd, "snap %s %s", dname, sname); + close(fd); + if(n < 0) + return (char *)(~0); + setmetastr(meta, "name", nil, path, 0); + setmetaint(meta, "qpath", nil, qid.path); + setmetaint(meta, "qvers", nil, qid.vers); + setmetaint(meta, "qtype", nil, qid.type); + setmetaint(meta, "mode", nil, DMDIR | 0775); + setmetaint(meta, "parent", nil, yqid); + now = nsec(); + setmetaint(meta, "atime", nil, now); + setmetaint(meta, "mtime", nil, now); + setmetaint(meta, "length", nil, 0); + me = getuser(); + setmetastr(meta, "uid", nil, me, 0); + setmetastr(meta, "gid", nil, me, 0); + setmetastr(meta, "muid", nil, me, 0); + getmetaint(-1, ymeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(ymeta, "child", nil, qid.path); + setmetastr(meta, "snap", nil, sname, 0); + setqhash(qid.path, meta); + savesuper(); + return nil; +} + +static char * +revert(char *snap) +{ + char *path, *p; + int fd, n; + + path = smprint("%s/ctl", ddir); + fd = open(path, ORDWR); + free(path); + if(fd < 0) + return (char *)(~0); + p = strchr(snap, '/'); + if(p) + path = smprint("%s.%.*s%s", dname, (int)(p - snap), snap, p + 1); + else + path = smprint("%s.%s", dname, snap); + n = fprint(fd, "revert %s %s", dname, path); + free(path); + close(fd); + resetmeta(); + resetcache(); + if(n < 0) + return (char *)(~0); + return nil; +} + +static void +doshutdown(void) +{ + shutdown = 1; + threadkill(snapid); + haltaoe(); + 
haltnfs(); + halt9p(); + haltfree(); + haltcache(); + threadkillgrp(threadgetgrp()); +} + +/* + * Console command loop: prompt on pfd[1], read a line through consio, + * look the command up in ctab and run it. Returns when the read + * fails or after a halt command. The Cmdbuf returned by parsecmd is + * freed once its command has been handled. + */ +void +docons(void *x) +{ + Cmdbuf *cb; + Cmdtab *ct; + char *s; + char buf[256]; + uvlong vl; + int n; + + USED(x); + while(1) { + fprint(pfd[1], "> "); + n = ioread(consio, pfd[1], buf, 255); + if(n <= 0) + return; + buf[n] = 0; + cb = parsecmd(buf, n); + if(cb == nil) { + fprint(pfd[1], "Unparsable command %s\n", buf); + continue; + } + if(cb->nf == 0) { + free(cb); + continue; + } + ct = lookupcmd(cb, ctab, nelem(ctab)); + if(ct == nil) { + fprint(pfd[1], "%s: %r\n", buf); + free(cb); + continue; + } + switch(ct->index) { + case CMallow: + allow = 1; + break; + case CMblockuse: + blockuse(pfd[1], strtoull(cb->f[1], nil, 0)); + break; + case CMcheckalloc: + checkalloc(pfd[1]); + break; + case CMcstat: + fprint(pfd[1], "%s", prcstat()); + break; + case CMdisallow: + allow = 0; + break; + case CMfixfamilies: + fixfamilies(pfd[1]); + break; + case CMfixpaths: + fixpaths(pfd[1]); + break; + case CMhalt: + free(cb); + doshutdown(); + return; + case CMhelp: + showhelp(); + break; + case CMhstat: + fprint(pfd[1], "%s", prhstat()); + break; + case CMlcreate: + lcreate(cb->f[1], strtoull(cb->f[2], nil, 10)); + break; + case CMlls: + fprint(pfd[1], "%s", lls()); + break; + case CMlmeta: + lmeta(cb->f[1]); + break; + case CMlrm: + lrm(cb->f[1]); + break; + case CMmpred: + mpred(pfd[1], strtoull(cb->f[1], nil, 0)); + break; + case CMmprint: + mprint(pfd[1], strtoull(cb->f[1], nil, 0)); + break; + case CMmstat: + fprint(pfd[1], "%s", prmstat()); + break; + case CMnewroot: + newroot(cb->f[1]); + break; + case CMnfsdebug: + if(cb->nf < 2) + fprint(pfd[1], "%d\n", debugnfs); + else + debugnfs = atoi(cb->f[1]); + break; + case CMp2q: + vl = p2q(-1, cb->f[1], 0); + fprint(pfd[1], "%ulld\n", vl); + break; + case CMp9debug: + chatty9p = atoi(cb->f[1]); + break; + case CMphash: + showphash(pfd[1], cb->f[1]); + break; + case CMpmeta: + fprint(pfd[1], "metadata for %s\n", cb->f[1]); + prmeta(pfd[1], p2q(-1, cb->f[1], 0)); + break; + case
CMq2m: + vl = q2m(-1, strtoull(cb->f[1], nil, 10), 0); + fprint(pfd[1], "%ulld\n", vl); + break; + case CMqmeta: + fprint(pfd[1], "metadata for %s\n", cb->f[1]); + prmeta(pfd[1], strtoull(cb->f[1], nil, 10)); + break; + case CMrecovermeta: + recovermeta(pfd[1]); + break; + case CMrevert: + s = revert(cb->f[1]); + if(s == (char *)(~0)) + fprint(pfd[1], "%r\n"); + else if(s) + fprint(pfd[1], "%s\n", s); + break; + case CMrmp: + rmp(cb->f[1]); + break; + case CMrootallow: + rootallow = 1; + break; + case CMrootdisallow: + rootallow = 0; + break; + case CMsetmeta: + vl = q2m(-1, strtoull(cb->f[1], nil, 10), 0); + if(cb->f[2][0] == 's') + setmetastr(vl, cb->f[3], nil, cb->f[4], 0); + else + setmetaint(vl, cb->f[3], nil, strtoull(cb->f[4], nil, 0)); + break; + case CMsetmstruct: + vl = strtoull(cb->f[1], nil, 0); + setmstruct(vl, strtoull(cb->f[2], nil, 0), cb->f[3], atoi(cb->f[4]), strtoull(cb->f[5], nil, 0)); + break; + case CMsetqhash: + setqhash(strtoull(cb->f[1], nil, 0), strtoull(cb->f[2], nil, 0)); + break; + case CMsnap: + s = dosnap(); + if(s == (char *)(~0)) + fprint(pfd[1], "%r\n"); + else if(s) + fprint(pfd[1], "%s\n", s); + break; + case CMsuper: + fprint(pfd[1], "%s", prsuper()); + break; + case CMsync: + resetmeta(); + csync(); + break; + } + free(cb); + } +} + +static void +θconsread(Req *r) +{ + char *s; + + s = smprint("%s\n%s\n%s\n%s\n%s", prsuper(), prcstat(), prmstat(), prhstat(), lls()); + readstr(r, s); + free(s); + respond(r, nil); +} + +/* + * 9P write handler for the control file: parse the written text as one + * command and run the subset of ctab commands supported here. + * A handler result of (char *)(~0) means "failed, reason is in the + * error string"; any other non-nil result is the error text itself. + * On success the whole write is reported as consumed. + */ +static void +θconswrite(Req *r) +{ + Cmdbuf *cb; + Cmdtab *ct; + char *s; + char err[ERRMAX]; + + s = nil; + cb = parsecmd(r->ifcall.data, r->ifcall.count); + if(cb == nil) { + respond(r, "unparsable command"); + return; + } + if(cb->nf == 0) { + free(cb); + r->ofcall.count = r->ifcall.count; + respond(r, nil); + return; + } + ct = lookupcmd(cb, ctab, nelem(ctab)); + if(ct == nil) { + /* r->error is not set yet; the reason is in the error string, as docons prints with %r */ + rerrstr(err, sizeof err); + free(cb); + respond(r, err); + return; + } + switch(ct->index) { + case CMallow: + allow = 1; + break; + case CMcheckalloc: + checkalloc(pfd[1]); + break; + case CMdisallow: + allow = 0; + break; + case CMlcreate: +
lcreate(cb->f[1], strtoull(cb->f[2], nil, 10)); + break; + case CMlrm: + lrm(cb->f[1]); + break; + case CMnewroot: + newroot(cb->f[1]); + break; + case CMp9debug: + chatty9p = atoi(cb->f[1]); + break; + case CMrevert: + s = revert(cb->f[1]); + break; + case CMrmp: + rmp(cb->f[1]); + break; + case CMrootallow: + rootallow = 1; + break; + case CMrootdisallow: + rootallow = 0; + break; + case CMsetmstruct: + setmstruct(strtoull(cb->f[1], nil, 0), strtoull(cb->f[2], nil, 0), cb->f[3], atoi(cb->f[4]), strtoull(cb->f[5], nil, 0)); + break; + case CMsetqhash: + setqhash(strtoull(cb->f[1], nil, 0), strtoull(cb->f[2], nil, 0)); + break; + case CMsnap: + s = dosnap(); + break; + case CMsync: + resetmeta(); + csync(); + break; + default: + s = "unsupported ctl command"; + break; + } + if(s == (char *)(~0)) { + /* fetch the error string before anything else can replace it */ + rerrstr(err, sizeof err); + s = err; + } + else if(s == nil) + r->ofcall.count = r->ifcall.count; + free(cb); + respond(r, s); +} + +static void +mysrvproc(void *a) +{ + Srv *s; + int data; + + s = a; + data = s->infd; + srv(s); + close(data); + threadexits(nil); +} + +static void +snapthread(void *) +{ + while(1) { + recvul(snaptrigger); + if(shutdown) + break; + dosnap(); + } + threadexits(nil); +} + +static void +snapproc(void *) +{ +// Tm *now; + ulong cursec, waitsec; + + sleep(300*1000); /* Give some time to get the clock set before looking at tod */ + while(1) { + /* + * We'd like to get the time zone correction here, but + * it doesn't play nicely with the threading. I'll come + * back to this later.
+ */ +// now = localtime(time(nil)); +// cursec = (now->hour * 60 + now->min) * 60 + now->sec; + cursec = time(nil) % SecPerDay; + waitsec = (super.snaptime + SecPerDay - cursec) % SecPerDay; + if(waitsec < 60) + waitsec = SecPerDay; + sleep(waitsec*1000); + sendul(snaptrigger, 1); + if(shutdown) + break; + } + threadexits(nil); +} + +void +initcons(int postcons) +{ + char *me; + int cfd[2]; + + if(postcons) { + consio = ioproc(); + me = getuser(); + θconssrv.tree = alloctree(me, me, 0555, nil); + createfile(θconssrv.tree->root, "θfsctl", me, 0664, nil); + if(pipe(cfd) < 0) + sysfatal("pipe: %r"); + θconssrv.infd = θconssrv.outfd = cfd[1]; + conspost(cfd, pfd); + threadcreate(mysrvproc, &θconssrv, 32 * 1024); + } + snaptrigger = chancreate(sizeof(ulong), 2); + threadcreate(snapthread, nil, 8192); + snapid = proccreate(snapproc, nil, 1024); +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:26 2014 @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +enum { +// BlkSize = 65536, + BlkSize = 32768, + NPerBlk = BlkSize / sizeof(uvlong), + + BlobQuan = 64, /* must be a power of 2 */ + + TFile = 0, + TObject, + TLun, + + Magicθ = 0x4207abcddcba0742LL, + + FSClean = 1, + + MTnone = 0, /* must be 0 */ + MTint = 1, + MTistring = 2, + MTstring = 3, + MTblob = 4, +}; + +typedef struct Blob Blob; +typedef struct GMeta GMeta; +typedef union MVal MVal; +typedef struct PQMap PQMap; +typedef struct Super Super; + +#pragma pack on +struct Blob { + short len; + union { + uvlong next; + char data[1]; + }; +}; + +union MVal { + uvlong val; + char str[8]; +}; + +struct GMeta { + uvlong next; + uchar type; + char name[15]; + MVal m; +}; + +struct PQMap { + uvlong qpath; + ushort plen; + char pname[1]; +}; +#pragma pack off + +struct Super { + uvlong magic; + uvlong version; + uvlong qgen; + uvlong nblk; + uvlong nfreemap; + uvlong freemap; + uvlong state; + uvlong firstdat; + uvlong nfree; + uvlong firstlun; + uvlong nmeta; + uvlong firstmeta; + uvlong ffmeta; + uvlong nblob; + uvlong firstblob; + uvlong ffblob; + uvlong lfblob; + uvlong nht; + uvlong nhashblk; + uvlong snaptime; +}; + +/* aoe.c */ +extern void haltaoe(void); +extern void initaoe(void); +extern void rmtarget(int, int); +extern void starttarget(int, int, uvlong); + +/* cache.c */ +extern int brelease(uvlong); +extern void *cbclean(uvlong); +extern void *cbread(uvlong); +extern void cbwrite(uvlong); +extern int ccanfree(uvlong); +extern int cread(void *, int, 
uvlong); +extern void csync(void); +extern int cwrite(void *, int, uvlong); +extern void haltcache(void); +extern void initcache(char *, int); +extern char *prcstat(void); +extern void resetcache(void); + +/* cons.c */ +extern void docons(void *); +extern void initcons(int); + +extern int allow; +extern int rootallow; + +/* free.c */ +extern uvlong allocblock(void); +extern void freeblock(uvlong); +extern void haltfree(void); +extern void initfree(void); + +/* fs.c */ +extern void halt9p(void); +extern void θstart(Srv *); + +extern char *ddir, *dname; +extern uvlong starttime; +extern int doatimes; +extern int shutdown; + +/* hash.c */ +extern void fixpaths(int); +extern uvlong p2q(int, char *, int); +extern char *prhstat(void); +extern uvlong q2m(int, uvlong, int); +extern void rehashpath(uvlong, char *, char *); +extern void rmp(char *); +extern void rmq(uvlong, uvlong); +extern void setqhash(uvlong, uvlong); +extern void showphash(int, char *); + +/* meta.c */ +extern void blockuse(int, uvlong); +extern void checkalloc(int); +extern void fixfamilies(int); +extern void freedata(uvlong); +extern void *getblob(int, uvlong, int *); +extern int getmeta(int, uvlong, char *, MVal *); +extern int getmetaint(int, uvlong, char *, uvlong *); +extern char *getmetastr(int, uvlong, char *); +extern uvlong locate(int, uvlong, uvlong, int); +extern void mpred(int, uvlong); +extern void mprint(int, uvlong); +extern void prmeta(int, uvlong); +extern char *prmstat(void); +extern void reammeta(int); +extern void recovermeta(int); +extern void resetmeta(void); +extern uvlong rmmeta(uvlong, uvlong); +extern void rmmlist(uvlong); +extern uvlong setblob(void *, int, uvlong); +extern uvlong setmeta(uvlong, char *, char *, uvlong, int); +extern uvlong setmetaint(uvlong, char *, char *, uvlong); +extern uvlong setmetablob(uvlong, char *, char *, uchar *, int, uvlong); +extern uvlong setmetastr(uvlong, char *, char *, char *, uvlong); +extern void setmstruct(uvlong, uvlong, char *, int, 
uvlong); + +/* nfs.c */ +extern void haltnfs(void); +extern void initnfs(void); + +extern int debugnfs; + +/* super.c */ +extern void loadsuper(void); +extern char *prsuper(void); +extern void ream(char *); +extern void savesuper(void); + +extern Super super; + +/* uid.c */ +extern char *id2gname(char *, int); +extern char *id2uname(char *, int); +extern int ingroup(char *, char *); +extern void inituid(void); +extern int isleader(char *, char *); + +/* util.c */ +extern void *θmalloc(ulong); +extern long θpread(int, uvlong, void *, long, uvlong); +extern long θpwrite(uvlong, void *, long, uvlong, int); +extern void rmdlist(uvlong, uvlong); +extern long spread(int, void *, long, uvlong); + +/* platform specific */ +extern void conspost(int [], int []); +extern uvlong devsize(char *); --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:33 2014 @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma lib "libfis.a" +#pragma src "/sys/src/libfis" + +/* ata errors */ +enum { + Emed = 1<<0, /* media error */ + Enm = 1<<1, /* no media */ + Eabrt = 1<<2, /* abort */ + Emcr = 1<<3, /* media change request */ + Eidnf = 1<<4, /* no user-accessible address */ + Emc = 1<<5, /* media change */ + Eunc = 1<<6, /* data error */ + Ewp = 1<<6, /* write protect */ + Eicrc = 1<<7, /* interface crc error */ + + Efatal = Eidnf|Eicrc, /* must sw reset */ +}; + +/* ata status */ +enum { + ASerr = 1<<0, /* error */ + ASdrq = 1<<3, /* request */ + ASdf = 1<<5, /* fault */ + ASdrdy = 1<<6, /* ready */ + ASbsy = 1<<7, /* busy */ + + ASobs = 1<<1|1<<2|1<<4, +}; + +enum { + /* fis types */ + H2dev = 0x27, + D2host = 0x34, + + /* fis flags bits */ + Fiscmd = 0x80, + + /* ata bits */ + Ataobs = 0xa0, + Atalba = 0x40, + + /* nominal fis size (fits any fis) */ + Fissize = 0x20, +}; + +/* sata host-to-device (H2dev, 0x27) fis layout: byte offsets into the fis */ +enum { + Ftype, + Fflags, + Fcmd, + Ffeat, + Flba0, + Flba8, + Flba16, + Fdev, + Flba24, + Flba32, + Flba40, + Ffeat8, + Fsc, + Fsc8, + Ficc, /* isochronous cmd completion */ + Fcontrol, +}; + +/* sata device-to-host (D2host, 0x34) fis differences: offsets 1-3 are named differently */ +enum{ + Fioport = 1, + Fstatus, + Frerror, +}; + +/* ata protocol type */ +enum{ + Pnd = 0<<0, /* data direction */ + Pin = 1<<0, + Pout = 2<<0, + Pdatam = 3<<0, + + Ppio = 1<<2, /* ata protocol */ + Pdma = 2<<2, + Pdmq = 3<<2, + Preset = 4<<2, + Pdiag = 5<<2, + Ppkt = 6<<2, + Pprotom = 7<<2, + + P48 = 0<<5, /* 
command “size” */ + P28 = 1<<5, + Pcmdszm = 1<<5, + + Pssn = 0<<6, /* sector size */ + P512 = 1<<6, + Pssm = 1<<6, +}; + +typedef struct Sfis Sfis; +struct Sfis { + ushort feat; + uchar udma; + uchar speeds; + uint sig; + uint lsectsz; + uint physshift; /* log2(log/phys) */ + uint c; /* disgusting, no? */ + uint h; + uint s; +}; + +enum { + Dlba = 1<<0, /* required for sata */ + Dllba = 1<<1, + Dsmart = 1<<2, + Dpower = 1<<3, + Dnop = 1<<4, + Datapi = 1<<5, + Datapi16= 1<<6, + Data8 = 1<<7, + Dsct = 1<<8, + Dnflag = 9, +}; + +enum { + Pspinup = 1<<0, + Pidready = 1<<1, +}; + +void setfissig(Sfis*, uint); +int txmodefis(Sfis*, uchar*, uchar); +int atapirwfis(Sfis*, uchar*, uchar*, int, int); +int featfis(Sfis*, uchar*, uchar); +int flushcachefis(Sfis*, uchar*); +int identifyfis(Sfis*, uchar*); +int nopfis(Sfis*, uchar*, int); +int rwfis(Sfis*, uchar*, int, int, uvlong); +void skelfis(uchar*); +void sigtofis(Sfis*, uchar*); +uvlong fisrw(Sfis*, uchar*, int*); + +void idmove(char*, ushort*, int); +vlong idfeat(Sfis*, ushort*); +uvlong idwwn(Sfis*, ushort*); +int idss(Sfis*, ushort*); +int idpuis(ushort*); +ushort id16(ushort*, int); +uint id32(ushort*, int); +uvlong id64(ushort*, int); +char *pflag(char*, char*, Sfis*); +uint fistosig(uchar*); + +/* scsi */ +typedef struct Cfis Cfis; +struct Cfis { + uchar phyid; + uchar encid[8]; + uchar tsasaddr[8]; + uchar ssasaddr[8]; + uchar ict[2]; +}; + +void smpskelframe(Cfis*, uchar*, int); +uint sashash(uvlong); +uchar *sasbhash(uchar*, uchar*); --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:32 2014 @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +enum { + Falloc = 1, + Ffree, +}; + +typedef struct Freereq Freereq; + +struct Freereq { + int req; + uvlong blk; + Channel *resp; +}; + +static uvlong firstfree; +static Channel *freechan; + +static void +_allocblock(Freereq *r) +{ + uchar *p, *tfree; + vlong ff, fb, i; + int blk, bit; + int j, x; + + ff = firstfree; + fb = firstfree / (8 * BlkSize); + tfree = cbread(fb + super.freemap); + blk = (firstfree / 8) % BlkSize; + bit = firstfree & 7; + tfree[blk] &= ~(1 << bit); + cbwrite(fb + super.freemap); + p = nil; /* make the compiler happy */ + for(i = fb; i < super.nfreemap; ++i) { + for(p = tfree; p < tfree + BlkSize && *p == 0; ++p) ; + if(p < tfree + BlkSize) + break; + brelease(i + super.freemap); + tfree = cbread(i + 1 + super.freemap); + } + if(i >= super.nfreemap) + sysfatal("No free space"); + brelease(i + super.freemap); + for(j = 0, x = *p; j < 8 && (x&1) == 0; ++j, x >>= 1) ; + firstfree = 8 * (i * BlkSize + (p - tfree)) + j; + --super.nfree; + send(r->resp, &ff); +} + +static void +_freeblock(Freereq *r) +{ + uchar *tfree; + vlong fb; + int blk, bit; + + if(!ccanfree(r->blk)) { + fprint(2, "wanting to free active block\n"); + return; + } + blk = (r->blk / 8) % BlkSize; + bit = r->blk & 7; + fb = r->blk / (8 * BlkSize); + tfree = cbread(fb + super.freemap); + if(tfree == 0) { + fprint(2, "invalid free block: fb:%lld freemap:%ulld\n", fb, super.freemap); + return; + } + tfree[blk] |= 1 << bit; + cbwrite(fb + super.freemap); + brelease(fb + super.freemap); + ++super.nfree; + if(r->blk < firstfree) + firstfree = r->blk; +} + +static void +handler(void *) +{ + Freereq r; + + while(1) { + if(recv(freechan, &r) == 0) { + if(shutdown) + threadexits(nil); + continue; + } + switch(r.req) { + case Falloc: + _allocblock(&r); + break; + case Ffree: + _freeblock(&r); + break; + } + } +} + +void +initfree(void) +{ + uchar *p, *tfree; + vlong i; + int j, x; + + p = nil; /* make the 
compiler happy */ + tfree = nil; + for(i = 0; i < super.nfreemap; ++i) { + tfree = cbread(i + super.freemap); + for(p = tfree; p < tfree + BlkSize && *p == 0; ++p) ; + if(p < tfree + BlkSize) + break; + brelease(i + super.freemap); + } + if(i >= super.nfreemap) + sysfatal("No free space"); + for(j = 0, x = *p; j < 8 && (x&1) == 0; ++j, x >>= 1) ; + firstfree = 8 * (i * BlkSize + (p - tfree)) + j; + brelease(i + super.freemap); + freechan = chancreate(sizeof(Freereq), 2); + threadcreate(handler, nil, 8192); +} + +void +haltfree(void) +{ +/* chanclose(freechan); */ +} + +uvlong +allocblock(void) +{ + Freereq r; + uvlong blk; + + r.req = Falloc; + r.resp = chancreate(sizeof(uvlong), 0); + send(freechan, &r); + recv(r.resp, &blk); + chanfree(r.resp); + return blk; +} + +void +freeblock(uvlong block) +{ + Freereq r; + + if(block < super.firstdat || block > super.nblk) { + fprint(2, "Bogus block in free from %p: %ulld\n", getcallerpc(&block), block); + return; + } + r.req = Ffree; + r.blk = block; + send(freechan, &r); +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:25 2014 @@ -0,0 +1,1207 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +typedef struct Fdref Fdref; +typedef struct Fidaux Fidaux; +typedef struct Lmsg Lmsg; +typedef struct Srvaux Srvaux; +typedef struct Uglymap Uglymap; + +struct Fdref { + Ref ref; + int fd; +}; + +struct Fidaux { + char *path; + char *uname; + uvlong lsearch; + int dirindex; + Fdref *store; +}; + +struct Lmsg { + int data; + char *rsys; +}; + +struct Srvaux { + Ioproc *io9p; +}; + +struct Uglymap { + Srv *s; + uchar *rbuf; + Uglymap *next; +}; + +static void θattach(Req *); +static void θcreate(Req *); +static void θdestroyfid(Fid *); +static void θend(Srv *); +static void θflush(Req *); +static void θopen(Req *); +static void θread(Req *); +static void θremove(Req *); +static void θstat(Req *); +static void θwalk(Req *); +static void θwstat(Req *); +static void θwrite(Req *); +static void mylistenproc(void *); +static void srvstarter(void *); + +Srv θsrv = { + .attach = θattach, + .auth = auth9p, + .open = θopen, + .create = θcreate, + .read = θread, + .write = θwrite, + .remove = θremove, + .flush = θflush, + .stat = θstat, + .wstat = θwstat, + .walk = θwalk, + .destroyfid = θdestroyfid, + .start = θstart, + .end = θend, +}; 
+ +static char *dev; +static char *laddr; +static Uglymap *uhd; +static Channel *lchan; + +char *ddir, *dname; +uvlong starttime; +int doatimes; +int shutdown; +int mainstacksize = 16384; + +static void +usage(void) +{ + fprint(2, "Usage: %s [-anrsACD] [-m nblk] [-p port] device\n", argv0); + threadexits("usage"); +} + +void +threadmain(int argc, char *argv[]) +{ + Lmsg lmsg; + char *lstr, *p; + int doream, postcons, port, poststdin; + int doaoe, donfs, maxcache; + + doream = 0; + postcons = 0; + poststdin = 0; + doaoe = 1; + donfs = 1; + maxcache = 4000; + port = 564; + ARGBEGIN { + case 'a': + doaoe = 0; + break; + case 'm': + maxcache = atoi(EARGF(usage())); + break; + case 'n': + donfs = 0; + break; + case 'p': + port = atoi(EARGF(usage())); + break; + case 'r': + doream = 1; + break; + case 's': + poststdin = 1; + break; + case 'A': + doatimes = 1; + break; + case 'C': + postcons = 1; + break; + case 'D': + ++chatty9p; + break; + default: + usage(); + } ARGEND + if(argc != 1) + usage(); + dev = *argv; + lstr = smprint("tcp!*!%d", port); + starttime = nsec(); + p = strrchr(dev, '/'); + if(p == nil) { + ddir = "."; + dname = strdup(dev); + } + else { + ddir = mallocz(p - dev + 1, 1); + strncpy(ddir, dev, p - dev); + dname = strdup(p+1); + } + initcache(dev, maxcache); + if(doream) + ream(dev); + else + loadsuper(); + inituid(); + if(doaoe) + initaoe(); + if(donfs) + initnfs(); + lchan = chancreate(sizeof(Lmsg), 4); + laddr = lstr; + threadcreate(srvstarter, nil, 8192); + if(poststdin) { + lmsg.data = 1; + lmsg.rsys = estrdup9p("boot"); + send(lchan, &lmsg); + postfd("θfs", 0); + } + /* + * Because the main in libthread runs the thread scheduler in + * the initial process, we can't daemonize in the usual way. + * The backgrounding is no big deal, but we want the parent + * to be able to wait until we're ready for an attach. So we + * don't do the console until almost the end and the parent + * can wait until θfsctl appears in /srv. 
It's not as elegant as + * letting the wait synchronize, but it's better than an arbitrary + * sleep. + */ + initcons(postcons); + proccreate(mylistenproc, nil, 8192); +} + +void +halt9p(void) +{ + Srvaux *sa; + Uglymap *u; + +/* chanclose(lchan); */ + for(u = uhd; u; u = u->next) { + close(u->s->infd); + close(u->s->outfd); + sa = u->s->aux; + closeioproc(sa->io9p); + } +} + +static void +mysrvproc(void *a) +{ + Srv *s; + int data; + + s = a; + data = s->infd; + srv(s); + close(data); + threadexits(nil); +} + +static void +srvstarter(void *) +{ + Lmsg m; + Srv *s; + + while(recv(lchan, &m)) { + if(shutdown) + break; + s = emalloc9p(sizeof(Srv)); + *s = θsrv; + s->addr = m.rsys; + s->infd = s->outfd = m.data; + s->fpool = nil; + s->rpool = nil; + s->rbuf = nil; + s->wbuf = nil; + threadcreate(mysrvproc, s, 32 * 1024); + } + threadexits(nil); +} + +static char* +getremotesys(char *ndir) +{ + char buf[128], *serv, *sys; + int fd, n; + + snprint(buf, sizeof buf, "%s/remote", ndir); + sys = nil; + fd = open(buf, OREAD); + if(fd >= 0) { + n = read(fd, buf, sizeof(buf)-1); + if(n>0) { + buf[n-1] = 0; + serv = strchr(buf, '!'); + if(serv) + *serv = 0; + sys = estrdup9p(buf); + } + close(fd); + } + if(sys == nil) + sys = estrdup9p("unknown"); + return sys; +} + +static void +mylistenproc(void *) +{ + Lmsg m; + char ndir[NETPATHLEN], dir[NETPATHLEN]; + int ctl, data, nctl; + + ctl = announce(laddr, dir); + if(ctl < 0) { + fprint(2, "%s: announce %s: %r", argv0, laddr); + return; + } + + for(;;){ + nctl = listen(dir, ndir); + if(nctl < 0){ + fprint(2, "%s: listen %s: %r", argv0, laddr); + break; + } + + data = accept(ctl, ndir); + if(data < 0){ + fprint(2, "%s: accept %s: %r\n", argv0, ndir); + continue; + } + m.data = data; + m.rsys = getremotesys(ndir); + send(lchan, &m); + } +} + +int +read9pmsg(int fd, void *abuf, uint n) +{ + Srvaux *sa; + Uglymap *um; + Ioproc *io9p; + int m, len; + uchar *buf; + + buf = abuf; + + /* + * Grotesque, but this is research :) + */ + for(um 
= uhd; um && um->rbuf != buf; um = um->next) ; + if(um == nil) { + fprint(2, "no ugly mapping"); + return 0; + } + sa = um->s->aux; + io9p = sa->io9p; + + /* read count */ + m = ioreadn(io9p, fd, buf, BIT32SZ); + if(m != BIT32SZ){ + if(m < 0) + return -1; + return 0; + } + + len = GBIT32(buf); + if(len <= BIT32SZ || len > n){ + werrstr("bad length in 9P2000 message header"); + return -1; + } + len -= BIT32SZ; + m = ioreadn(io9p, fd, buf+BIT32SZ, len); + if(m < len) + return 0; + return BIT32SZ+m; +} + +static int +θhasperm(int fd, uvlong meta, char *uid, int p) +{ + uvlong mode; + char *fuser, *fgroup; + int m; + + if(allow) + return 1; + if(getmetaint(fd, meta, "mode", &mode) == MTnone) + return 1; + m = mode & 7; /* other */ + if((p & m) == p) + return 1; + + if((fuser = getmetastr(fd, meta, "uid")) != nil) { + if(strcmp(fuser, uid) == 0) { + m |= (mode>>6) & 7; + if((p & m) == p) { + free(fuser); + return 1; + } + } + free(fuser); + } + + if((fgroup = getmetastr(fd, meta, "gid")) != nil) { + if(ingroup(uid, fgroup)) { + m |= (mode>>3) & 7; + if((p & m) == p) { + free(fgroup); + return 1; + } + } + free(fgroup); + } + return 0; +} + + +static void +attacher(void *a) +{ + Req *r; + Fidaux *fa; + char *path; + uvlong rmeta, x; + + r = a; + if(r->ifcall.aname == nil || strlen(r->ifcall.aname) == 0) + path = smprint("/"); + else + path = smprint("/%s", r->ifcall.aname); + rmeta = q2m(-1, p2q(-1, path, 0), 0); + if(rmeta == 0) + respond(r, "no root"); + else { + getmetaint(-1, rmeta, "qpath", &x); + r->fid->qid.path = x; + getmetaint(-1, rmeta, "qvers", &x); + r->fid->qid.vers = x; + getmetaint(-1, rmeta, "qtype", &x); + r->fid->qid.type = x; + r->ofcall.qid = r->fid->qid; + fa = malloc(sizeof(Fidaux)); + r->fid->aux = fa; + fa->path = path; + fa->uname = estrdup9p(r->ifcall.uname); + fa->lsearch = 0; + fa->store = θmalloc(sizeof(Fdref)); + incref(&fa->store->ref); + fa->store->fd = -1; + respond(r, nil); + } + threadexits(nil); +} + +static void +θattach(Req *r) +{ + 
if(authattach(r) < 0) + return; + threadcreate(attacher, r, 8192); +} + +static void +_θcreate(void *a) +{ + Req *r; + Qid nqid; + Fidaux *fa; + char *npath; + uvlong x; +// uvlong meta, pmeta, dirblk, now; + uvlong meta, pmeta, now; + + r = a; + fa = r->fid->aux; + pmeta = q2m(-1, r->fid->qid.path, 0); + if(θhasperm(fa->store->fd, pmeta, fa->uname, AWRITE) == 0) { + respond(r, "permission denied"); + threadexits(nil); + } + npath = smprint("%s/%s", fa->path, r->ifcall.name); + nqid.path = p2q(-1, npath, 1); + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + respond(r, "create failure"); + free(npath); + threadexits(nil); + } + setmetastr(meta, "name", nil, r->ifcall.name, 0); + setmetaint(meta, "parent", nil, r->fid->qid.path); + nqid.vers = 0; + nqid.type = 0; + if(r->ifcall.perm & DMDIR) + nqid.type |= QTDIR; + if(r->ifcall.perm & DMAPPEND) + nqid.type |= QTAPPEND; + if(r->ifcall.perm & DMEXCL) + nqid.type |= QTEXCL; + if(r->ifcall.perm & DMTMP) + nqid.type |= QTTMP; + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "mode", nil, r->ifcall.perm); + now = nsec(); + setmetaint(meta, "atime", nil, now); + setmetaint(meta, "mtime", nil, now); + setmetaint(meta, "length", nil, 0); + setmetastr(meta, "uid", nil, fa->uname, 0); + setmetastr(meta, "gid", nil, fa->uname, 0); + setmetastr(meta, "muid", nil, fa->uname, 0); + if(getmetaint(-1, pmeta, "child", &x) == MTint) + setmetaint(meta, "sib", nil, x); + else + setmetaint(meta, "sib", nil, 0); + if(r->ifcall.perm & DMDIR) + setmetaint(meta, "child", nil, 0); + else + setmetaint(meta, "dblock", nil, 0); + setmetaint(pmeta, "child", nil, nqid.path); + if(getmetaint(-1, pmeta, "qvers", &x) != MTnone) + setmetaint(pmeta, "qvers", nil, x+1); + setmetaint(pmeta, "mtime", nil, now); + setmetastr(pmeta, "muid", nil, fa->uname, 0); + setqhash(nqid.path, meta); + free(fa->path); + fa->path = npath; + fa->lsearch = 0; + 
r->fid->qid = nqid; + r->ofcall.qid = nqid; + respond(r, nil); + savesuper(); + threadexits(nil); +} + +static void +θcreate(Req *r) +{ + threadcreate(_θcreate, r, 8192); +} + +static void +θdestroyfid(Fid *fid) +{ + Fidaux *fa; + uvlong meta; + + if(fid->qid.type & QTAUTH) { + authdestroy(fid); + return; + } + fa = fid->aux; + if(fid->omode != -1 && (fid->omode & ORCLOSE)) { + meta = q2m(fa->store->fd, fid->qid.path, 0); + if(meta != 0) { + freedata(meta); + rmdlist(meta, fid->qid.path); + rmq(fid->qid.path, meta); + rmmlist(meta); + if(fa) + rmp(fa->path); + } + } + if(fa == nil) + return; + if(fa->store && decref(&fa->store->ref) == 0) { + if(fa->store->fd != -1) + close(fa->store->fd); + free(fa->store); + } + free(fa->path); + free(fa->uname); + free(fa); +} + +static void +θend(Srv *s) +{ + Srvaux *sa; + Uglymap *um, *u; + + resetmeta(); + csync(); + sa = s->aux; + if(sa) { + if(sa->io9p) + closeioproc(sa->io9p); + free(sa); + } + if(uhd == nil) + return; + if(uhd->s == s) { + um = uhd; + uhd = um->next; + free(um); + return; + } + for(um = uhd; um && um->next && um->next->s != s; um = um->next) ; + if(um && um->next) { + u = um->next; + um->next = u->next; + free(u); + } +} + +static void +θflush(Req *r) +{ + respond(r, nil); +} + +static void +_θopen(void *a) +{ + Fidaux *fa; + Req *r; + Fid *fid; + uvlong meta, x; + ulong need; + + r = a; + fid = r->fid; + fa = fid->aux; + meta = q2m(fa->store->fd, fid->qid.path, 0); + if(meta == 0) { + respond(r, "no file"); + threadexits(nil); + } + switch(r->ifcall.mode & 3) { + case OREAD: + need = AREAD; + break; + case OWRITE: + need = AWRITE; + break; + case ORDWR: + need = AREAD | AWRITE; + break; + case OEXEC: + need = AEXEC; + break; + default: + need = AREAD | AWRITE | AEXEC; + break; + } + if(r->ifcall.mode & OTRUNC) + need |= AWRITE; + if(θhasperm(fa->store->fd, meta, fa->uname, need) == 0) { + respond(r, "permission denied"); + threadexits(nil); + } + if(r->ifcall.mode & ORCLOSE) { + /* check write permission 
on parent */ + } + if(r->ifcall.mode & OTRUNC) { + setmetaint(meta, "length", nil, 0LL); + if(getmetaint(fa->store->fd, meta, "qvers", &x) != MTnone) + setmetaint(meta, "qvers", nil, x+1); + } + respond(r, nil); + threadexits(nil); +} + +static void +θopen(Req *r) +{ + threadcreate(_θopen, r, 8192); +} + +static int +lzstat(int fd, uvlong meta, Dir *d) +{ + uvlong x; + + memset(&d->qid, 0, sizeof(Qid)); + if(getmetaint(fd, meta, "qpath", &x) != MTnone) + d->qid.path = x; + if(getmetaint(fd, meta, "qvers", &x) != MTnone) + d->qid.vers = x; + if(getmetaint(fd, meta, "qtype", &x) != MTnone) + d->qid.type = x; + if(getmetaint(fd, meta, "mode", &x) != MTnone) + d->mode = x; + else + d->mode = 0; + if(getmetaint(fd, meta, "atime", &x) != MTnone) + d->atime = x / 1000000000; + else + d->atime = 0; + if(getmetaint(fd, meta, "mtime", &x) != MTnone) + d->mtime = x / 1000000000; + else + d->mtime = 0; + if(getmetaint(fd, meta, "length", &x) != MTnone) + d->length = x; + else + d->length = 0; + if((d->name = getmetastr(fd, meta, "name")) == nil) { + fprint(2, "where the streets have no name\n"); + d->name = estrdup9p(""); + } + /* If this is one of the roots, just call it '/' */ + if(d->name[0] == '/') + d->name[1] = 0; + if((d->uid = getmetastr(fd, meta,"uid")) == nil) + d->uid = estrdup9p("none"); + if((d->gid = getmetastr(fd, meta, "gid")) == nil) + d->gid = estrdup9p("none"); + if((d->muid = getmetastr(fd, meta, "muid")) == nil) + d->muid = estrdup9p("none"); + return 0; +} + +static int +θgen(int n, Dir *dir, void *a) +{ + Fidaux *fa; + Fid *fid; + uvlong meta, x; + int i; + + fid = a; + fa = fid->aux; + if(n == fa->dirindex + 1 && fa->lsearch != 0) { + if(getmetaint(fa->store->fd, fa->lsearch, "sib", &x) == MTint) + meta = q2m(fa->store->fd, x, 0); + else { + meta = 0; + fprint(2, "no sibling in mblock %ulld\n", fa->lsearch); + } + } + else { + meta = q2m(fa->store->fd, fid->qid.path, 0); + if(meta == 0) + return -1; + if(getmetaint(fa->store->fd, meta, "child", &x) != 
MTint) + return -1; + meta = q2m(fa->store->fd, x, 0); + for(i = 0; i < n && meta != 0; ++i) { + getmetaint(fa->store->fd, meta, "sib", &x); + meta = q2m(fa->store->fd, x, 0); + } + } + fa->dirindex = n; + fa->lsearch = meta; + if(meta == 0) + return -1; + i = lzstat(fa->store->fd, meta, dir); + return i; +} + +static void +_θread(void *a) +{ + Fidaux *fa; + Req *r; + ulong tot; + + r = a; + fa = r->fid->aux; + fa->lsearch = 0; + fa->dirindex = 0; + if(r->fid->qid.type & QTDIR) { + dirread9p(r, θgen, r->fid); + respond(r, nil); + threadexits(nil); + } + tot = θpread(fa->store->fd, r->fid->qid.path, r->ofcall.data, r->ifcall.count, r->ifcall.offset); + if(tot == -1) { + respond(r, "no metadata"); + threadexits(nil); + } + r->ofcall.count = tot; + respond(r, nil); + threadexits(nil); +} + +static void +θauthread(void *a) +{ + Req *r; + + r = a; + authread(r); + threadexits(nil); +} + +static void +θread(Req *r) +{ + if(r->fid->qid.type & QTAUTH) { + proccreate(θauthread, r, 8192); + return; + } + threadcreate(_θread, r, 8192); +} + +static void +_θremove(void *a) +{ + static QLock rlock; + Req *r; + Fidaux *fa; + uvlong meta, pmeta, qpath, now; + + /* + * This lock is ugly. Its purpose is to serialize the removes so + * that we don't end up in the process of removing the same + * file more than once concurrently. It comes up when doing + * a mk clean on the kernel. I'm going to give some thought + * to better ways to handle this, but this should get around + * the issue for now. 
+	 */
+	qlock(&rlock);
+	r = a;
+	fa = r->fid->aux;
+	meta = q2m(-1, r->fid->qid.path, 0);
+	if(meta == 0) {
+		qunlock(&rlock);
+		respond(r, nil);
+		threadexits(nil);
+	}
+	pmeta = 0;
+	/* check parent permission */
+	if(getmetaint(-1, meta, "parent", &qpath) != MTnone && qpath != 0) {
+		pmeta = q2m(-1, qpath, 0);
+		if(pmeta != 0) {
+			if(θhasperm(fa->store->fd, pmeta, fa->uname, AWRITE) == 0) {
+				qunlock(&rlock);
+				respond(r, "permission denied");
+				threadexits(nil);
+			}
+		}
+	}
+	if(r->fid->qid.type & QTDIR) {
+		if(getmetaint(-1, meta, "child", &qpath) != MTnone && qpath != 0) {
+			qunlock(&rlock);
+			respond(r, "not empty");
+			threadexits(nil);
+		}
+	}
+	now = nsec();
+	rmq(r->fid->qid.path, meta);
+	/* pmeta is still 0 when no parent was found; don't stamp a nonexistent parent */
+	if(pmeta != 0) {
+		setmetaint(pmeta, "mtime", nil, now);
+		setmetastr(pmeta, "muid", nil, fa->uname, 0);
+	}
+	freedata(meta);
+	rmdlist(meta, r->fid->qid.path);
+	rmmlist(meta);
+	rmp(fa->path);
+	qunlock(&rlock);
+	respond(r, nil);
+	threadexits(nil);
+}
+
+static void
+θremove(Req *r)
+{
+	threadcreate(_θremove, r, 8192);
+}
+
+/*
+ * Per-connection setup: give the Srv its own I/O proc and record the
+ * Srv and its read buffer in the Uglymap list so read9pmsg can find
+ * the I/O proc from the buffer address.
+ */
+void
+θstart(Srv *s)
+{
+	Srvaux *sa;
+	Uglymap *um;
+
+	/* emalloc9p, as used for Srv in srvstarter, does not return nil */
+	sa = emalloc9p(sizeof(Srvaux));
+	sa->io9p = ioproc();
+	s->aux = sa;
+	um = emalloc9p(sizeof(Uglymap));
+	um->s = s;
+	um->rbuf = s->rbuf;
+	um->next = uhd;
+	uhd = um;
+}
+
+/* Stat worker: fill r->d from the file's metadata and respond. */
+static void
+_θstat(void *a)
+{
+	Req *r;
+	Fidaux *fa;
+	uvlong meta;
+	int n;
+
+	r = a;
+	fa = r->fid->aux;
+	meta = q2m(fa->store->fd, r->fid->qid.path, 0);
+	if(meta == 0)
+		respond(r, "no file");
+	else {
+		n = lzstat(fa->store->fd, meta, &r->d);
+		if(n == 0)
+			respond(r, nil);
+		else
+			respond(r, "errnt");
+	}
+	threadexits(nil);
+}
+
+static void
+θstat(Req *r)
+{
+	threadcreate(_θstat, r, 8192);
+}
+
+/*
+ * Walk fid one element to name (walkandclone callback).  If the
+ * target has "snap" metadata, the fid is switched to the root of the
+ * snapshot file ddir/snap, opened read-only.  Returns nil on success
+ * or an error string.
+ */
+static char *
+θwalk1(Fid *fid, char *name, void *)
+{
+	Fidaux *fa;
+	char *npath, *sname, *spath;
+	uvlong meta, x;
+	int fd;
+
+	fa = (Fidaux *)(fid->aux);
+	npath = smprint("%s/%s", fa->path, name);
+	meta = q2m(fa->store->fd, p2q(fa->store->fd, npath, 0), 0);
+	if(meta == 0) {
+		free(npath);
+		return "does not exist";
+	}
+	sname = getmetastr(fa->store->fd, meta, "snap");
+	if(sname == nil) {
+		free(fa->path);
+		fa->path = npath;
+	}
+	else {
+		free(npath);
+		spath = smprint("%s/%s", ddir, sname);
+		free(sname);
+		fd = open(spath, OREAD);
+		/* the name is only needed for the open */
+		free(spath);
+		if(fd < 0)
+			return "snap open";
+		free(fa->path);
+		fa->path = estrdup9p("/");
+		if(decref(&fa->store->ref) == 0) {
+			if(fa->store->fd != -1)
+				close(fa->store->fd);
+			free(fa->store);
+		}
+		fa->store = θmalloc(sizeof(Fdref));
+		incref(&fa->store->ref);
+		fa->store->fd = fd;
+		meta = q2m(fa->store->fd, p2q(fa->store->fd, "/", 0), 0);
+		if(meta == 0)
+			return "no root";
+	}
+	if(getmetaint(fa->store->fd, meta, "qpath", &x) != MTint)
+		return "no qid";
+	fid->qid.path = x;
+	getmetaint(fa->store->fd, meta, "qvers", &x);
+	fid->qid.vers = x;
+	getmetaint(fa->store->fd, meta, "qtype", &x);
+	fid->qid.type = x;
+	return nil;
+}
+
+/*
+ * Give newfid its own copy of oldfid's state (walkandclone callback);
+ * the store is shared and gains a reference.
+ */
+static char *
+θclone(Fid *oldfid, Fid *newfid, void *)
+{
+	Fidaux *ofa, *nfa;
+
+	ofa = (Fidaux *)(oldfid->aux);
+	nfa = newfid->aux = θmalloc(sizeof(Fidaux));
+	*nfa = *ofa;
+	nfa->path = estrdup9p(ofa->path);
+	nfa->uname = estrdup9p(ofa->uname);
+	incref(&nfa->store->ref);
+	return nil;
+}
+
+static void
+_θwalk(void *a)
+{
+	Req *r;
+	Fdref *store;
+	Fidaux *fa;
+	char *npath, *p, *e;
+	uvlong qp, meta, x;
+	int nlen;
+	int i, fd;
+
+	r = a;
+	fa = r->fid->aux;
+	store = fa->store;
+	fd = store->fd;
+	if(r->ifcall.nwname == 1 && strcmp(r->ifcall.wname[0], "..") == 0) {
+		npath = estrdup9p(fa->path);
+		p = strrchr(npath, '/');
+		if(p && p != npath)
+			*p = 0;
+	}
+	else {
+		nlen = strlen(fa->path);
+		for(i = 0; i < r->ifcall.nwname; ++i)
+			nlen += strlen(r->ifcall.wname[i]) + 1;
+		npath = θmalloc(nlen + 1);
+		p = npath;
+		e = npath + nlen + 1;
+		p = seprint(p, e, "%s", fa->path);
+		for(i = 0; i < r->ifcall.nwname; ++i)
+			p = seprint(p, e, "/%s", r->ifcall.wname[i]);
+	}
+	/*
+	 * If we can get there directly, do it, otherwise, fall
+	 * back to the one step at a time using walkandclone
+	 */
+	meta = q2m(fd, p2q(fd, npath, 0), 0);
+
if(meta == 0) {
+		walkandclone(r, θwalk1, θclone, nil);
+		free(npath);
+		threadexits(nil);
+	}
+	if(p = getmetastr(fd, meta, "snap")) {
+		free(p);
+		walkandclone(r, θwalk1, θclone, nil);
+		free(npath);
+		threadexits(nil);
+	}
+	fa = r->newfid->aux;
+	if(r->fid == r->newfid)
+		free(fa->path);
+	else {
+		fa = r->newfid->aux = θmalloc(sizeof(Fidaux));
+		fa->uname = estrdup9p(((Fidaux *)(r->fid->aux))->uname);
+		fa->store = store;
+		incref(&store->ref);
+	}
+	fa->path = npath;
+	if(r->ifcall.nwname == 0) {
+		respond(r, nil);
+		threadexits(nil);
+	}
+	r->ofcall.nwqid = r->ifcall.nwname;
+	for(i = r->ifcall.nwname - 1; i >= 0; --i) {
+		if(getmetaint(fd, meta, "qpath", &x) == MTnone) {
+			respond(r, "errnt");
+			threadexits(nil);
+		}
+		r->ofcall.wqid[i].path = x;
+		getmetaint(fd, meta, "qvers", &x);
+		r->ofcall.wqid[i].vers = x;
+		getmetaint(fd, meta, "qtype", &x);
+		r->ofcall.wqid[i].type = x;
+		getmetaint(fd, meta, "parent", &qp);
+		meta = q2m(fd, qp, 0);
+	}
+	respond(r, nil);
+	threadexits(nil);
+}
+
+/* Service a walk request by running _θwalk in its own thread. */
+static void
+θwalk(Req *r)
+{
+	threadcreate(_θwalk, r, 8192);
+}
+
+/*
+ * Thread body for a write request.  Fids whose qid has QTAPPEND set
+ * are written through θpwrite with offset 0 and a last argument of 2;
+ * all others use the request offset and a last argument of 1
+ * (presumably an append/normal mode selector -- confirm against θpwrite).
+ * A θpwrite result of -1 is reported to the client as "no metadata".
+ */
+static void
+_θwrite(void *a)
+{
+	Req *r;
+	ulong tot;
+
+	r = a;
+	if(r->fid->qid.type & QTAPPEND)
+		tot = θpwrite(r->fid->qid.path, r->ifcall.data, r->ifcall.count, 0, 2);
+	else
+		tot = θpwrite(r->fid->qid.path, r->ifcall.data, r->ifcall.count, r->ifcall.offset, 1);
+	if(tot == -1) {
+		respond(r, "no metadata");
+		threadexits(nil);
+	}
+	r->ofcall.count = tot;
+	respond(r, nil);
+	threadexits(nil);
+}
+
+/* Proc body for writes on an auth fid: hand the request to authwrite. */
+static void
+θauthwrite(void *a)
+{
+	Req *r;
+
+	r = a;
+	authwrite(r);
+	threadexits(nil);
+}
+
+/*
+ * Dispatch a write request: auth fids (QTAUTH) are handled in a
+ * separate proc, everything else in a thread of the current proc.
+ */
+static void
+θwrite(Req *r)
+{
+	if(r->fid->qid.type & QTAUTH) {
+		proccreate(θauthwrite, r, 8192);
+		return;
+	}
+	threadcreate(_θwrite, r, 8192);
+}
+
+/*
+ * Thread body for a wstat request.
+ *
+ * All permission checks are made first so that the request is applied
+ * completely or not at all; the checks are skipped when the global
+ * `allow' is set.  The changes (mode, mtime, length, name, uid, gid)
+ * are then written to the file's metadata.  A rename also rehashes
+ * the path of the file and of everything below it and replaces
+ * fa->path.
+ *
+ * newpath is owned by this function until it is stored in fa->path;
+ * every exit that does not store it must free it.
+ */
+static void
+_θwstat(void *a)
+{
+	Req *r;
+	Fidaux *fa;
+	Qid nqid;
+	char *p, *gid, *uid, *newpath;
+	uvlong meta, pmeta, x, pqpath;
+
+	r = a;
+	fa = r->fid->aux;
+	meta = q2m(-1, r->fid->qid.path, 0);
+	if(meta == 0) {
+		respond(r, "no metadata");
+		threadexits(nil);
+	}
+	/* the new path is the old path with its last element replaced */
+	p = strrchr(fa->path, '/');
+	if(p && fa->path)
+		newpath = smprint("%.*s/%s", (int)(p - fa->path), fa->path, r->d.name);
+	else
+		newpath = estrdup9p(r->d.name);
+
+	if(allow)
+		goto skipperm;
+	uid = getmetastr(-1, meta, "uid");
+	gid = getmetastr(-1, meta, "gid");
+
+	/* Because wstat is defined to be all or none, first check all the permissions */
+	if(strlen(r->d.name) > 0) {
+		/* a rename needs write permission in the parent directory */
+		if(getmetaint(-1, meta, "parent", &pqpath) != MTnone && pqpath != 0) {
+			pmeta = q2m(-1, pqpath, 0);
+			if(pmeta != 0) {
+				if(θhasperm(-1, pmeta, fa->uname, AWRITE) == 0) {
+					free(newpath);
+					free(gid);
+					free(uid);
+					respond(r, "permission denied");
+					threadexits(nil);
+				}
+			}
+		}
+		/* and the destination name must not already be in use */
+		if(q2m(-1, p2q(-1, newpath, 0), 0) != 0) {
+			free(newpath);
+			free(gid);
+			free(uid);
+			respond(r, "file exists");
+			threadexits(nil);
+		}
+	}
+	if(r->d.length != 0xffffffffffffffffLL) {
+		if((r->fid->qid.type & QTDIR) && r->d.length != 0) {
+			free(newpath);
+			free(gid);
+			free(uid);
+			respond(r, "non-zero size on directory");
+			threadexits(nil);
+		}
+		if(θhasperm(-1, meta, fa->uname, AWRITE) == 0) {
+			free(newpath);
+			free(gid);
+			free(uid);
+			respond(r, "permission denied");
+			threadexits(nil);
+		}
+	}
+	if(r->d.mode != 0xffffffff || r->d.mtime != 0xffffffff) {
+		if(!(strcmp(fa->uname, uid) == 0 || isleader(fa->uname, gid))) {
+			free(gid);
+			free(uid);
+			free(newpath);
+			respond(r, "not owner");
+			threadexits(nil);
+		}
+	}
+	if(strlen(r->d.gid) > 0) {
+		if(!(strcmp(fa->uname, uid) == 0 && ingroup(fa->uname, gid) || isleader(fa->uname, gid))) {
+			free(gid);
+			free(uid);
+			free(newpath);
+			respond(r, "not owner");
+			threadexits(nil);
+		}
+	}
+	free(gid);
+	free(uid);
+
+skipperm:
+	/* Now that we know we have permission, make all the changes */
+	if(r->d.mode != 0xffffffff) {
+		getmetaint(-1, meta, "qpath", &x);
+		nqid.path = x;
+		getmetaint(-1, meta, "qvers", &x);
+		nqid.vers = x;
+		getmetaint(-1, meta, "qtype", &x);
+		nqid.type = x;
+		/* rebuild the qid type bits from the new mode, keeping only QTDIR */
+		x = nqid.type & QTDIR;
+		if(r->d.mode & DMAPPEND)
+			x |= QTAPPEND;
+		if(r->d.mode & DMEXCL)
+			x |= QTEXCL;
+		if(r->d.mode & DMTMP)
+			x |= QTTMP;
+		if(x != nqid.type)
+			setmetaint(meta, "qtype", nil, x);
+		setmetaint(meta, "mode", nil, r->d.mode);
+		if(getmetaint(-1, meta, "unixmode", &x) != MTnone)
+			setmetaint(meta, "unixmode", nil, x & ~0777 | r->d.mode & 0777);
+	}
+	if(r->d.mtime != 0xffffffff)
+		setmetaint(meta, "mtime", nil, r->d.mtime * 1000000000LL);
+	if(r->d.length != 0xffffffffffffffffLL)
+		setmetaint(meta, "length", nil, r->d.length);
+	if(strlen(r->d.name) > 0) {
+		setmetastr(meta, "name", nil, r->d.name, 0);
+		rehashpath(r->fid->qid.path, fa->path, newpath);
+		free(fa->path);
+		fa->path = newpath;
+	}
+	else
+		free(newpath);	/* no rename requested: the candidate path is unused */
+	if(allow && strlen(r->d.uid) > 0)
+		setmetastr(meta, "uid", nil, r->d.uid, 0);
+	if(strlen(r->d.gid) > 0)
+		setmetastr(meta, "gid", nil, r->d.gid, 0);
+	respond(r, nil);
+	threadexits(nil);
+}
+
+/* Service a wstat request by running _θwstat in its own thread. */
+static void
+θwstat(Req *r)
+{
+	threadcreate(_θwstat, r, 8192);
+}
--- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs Thu Feb 20 02:17:24 2014
@@ -0,0 +1,510 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Coraid nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+/* counters reported by prhstat */
+static uvlong np2q, nq2m;
+static int maxp2q, maxq2m;
+static int p2qcoll, q2mcoll;
+
+/* FNV hash */
+static ulong
+pathhash(char *path)
+{
+	uchar *p;
+	ulong h;
+
+	h = 2166136261UL;
+	for(p = (uchar *)path; *p; ++p)
+		h = (h ^ *p) * 16777619;
+	return h % super.nht;
+}
+
+/* Bucket number for a qid path: the qid path modulo the table size. */
+static ulong
+qidhash(uvlong qpath)
+{
+	return qpath % super.nht;
+}
+
+/*
+ * Byte offset of a bucket in the qid hash table, which starts
+ * super.nhashblk + 1 blocks into the store.
+ */
+static uvlong
+qoffset(ulong bucket)
+{
+	return BlkSize * (super.nhashblk + 1) + bucket * sizeof(uvlong);
+}
+
+/* Step over one variable-length path/qid entry to the one packed after it. */
+static PQMap *
+nextpq(PQMap *pq)
+{
+	uchar *p;
+
+	p = (uchar *)pq;
+	p += pq->plen + offsetof(PQMap, pname[0]);
+	return (PQMap *)p;
+}
+
+/*
+ * Map a path name to its qid path.
+ *
+ * With fd == -1 blocks are read through the cache (cread/cbread);
+ * otherwise they are read from fd with spread.  Each hash bucket heads
+ * a chain of blocks; the last uvlong of a block is the number of the
+ * next block in the chain.
+ *
+ * create == 0: return the qid path for path, or 0 if it is not present.
+ * create != 0: add path with a freshly generated qid and return it;
+ * return 0 if the path already exists or no block can be allocated.
+ */
+uvlong
+p2q(int fd, char *path, int create)
+{
+	PQMap *pq, *pend;
+	uchar *p;
+	uvlong *uvp;
+	uvlong hlist, next, qpath;
+	ulong bucket;
+	int plen, nsearch, n;
+
+	++np2q;
+	plen = strlen(path);
+	bucket = pathhash(path);
+	if(fd == -1)
+		n = cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong));
+	else
+		n = spread(fd, &hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong));
+	if(n < 0)
+		sysfatal("cread failure: %r");
+	if(hlist == 0) {
+		/* empty bucket: either start a chain or report a miss */
+		if(create) {
+			hlist = allocblock();
+			if(hlist == 0)
+				return 0;
+			p = cbclean(hlist);
+			pq = (PQMap *)p;
+			qpath = super.qgen++ | ((uvlong)TFile << 60);
+			pq->qpath = qpath;
+			pq->plen = plen;
+			memmove(pq->pname, path, plen);
+			cbwrite(hlist);
+			brelease(hlist);
+			uvp = cbread(bucket / NPerBlk + 1);
+			uvp[bucket % NPerBlk] = hlist;
+			cbwrite(bucket / NPerBlk + 1);
+			brelease(bucket / NPerBlk + 1);
+			return qpath;
+		}
+		return 0;
+	}
+	nsearch = 1;
+	p = nil;	/* make the compiler happy */
+	if(fd != -1)
+		p = θmalloc(BlkSize);
+	while(hlist) {
+		if(fd == -1) {
+			p = cbread(hlist);
+			if(p == nil) {
+				fprint(2, "cbread failed on block %ulld\n", hlist);
+				return 0;
+			}
+		}
+		else
+			spread(fd, p, BlkSize, hlist * BlkSize);
+		pend = (PQMap *)(p + BlkSize);
+		--pend;
+		for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) {
+			if(plen == pq->plen && memcmp(path, pq->pname, plen) == 0)
+				goto found;
+			++nsearch;
+		}
+		next = *((uvlong *)(p + BlkSize - sizeof(uvlong)));
+		if(next == 0 && create)
+			goto addone;
+		if(fd == -1)
+			brelease(hlist);
+		hlist = next;
+	}
+	if(fd != -1)
+		free(p);
+	return 0;
+found:
+	if(nsearch > maxp2q)
+		maxp2q = nsearch;
+	next = pq->qpath;
+	if(fd == -1)
+		brelease(hlist);
+	else
+		free(p);
+	if(create)
+		return 0;
+	return next;
+addone:
+	/* p/hlist is the last block of the chain; find its first free slot */
+	for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) ;
+	if(pq != (PQMap *)p)
+		++p2qcoll;
+	if(pq >= pend) {
+fprint(2, "HUH?");
+		next = allocblock();
+		if(next == 0) {
+			/* don't leave the chain block held (or p allocated) on failure */
+			if(fd == -1)
+				brelease(hlist);
+			else
+				free(p);
+			return 0;
+		}
+		*((uvlong *)(p + BlkSize - sizeof(uvlong))) = next;
+		cbwrite(hlist);
+		brelease(hlist);
+		hlist = next;
+		p = cbclean(hlist);
+		pq = (PQMap *)p;
+	}
+	qpath = super.qgen++ | ((uvlong)TFile << 60);
+	pq->qpath = qpath;
+	pq->plen = plen;
+	memmove(pq->pname, path, plen);
+	if(hlist != 0) {	/* shouldn't be possible, but just to be safe */
+		cbwrite(hlist);
+		brelease(hlist);
+	}
+	return qpath;
+}
+
+/* Make midx the head of the qid hash bucket that qpath falls in. */
+void
+setqhash(uvlong qpath, uvlong midx)
+{
+	ulong bucket;
+
+	bucket = qidhash(qpath);
+	cwrite(&midx, sizeof(uvlong), qoffset(bucket));
+}
+
+/*
+ * Map a qid path to the index of its first metadata structure.
+ * The bucket chain is linked through each entry's "qhnext" metadata.
+ * Returns 0 if qpath is 0 or is not found and create is 0.  With
+ * create set, a new metadata entry holding "qhnext" is allocated and
+ * returned; linking it into the bucket is left to the caller (the
+ * setqhash calls here are commented out).
+ */
+uvlong
+q2m(int fd, uvlong qpath, int create)
+{
+	uvlong val;
+	uvlong first, meta;
+	ulong bucket;
+	int nsearch, n;
+
+	if(qpath == 0)
+		return 0;
+	++nq2m;
+	bucket = qidhash(qpath);
+	if(fd == -1)
+		n = cread(&first, sizeof(uvlong), qoffset(bucket));
+	else
+		n= spread(fd, &first, sizeof(uvlong), qoffset(bucket));
+	if(n < 0)
+		sysfatal("cread failure: %r");
+	if(first == 0) {
+		if(create) {
+			meta = setmetaint(0, "qhnext", nil, 0);
+			//setqhash(qpath, meta);
+			return meta;
+		}
+		return 0;
+	}
+	nsearch = 1;
+	for(meta = first; meta; ) {
+		if(getmetaint(fd, meta, "qpath", &val) != MTnone && val == qpath)
+			break;
+		if(getmetaint(fd, meta, "qhnext", &meta) == MTnone)
+			meta = 0;
+		++nsearch;
+	}
+	if(meta == 0) {
+		if(create) {
+			meta = setmetaint(0, "qhnext", nil, first);
+			//setqhash(qpath, meta);
+		}
+	}
+	else
+		if(nsearch > maxq2m)
+			maxq2m = nsearch;
+	return meta;
+}
+
+/*
+ * Move a single path hash entry: remove `from' and enter `to' with the
+ * existing qid path qpath.  Complains and does nothing more if `to'
+ * is already present.
+ */
+void
+rehashone(uvlong qpath, char *from, char *to)
+{
+	PQMap *pq, *rend;
+	uchar *p;
+	uvlong *uvp;
+	uvlong hlist, next;
+	ulong bucket;
+	int plen;
+
+	rmp(from);
+	plen = strlen(to);
+	bucket = pathhash(to);
+	if(cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)) < 0)
+		sysfatal("cread failure: %r");
+	if(hlist == 0) {
+		hlist = allocblock();
+		if(hlist == 0)
+			return;
+		p = cbclean(hlist);
+		pq = (PQMap *)p;
+		pq->qpath = qpath;
+		pq->plen = plen;
+		memmove(pq->pname, to, plen);
+		cbwrite(hlist);
+		brelease(hlist);
+		uvp = cbread(bucket / NPerBlk + 1);
+		uvp[bucket % NPerBlk] = hlist;
+		cbwrite(bucket / NPerBlk + 1);
+		brelease(bucket / NPerBlk + 1);
+		return;
+	}
+	while(hlist) {
+		p = cbread(hlist);
+		if(p == nil) {
+			/* same report p2q makes; nothing is held at this point */
+			fprint(2, "cbread failed on block %ulld\n", hlist);
+			return;
+		}
+		rend = (PQMap *)(p + BlkSize);
+		--rend;
+		for(pq = (PQMap *)p; pq < rend; pq = nextpq(pq))
+			if(plen == pq->plen && memcmp(to, pq->pname, plen) == 0)
+				goto found;
+		next = *((uvlong *)(p + BlkSize - sizeof(uvlong)));
+		if(next == 0)
+			goto addone;
+		brelease(hlist);
+		hlist = next;
+	}
+	return;
+found:
+	fprint(2, "Impossible! Repath destination exists\n");
+	brelease(hlist);
+	return;
+addone:
+	for(pq = (PQMap *)p; pq < rend && pq->qpath != 0; pq = nextpq(pq)) ;
+	if(pq >= rend) {
+		next = allocblock();
+		*((uvlong *)(p + BlkSize - sizeof(uvlong))) = next;
+		cbwrite(hlist);
+		brelease(hlist);
+		if(next == 0)
+			return;
+		hlist = next;
+		p = cbclean(hlist);
+		pq = (PQMap *)p;
+	}
+	pq->qpath = qpath;
+	pq->plen = plen;
+	memmove(pq->pname, to, plen);
+	cbwrite(hlist);
+	brelease(hlist);
+}
+
+/*
+ * Rename a whole subtree in the path hash: recurse through the
+ * "child"/"sib" metadata links, rehashing every descendant with the
+ * new prefix, then rehash the entry for qpath itself.
+ */
+void
+rehashpath(uvlong qpath, char *from, char *to)
+{
+	char *f, *t, *name;
+	uvlong cqid, meta;
+
+	meta = q2m(-1, qpath, 0);
+	if(meta != 0 && getmetaint(-1, meta, "child", &cqid) != MTnone) {
+		while(cqid != 0) {
+			meta = q2m(-1, cqid, 0);
+			if(meta == 0)
+				break;
+			name = getmetastr(-1, meta, "name");
+			f = smprint("%s/%s", from, name);
+			t = smprint("%s/%s", to, name);
+			free(name);
+			rehashpath(cqid, f, t);
+			free(f);
+			free(t);
+			if(getmetaint(-1, meta, "sib", &cqid) == MTnone)
+				break;
+		}
+	}
+	rehashone(qpath, from, to);
+}
+
+/*
+ * Remove the entry `victim' from the block starting at `full' by
+ * sliding the rest of the block down over it and zeroing the space
+ * that opens up.  Returns the start of the free space.
+ */
+static PQMap *
+rmpath(PQMap *full, PQMap *victim)
+{
+	PQMap *next, *last;
+	PQMap *rend;
+	int plen;
+
+	rend = (PQMap *)((char *)full + BlkSize - sizeof(uvlong));
+	for(last = victim; last < rend - 1 && last->plen > 0; last = nextpq(last)) ;
+	/*
+	 * last now points to the start of the first empty path/qid map slot
+	 */
+	plen = victim->plen + offsetof(PQMap, pname[0]);
+	next = nextpq(victim);
+	memmove(victim, next, (char *)rend - (char *)next);
+	/*
+	 * now last has moved up by plen bytes
+	 */
+	last = (PQMap *)((char *)last - plen);
+	memset(last, 0, (char *)rend - (char *)last);
+	return last;
+}
+
+/*
+ * Remove path from the path hash.  After the entry is squeezed out of
+ * its block, the first entry of each following block in the chain is
+ * pulled forward into the freed space when it fits.
+ */
+void
+rmp(char *path)
+{
+	PQMap *pq, *rend, *last;
+	uchar *p;
+	uvlong hlist, next;
+	ulong bucket;
+	int plen;
+
+	plen = strlen(path);
+	if(plen == 0)
+		return;
+	bucket = pathhash(path);
+	if(cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)) < 0)
+		sysfatal("cread failure: %r");
+	while(hlist) {
+		p = cbread(hlist);
+		rend = (PQMap *)(p + BlkSize);
+		--rend;
+		for(pq = (PQMap *)p; pq < rend; pq = nextpq(pq))
+			if(plen == pq->plen && memcmp(path, pq->pname, plen) == 0)
+				goto found;
+		next = *((uvlong *)(p + BlkSize - sizeof(uvlong)));
+		brelease(hlist);
+		hlist = next;
+	}
+	return;
+found:
+	while(hlist) {
+		last = rmpath((PQMap *)p, pq);
+		next = *((uvlong *)(p + BlkSize - sizeof(uvlong)));
+		if(next != 0) {
+			p = cbread(next);
+			pq = (PQMap *)p;
+			if(pq->plen == 0 || pq->plen > (char *)rend - (char *)last) {
+				brelease(next);
+				next = 0;
+			}
+			else
+				memmove(last, pq, pq->plen + offsetof(PQMap, pname[0]));
+		}
+		cbwrite(hlist);
+		brelease(hlist);
+		hlist = next;
+	}
+}
+
+/*
+ * Unlink the metadata entry `victim' from the qid hash chain of the
+ * bucket that qpath falls in.
+ */
+void
+rmq(uvlong qpath, uvlong victim)
+{
+	uvlong prev, meta, next;
+	ulong bucket;
+
+	if(qpath == 0)
+		return;
+	bucket = qidhash(qpath);
+	if(cread(&meta, sizeof(uvlong), qoffset(bucket)) < 0)
+		sysfatal("cread failure: %r");
+	if(meta == victim) {
+		/* victim heads the chain: point the bucket at its successor */
+		if(getmetaint(-1, meta, "qhnext", &next) == MTnone)
+			next = 0;
+		if(cwrite(&next, sizeof(uvlong), qoffset(bucket)) < 0)
+			sysfatal("cwrite failure: %r");
+		return;
+	}
+	for(prev = meta; prev; ) {
+		if(getmetaint(-1, prev, "qhnext", &meta) == MTnone)
+			meta = 0;
+		if(meta == victim) {
+			if(getmetaint(-1, victim, "qhnext", &next) == MTnone)
+				next = 0;
+			setmetaint(prev, "qhnext", nil, next);
+			return;
+		}
+		prev = meta;
+	}
+}
+
+static char hstatbuf[1024];
+
+/*
+ * Format the hash counters into a static buffer and return it;
+ * the result is overwritten by the next call.
+ */
+char *
+prhstat(void)
+{
+	char *p, *e;
+
+	p = hstatbuf;
+	e = p + nelem(hstatbuf);
+	p = seprint(p, e, "Hash stats:\n");
+	p = seprint(p, e, "np2q: %ulld\n", np2q);
+	p = seprint(p, e, "p2qcoll: %ud\n", p2qcoll);
+	p = seprint(p, e, "maxp2q: %ud\n", maxp2q);
+	p = seprint(p, e, "nq2m: %ulld\n", nq2m);
+	p = seprint(p, e, "q2mcoll: %ud\n", q2mcoll);
+	seprint(p, e, "maxq2m: %ud\n", maxq2m);
+	return hstatbuf;
+}
+
+/* Print on fd the bucket number and chain head block that path hashes to. */
+void
+showphash(int fd, char *path)
+{
+	uvlong hlist;
+	ulong bucket;
+
+	bucket = pathhash(path);
+	if(cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)) < 0) {
+		/* hlist is not valid here; report the failure instead of printing it */
+		fprint(fd, "%s: bucket:%uld read failure: %r\n", path, bucket);
+		return;
+	}
+	fprint(fd, "%s: bucket:%uld hlist:%ulld\n", path, bucket, hlist);
+}
+
+/*
+ * Consistency pass over the path hash: remove every path whose qid
+ * has no metadata.  Removing an entry rearranges the chain, so the
+ * scan of that bucket is restarted after each removal.  Progress and
+ * removals are reported on fd.
+ */
+void
+fixpaths(int fd)
+{
+	PQMap *pq, *pend;
+	uvlong *hb;
+	uchar *p;
+	char *path;
+	uvlong hlist, next;
+	ulong bucket;
+	int i, j;
+
+	fprint(fd, "Checking for dangling path names\n");
+	for(bucket = 0, i = 0; i < super.nhashblk; ++i) {
+		hb = cbread(i + 1);
+		if(hb == nil) {
+			fprint(fd, "hash block read failure in fixpaths\n");
+			return;
+		}
+		for(j = 0; j < BlkSize / sizeof(uvlong) && bucket < super.nht; ++j, ++bucket) {
+			if(bucket % 100000 == 0)
+				fprint(fd, ".");
+restart:
+			hlist = hb[j];
+			while(hlist) {
+				p = cbread(hlist);
+				if(p == nil) {
+					fprint(fd, "hlist block read failure in fixpaths\n");
+					brelease(i + 1);	/* don't leave the hash block held */
+					return;
+				}
+				pend = (PQMap *)(p + BlkSize);
+				--pend;
+				for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) {
+					if(q2m(-1, pq->qpath, 0) == 0) {
+						path = θmalloc(pq->plen + 1);
+						memmove(path, pq->pname, pq->plen);
+						path[pq->plen] = 0;	/* stored names are not NUL terminated */
+						fprint(fd, "removing dangling path %s\n", path);
+						rmp(path);
+						free(path);
+						brelease(hlist);
+						goto restart;
+					}
+				}
+				next = *((uvlong *)(p + BlkSize - sizeof(uvlong)));
+				brelease(hlist);
+				hlist = next;
+			}
+		}
+		brelease(i + 1);
+	}
+}
--- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs Thu Feb 20 02:17:32 2014
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Coraid nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+/*
+ * Return the length of dev as reported by dirstat,
+ * or ~0ULL if it cannot be stat'ed.
+ */
+uvlong
+devsize(char *dev)
+{
+	Dir *d;
+	uvlong len;
+
+	d = dirstat(dev);
+	if(d == nil)
+		return ~0ULL;
+	len = d->length;
+	free(d);
+	return len;
+}
+
+/* Return the pid of the calling process; the argument is ignored. */
+int
+threadpid(int dummy)
+{
+	return getpid();
+}
+
+/*
+ * Thread body for the console listener.  Announces
+ * unix!<namespace>/thetafscons and, for each incoming connection,
+ * stores the connection fd in both slots of the caller's fd pair
+ * and runs docons on it.  x is that int[2] pair.
+ */
+static void
+startcons(void *x)
+{
+	int *pfd;
+	char *ns, *path;
+	int acfd, lcfd;
+	char adir[40], ldir[40];
+
+	pfd = x;
+	ns = getns();
+	path = smprint("unix!%s/thetafscons", ns);
+	free(ns);
+	acfd = announce(path, adir);
+	free(path);	/* announce has filled in adir; the dial string is done with */
+	if(acfd < 0)
+		threadexits(nil);
+	while(1) {
+		lcfd = listen(adir, ldir);
+		if(lcfd < 0)
+			break;
+		pfd[0] = lcfd;
+		pfd[1] = lcfd;
+		docons(nil);
+	}
+	close(acfd);
+	threadexits(nil);
+}
+
+/*
+ * Start the console listener thread on pfd and post cfd[0]
+ * as the 9P service "thetafsctl".
+ */
+void
+conspost(int cfd[], int pfd[])
+{
+	threadcreate(startcons, pfd, 8192);
+	if(post9pservice(cfd[0], "thetafsctl", nil) < 0)
+		fprint(2, "post9pservice failed:%r\n");
+}
--- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs Thu Feb 20 02:17:31 2014
@@ -0,0 +1,1293 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+enum {
+	GMperBlk = BlkSize / sizeof(GMeta),
+};
+
+static void freeblob(uvlong);
+
+/* operation counters */
+static uvlong nget;
+static uvlong nrm;
+static uvlong nset;
+static uvlong nalloc;
+static uvlong nfree;
+static uvlong nmiss;
+/* mlock guards mbuf/mblk, alock the metadata free list, block the blob free list */
+static QLock mlock;
+static QLock alock;
+static QLock block;
+/* one-block cache of the most recently used metadata block */
+static GMeta *mbuf;
+static uvlong mblk;
+
+/*
+ * Initialize the metadata and blob regions of a new store on fd.
+ * Metadata entry 0 is set up as the istring "invalid" = "errnt";
+ * every other entry is chained to the following index.  Each blob
+ * block is split into two free half-block blobs, all chained in order.
+ * Work is done 16 blocks at a time; a progress mark goes to fd 2
+ * every tenth chunk.
+ */
+void
+reammeta(int fd)
+{
+	GMeta *mp;
+	Blob *bp;
+	char *p, *e;
+	int i, j, n;
+
+	/* First the GMeta structures */
+	p = θmalloc(16 * BlkSize);
+	e = p + 16 * BlkSize;
+	mp = (GMeta *)p;
+	mp->next = 0;
+	mp->type = MTistring;
+	strcpy(mp->name, "invalid");
+	strcpy(mp->m.str, "errnt");
+	for(++mp, j = 2; (char *)mp < e; ++mp, ++j)
+		mp->next = j;
+	pwrite(fd, p, 16 * BlkSize, super.firstmeta * BlkSize);
+	memset(p, 0, sizeof(GMeta));
+	for(i = 16; i < super.nmeta; i += 16) {
+		n = super.nmeta - i;
+		if(n > 16)
+			n = 16;
+		for(mp = (GMeta *)p; (char *)mp < e; ++mp, ++j)
+			mp->next = j;
+if((i/16) % 10 == 0) fprint(2, ",");
+		pwrite(fd, p, n * BlkSize, (super.firstmeta + i) * BlkSize);
+	}
+	/* Now the string/blob pool */
+	for(i = 0; i < super.nblob; i += 16) {
+		n = super.nblob - i;
+		if(n > 16)
+			n = 16;
+		memset(p, 0, n * BlkSize);
+		for(j = 0; j < n; ++j) {
+			bp = (Blob *)(p + j * BlkSize);
+			bp->len = 0x8000 | (BlkSize/2 - sizeof(short));
+			bp->next = (super.firstblob + i + j) * BlkSize + BlkSize/2;
+			bp = (Blob *)(p + j * BlkSize + BlkSize/2);
+			bp->len = 0x8000 | (BlkSize/2 - sizeof(short));
+			if(i + j + 1 < super.nblob)
+				bp->next = (super.firstblob + i + j + 1) * BlkSize;
+		}
+if((i/16) % 10 == 0) fprint(2, ";");
+		pwrite(fd, p, n * BlkSize, (super.firstblob + i) * BlkSize);
+	}
+	free(p);
+}
+
+/* Drop the cached metadata block, if any. */
+void
+resetmeta(void)
+{
+	qlock(&mlock);
+	if(mbuf) {
+		brelease(mblk);
+		mbuf = nil;
+		mblk = 0;
+	}
+	qunlock(&mlock);
+}
+
+/*
+ * Copy metadata structure idx into buf.  With fd != -1 it is read
+ * straight from fd; otherwise it comes through the one-block cache.
+ * Returns the number of bytes copied, or -1 on a bad index or read
+ * failure.
+ */
+static int
+getmstruct(int fd, GMeta *buf, uvlong idx)
+{
+	uvlong off, blk;
+
+	/* valid indices are 0 .. nmeta*GMperBlk - 1 */
+	if(idx >= super.nmeta * GMperBlk) {
+		fprint(2, "Invalid metadata index: %ulld\n", idx);
+		return -1;
+	}
+	if(fd != -1)
+		return spread(fd, buf, sizeof(GMeta), idx * sizeof(GMeta) + super.firstmeta * BlkSize);
+	blk = idx / GMperBlk + super.firstmeta;
+	off = idx % GMperBlk;
+	qlock(&mlock);
+	if(blk != mblk) {
+		++nmiss;
+		if(mbuf)
+			brelease(mblk);
+		mbuf = cbread(blk);
+		mblk = blk;
+		if(mbuf == nil) {
+			/* leave the cache empty rather than pointing at a failed read */
+			mblk = 0;
+			qunlock(&mlock);
+			return -1;
+		}
+	}
+	memmove(buf, mbuf + off, sizeof(GMeta));
+	qunlock(&mlock);
+	return sizeof(GMeta);
+}
+
+/*
+ * Store buf as metadata structure idx, through the one-block cache,
+ * and write the block out.  Returns the number of bytes stored, or -1
+ * on a bad index or read failure.
+ */
+static int
+savemstruct(GMeta *buf, uvlong idx)
+{
+	uvlong off, blk;
+
+	/* valid indices are 0 .. nmeta*GMperBlk - 1 */
+	if(idx >= super.nmeta * GMperBlk) {
+		fprint(2, "Invalid metadata index: %ulld\n", idx);
+		return -1;
+	}
+	blk = idx / GMperBlk + super.firstmeta;
+	off = idx % GMperBlk;
+	qlock(&mlock);
+	if(blk != mblk) {
+		++nmiss;
+		if(mbuf)
+			brelease(mblk);
+		mbuf = cbread(blk);
+		mblk = blk;
+		if(mbuf == nil) {
+			mblk = 0;
+			qunlock(&mlock);
+			return -1;
+		}
+	}
+	memmove(mbuf + off, buf, sizeof(GMeta));
+	cbwrite(blk);
+	qunlock(&mlock);
+	return sizeof(GMeta);
+}
+
+/*
+ * Take the first structure off the metadata free list.  Its current
+ * contents are copied to buf and its index returned; 0 means none
+ * could be allocated.
+ */
+static uvlong
+allocmeta(GMeta *buf)
+{
+	uvlong nmeta;
+
+	qlock(&alock);
+	if(super.ffmeta == 0) {
+		fprint(2, "Out of metadata space!\n");
+		qunlock(&alock);
+		return 0;
+	}
+	if(getmstruct(-1, buf, super.ffmeta) < 0) {
+		qunlock(&alock);
+		return 0;
+	}
+	++nalloc;
+	nmeta = super.ffmeta;
+	super.ffmeta = buf->next;
+	savesuper();
+	qunlock(&alock);
+	return nmeta;
+}
+
+/*
+ * Return metadata structure idx to the head of the free list,
+ * first freeing any blob it refers to.
+ */
+static void
+freemeta(uvlong idx)
+{
+	GMeta buf;
+
+	qlock(&alock);
+	if(getmstruct(-1, &buf, idx) < 0) {
+		/* release, don't re-acquire: a second qlock here would deadlock */
+		qunlock(&alock);
+		return;
+	}
+	if(buf.type == MTstring || buf.type == MTblob)
+		freeblob(buf.m.val);
+	++nfree;
+	memset(&buf, 0, sizeof(GMeta));
+	buf.next = super.ffmeta;
+	super.ffmeta = idx;
+	savesuper();
+	savemstruct(&buf, idx);
+	qunlock(&alock);
+}
+
+/*
+static void
+insmeta(GMeta *buf, uvlong idx, uvlong after)
+{
+	GMeta abuf;
+
+	if(getmstruct(-1, &abuf, after) < 0)
+		return;
+	buf->next = abuf.next;
+	abuf.next = idx;
+	savemstruct(&abuf, after);
+	savemstruct(buf, idx);
+}
+*/
+
+/*
+ * Return a malloc'ed copy of the blob at byte address bp (its length
+ * word plus the number of bytes the length word gives), or nil if bp
+ * lies outside the blob region or the block cannot be read.
+ * The caller frees the result.
+ */
+static Blob *
+getbstruct(int fd, uvlong bp)
+{
+	Blob *b;
+	void *a;
+	char *cp;
+	uvlong blk;
+	ulong off, m;
+
+	blk = bp / BlkSize;
+	if(blk < super.firstblob || blk >= super.firstblob + super.nblob)
+		return nil;
+	off = bp % BlkSize;
+	m = BlkSize - off;
+	if(m > 32768)
+		m = 32768;
+	if(fd == -1) {
+		cp = cbread(blk);
+		if(cp == nil)
+			return nil;
+		b = (Blob *)(cp + off);
+	}
+	else {
+		b = θmalloc(m);
+		spread(fd, b, m, bp);
+	}
+	m = b->len & 0x7fff;
+	a = θmalloc(m + sizeof(short));
+	memmove(a, b, m + sizeof(short));
+	if(fd == -1)
+		brelease(blk);
+	else
+		free(b);
+	return a;
+}
+
+/* Write blob b back to byte address bp; returns the result of cwrite. */
+static int
+savebstruct(Blob *b, uvlong bp)
+{
+	return cwrite(b, b->len & 0x7fff, bp);
+}
+
+/*
+ * First-fit allocation from the blob free list.  n is rounded up to a
+ * multiple of BlobQuan; a free blob with at least BlobQuan to spare
+ * is split and the remainder left on the list.  Returns a malloc'ed
+ * copy of the allocated blob (caller frees) and stores its address in
+ * *bp, or returns nil if nothing fits.
+ */
+static Blob *
+allocblob(int n, uvlong *bp)
+{
+	Blob *cb, *pb, *nb;
+	uvlong cur, prev;
+	int an;
+
+	an = (n + BlobQuan - 1) & ~(BlobQuan - 1);
+	prev = 0;
+	cb = nil;
+	pb = nil;
+	qlock(&block);
+	for(cur = super.ffblob; cur; ) {
+		cb = getbstruct(-1, cur);
+		if(cb == nil) {
+			qunlock(&block);
+			free(pb);
+			return nil;
+		}
+		if((cb->len & 0x7fff) >= an)
+			break;
+		/* only the immediate predecessor is needed; drop the one before it */
+		free(pb);
+		pb = cb;
+		prev = cur;
+		cur = cb->next;
+	}
+	if(cur == 0) {
+		qunlock(&block);
+		free(cb);	/* same copy as pb here, or nil if the list was empty */
+		fprint(2, "No free blobs\n");
+		return nil;
+	}
+	if((cb->len & 0x7fff) >= an + BlobQuan) {
+		if(prev != 0) {
+			pb->next = cur + an;
+			savebstruct(pb, prev);
+		}
+		else {
+			super.ffblob = cur + an;
+			savesuper();
+		}
+		nb = (Blob *)((char *)cb + an);
+		nb->len = cb->len - an;
+		nb->next = cb->next;
+		savebstruct(nb, cur + an);
+		cb->len = an - sizeof(short);
+	}
+	else {
+		if(prev != 0) {
+			pb->next = cb->next;
+			savebstruct(pb, prev);
+		}
+		else {
+			super.ffblob = cb->next;
+			savesuper();
+		}
+		cb->len &= 0x7fff;
+	}
+	qunlock(&block);
+	free(pb);
+	if(bp)
+		*bp = cur;
+	return cb;
+}
+
+/*
+ * This is a pretty gross hack, and probably oversimplified.
+ * However, I'm not real happy with this part and may redo
+ * it anyway.  When freeing a blob (or long string) in the pool,
+ * we don't attempt to coalesce them, we just add it on to
+ * the end of the list.  Until we've done enough allocations
+ * to use up all the pool space once, we don't really care.
+ * Because the most common use for the pool space is strings
+ * and the quantum is set to 64, I suspect nearly all requests
+ * will be satisfied with a single quantum and coalescing
+ * wouldn't have significant benefit anyway.  So there's
+ * the excuse for taking what is probably too simple an
+ * approach.
+ */
+static void
+freeblob(uvlong bp)
+{
+	Blob *b, *b2;
+
+	qlock(&block);
+	b = getbstruct(-1, bp);
+	if(b == nil) {
+		fprint(2, "freeblob: invalid blob address %ulld\n", bp);
+		qunlock(&block);
+		return;
+	}
+	b->len |= 0x8000;
+	b->next = 0;
+	savebstruct(b, bp);
+	free(b);	/* getbstruct returned a private copy */
+	b2 = getbstruct(-1, super.lfblob);
+	if(b2) {
+		b2->next = bp;
+		savebstruct(b2, super.lfblob);
+		free(b2);
+		super.lfblob = bp;
+		savesuper();
+	}
+	else
+		fprint(2, "Unexpected failure to read super.lfblob\n");
+	qunlock(&block);
+}
+
+/*
+ * Return a malloc'ed copy of the data of the blob at bp, storing its
+ * length in *n when n is not nil.  Returns nil if bp is invalid or
+ * the blob is marked free (0x8000 set in its length).
+ */
+void *
+getblob(int fd, uvlong bp, int *n)
+{
+	Blob *b;
+	void *a;
+
+	b = getbstruct(fd, bp);
+	if(b == nil)
+		return nil;
+	if(b->len & 0x8000) {
+		free(b);
+		return nil;
+	}
+	if(n)
+		*n = b->len;
+	a = θmalloc(b->len);
+	memmove(a, b->data, b->len);
+	free(b);
+	return a;
+}
+
+/*
+ * Store n bytes of blob.  If bp names an existing blob whose length
+ * is exactly n it is overwritten in place; otherwise the old blob (if
+ * any) is marked free and a new one is allocated.  Returns the
+ * address of the blob holding the data, or 0 if none could be
+ * allocated.
+ */
+uvlong
+setblob(void *blob, int n, uvlong bp)
+{
+	Blob *b;
+	uvlong nbp;
+
+	b = getbstruct(-1, bp);
+	if(b) {
+		if(b->len == n) {
+			memmove(b->data, blob, n);
+			savebstruct(b, bp);
+			free(b);
+			return bp;
+		}
+		else {
+			b->len |= 0x8000;
+			savebstruct(b, bp);
+		}
+		free(b);	/* done with the copy of the old blob */
+	}
+	b = allocblob(n, &nbp);
+	if(b == nil)
+		return 0;
+	memmove(b->data, blob, n);
+	savebstruct(b, nbp);
+	free(b);
+	return nbp;
+}
+
+/*
+ * Search the metadata list starting at stidx for an entry called
+ * name.  On success its value is copied to *val and its type is
+ * returned; otherwise MTnone is returned.
+ */
+int
+getmeta(int fd, uvlong stidx, char *name, MVal *val)
+{
+	GMeta buf;
+	uvlong next;
+
+	++nget;
+	for(next = stidx; next; ) {
+		if(getmstruct(fd, &buf, next) < 0)
+			return MTnone;
+		if(strcmp(name, buf.name) == 0)
+			break;
+		next = buf.next;
+	}
+	if(next == 0)
+		return MTnone;
+	*val = buf.m;
+	return buf.type;
+}
+
+/*
+ * Fetch integer metadata.  Returns MTint and sets *val, or MTnone if
+ * the entry is missing or not an integer.
+ */
+int
+getmetaint(int fd, uvlong stidx, char *name, uvlong *val)
+{
+	MVal x;
+	int typ;
+
+	typ = getmeta(fd, stidx, name, &x);
+	switch(typ) {
+	case MTint:
+		*val = x.val;
+		return typ;
+	default:
+		return MTnone;
+	}
+}
+
+/*
+ * Fetch string metadata, whether stored inline (MTistring) or in the
+ * blob pool (MTstring).  Returns a malloc'ed string the caller frees,
+ * or nil if the entry is missing or not a string.
+ */
+char *
+getmetastr(int fd, uvlong stidx, char *name)
+{
+	MVal x;
+	char *p;
+	int typ;
+
+	typ = getmeta(fd, stidx, name, &x);
+	switch(typ) {
+	case MTistring:
+		p = estrdup9p(x.str);
+		return p;
+	case MTstring:
+		return getblob(fd, x.val, nil);
+	default:
+		return nil;
+	}
+}
+
+/*
+ * Set metadata entry name in the list starting at stidx, creating it
+ * at the end of the list if it is not there (stidx == 0 starts a new
+ * list).  For MTistring, val carries a char* to the string, which is
+ * copied inline; for other types val is stored as is.  newname, if
+ * not nil, is the name the entry is given.  Returns the index of the
+ * entry written, or 0 on failure.
+ */
+uvlong
+setmeta(uvlong stidx, char *name, char *newname, uvlong val, int type)
+{
+	GMeta buf, nbuf;
+	uvlong next, last, nmeta;
+
+	++nset;
+	last = 0;
+	for(next = stidx; next; ) {
+		if(getmstruct(-1, &buf, next) < 0)
+			return 0;
+		last = next;
+		if(strcmp(name, buf.name) == 0) {
+			if(type == MTistring) {
+				if(buf.type == MTstring)
+					freeblob(buf.m.val);
+				strcpy(buf.m.str, (char *)val);
+			}
+			else
+				buf.m.val = val;
+			buf.type = type;	/* in case we're changing the string length */
+			if(newname)
+				strcpy(buf.name, newname);
+			savemstruct(&buf, last);
+			return last;
+		}
+		next = buf.next;
+	}
+	nmeta = allocmeta(&nbuf);
+	if(nmeta == 0)
+		return 0;
+	if(last == 0)
+		nbuf.next = 0;
+	else {
+		nbuf.next = buf.next;
+		buf.next = nmeta;
+		savemstruct(&buf, last);
+	}
+	nbuf.type = type;
+	if(newname)
+		strcpy(nbuf.name, newname);
+	else
+		strcpy(nbuf.name, name);
+	if(type == MTistring)
+		strcpy(nbuf.m.str, (char *)val);
+	else
+		nbuf.m.val = val;
+	savemstruct(&nbuf, nmeta);
+	return nmeta;
+}
+
+/* Set integer metadata; see setmeta. */
+uvlong
+setmetaint(uvlong stidx, char *name, char *newname, uvlong val)
+{
+	return setmeta(stidx, name, newname, val, MTint);
+}
+
+/*
+ * Set string metadata.  Strings of up to 7 bytes are stored inline;
+ * longer ones go to the blob pool, reusing the blob at bp when
+ * setblob can.
+ */
+uvlong
+setmetastr(uvlong stidx, char *name, char *newname, char *s, uvlong bp)
+{
+	uvlong nbp;
+	int n;
+
+	n = strlen(s);
+	if(n <= 7) {
+		return setmeta(stidx, name, newname, (uvlong)s, MTistring);
+	}
+	nbp = setblob(s, n + 1, bp);
+	return setmeta(stidx, name, newname, nbp, MTstring);
+}
+
+/* Overwrite the link, name, type and value of metadata structure idx. */
+void
+setmstruct(uvlong idx, uvlong next, char *name, int type, uvlong val)
+{
+	GMeta mb;
+
+	if(getmstruct(-1, &mb, idx) < 0)
+		return;
+	mb.next = next;
+	strcpy(mb.name, name);
+	mb.type = type;
+	mb.m.val = val;
+	savemstruct(&mb, idx);
+}
+
+uvlong
+setmetablob(uvlong stidx, char *name, char *newname, uchar *blob, int n, uvlong bp)
+{
+	uvlong nbp;
+
+	nbp = setblob(blob, n, bp);
+	return setmeta(stidx, name, newname, nbp, MTblob);
+}
+
+/*
+ * Unlink the metadata structure victim from the list headed by
+ * midx and release it with freemeta.  Returns the (possibly new)
+ * head of the list: victim's successor when victim was the head,
+ * midx otherwise.  If victim cannot be read, a list element cannot
+ * be read, or victim is not on the list, the list is left alone
+ * and midx is returned.
+ */
+uvlong
+rmmeta(uvlong midx, uvlong victim)
+{
+	GMeta buf, vbuf;
+	uvlong next;
+
+	++nrm;
+	if(getmstruct(-1, &vbuf, victim) < 0)
+		return midx;
+	if(midx == victim) {
+		next = vbuf.next;
+		freemeta(victim);
+		return next;
+	}
+	for(next = midx; next; ) {
+		if(getmstruct(-1, &buf, next) < 0)
+			return midx;
+		if(buf.next == victim) {
+			buf.next = vbuf.next;
+			freemeta(victim);
+			savemstruct(&buf, next);
+			return midx;
+		}
+		/* step to the current element's successor, not the victim's */
+		next = buf.next;
+	}
+	return midx;
+}
+
+/*
+ * Release every metadata structure on the list headed by midx.
+ * Stops early if a structure cannot be read.
+ */
+void
+rmmlist(uvlong midx)
+{
+	GMeta buf;
+	uvlong next;
+
+	++nrm;
+	next = midx;
+	while(next) {
+		if(getmstruct(-1, &buf, next) < 0)
+			return;
+		freemeta(next);
+		next = buf.next;
+	}
+}
+
+/*
+ * Grow a file from a single data block to one index block:
+ * allocate a block, make dblk its first entry, and rewrite the
+ * "dblock" metadatum as "index" pointing at the new block.
+ * Returns the new block number, or 0 if allocation fails.
+ */
+static uvlong
+promote1(uvlong midx, uvlong dblk, int)
+{
+	uvlong *p;
+	uvlong nblk;
+
+	nblk = allocblock();
+	if(nblk == 0)
+		return 0;
+	p = cbclean(nblk);
+	p[0] = dblk;
+	cbwrite(nblk);
+	setmetaint(midx, "dblock", "index", nblk);
+	brelease(nblk);
+	return nblk;
+}
+
+/*
+ * Add a level above an index block: allocate a block, make iblk
+ * its first entry, and rewrite the "index" metadatum as "indirect".
+ * Returns the new block number, or 0 if allocation fails.
+ */
+static uvlong
+promote2(uvlong midx, uvlong iblk, int)
+{
+	uvlong *p;
+	uvlong nblk;
+
+	nblk = allocblock();
+	if(nblk == 0)
+		return 0;
+	p = cbclean(nblk);
+	p[0] = iblk;
+	cbwrite(nblk);
+	setmetaint(midx, "index", "indirect", nblk);
+	brelease(nblk);
+	return nblk;
+}
+
+/*
+ * Grow to a three-level tree.  When levels is 1 an intermediate
+ * block is inserted first, so two blocks are allocated; the top
+ * block always gets the block below it as its first entry.  The
+ * metadatum named "index" (levels == 1) or "indirect" (otherwise)
+ * is rewritten as "dblindir".  Returns the new top block, or 0 if
+ * an allocation fails.
+ */
+static uvlong
+promote3(uvlong midx, uvlong iblk, int levels)
+{
+	uvlong *p;
+	char *name;
+	uvlong nblk;
+
+	if(levels == 1) {
+		nblk = allocblock();
+		if(nblk == 0)
+			return 0;
+		p = cbclean(nblk);
+		p[0] = iblk;
+		cbwrite(nblk);
+		brelease(nblk);
+		iblk = nblk;
+	}
+	nblk = allocblock();
+	if(nblk == 0)
+		return 0;
+	p = cbclean(nblk);
+	p[0] = iblk;
+	cbwrite(nblk);
+	if(levels == 1)
+		name = "index";
+	else
+		name = "indirect";
+	setmetaint(midx, name, "dblindir", nblk);
+	brelease(nblk);
+	return nblk;
+}
+
+static uvlong
+doindir(int fd, uvlong iblk, int off, int allocate)
+{
+	uvlong *p;
+	uvlong pblk;
+
+	if(iblk < super.firstdat || iblk >= super.nblk)
+
return 0; + if(fd != -1) + spread(fd, &pblk, sizeof(uvlong), iblk * BlkSize + off * sizeof(uvlong)); + else { + p = cbread(iblk); + pblk = p[off]; + if(pblk == 0) { + if(allocate) { + pblk = allocblock(); + if(pblk == 0) + return 0; + p[off] = pblk; + cbwrite(iblk); + cbclean(pblk); + cbwrite(pblk); + brelease(pblk); + } + } + brelease(iblk); + } + return pblk; +} + +uvlong +locate(int fd, uvlong midx, uvlong vblk, int allocate) +{ + uvlong *p; + uvlong iblk, pblk; + ulong pperb; + int levels, l1off, l2off, l3off; + + if(getmetaint(fd, midx, "dblindir", &iblk) == MTint) + levels = 3; + else if(getmetaint(fd, midx, "indirect", &iblk) == MTint) + levels = 2; + else if(getmetaint(fd, midx, "index", &iblk) == MTint) + levels = 1; + else if(getmetaint(fd, midx, "dblock", &iblk) == MTint) + levels = 0; + else + return 0; + pperb = BlkSize / sizeof(uvlong); + l1off = vblk % pperb; + l2off = (vblk / pperb) % pperb; + l3off = vblk / (pperb * pperb); + if(levels < 3 && l3off != 0) { + iblk = promote3(midx, iblk, levels); + levels = 3; + } + else if(levels < 2 && l2off != 0) { + iblk = promote2(midx, iblk, levels); + levels = 2; + } + else if(levels < 1 && l1off > 0) { + iblk = promote1(midx, iblk, levels); + levels = 1; + } + pblk = 0; + switch(levels) { + case 3: + iblk = doindir(fd, iblk, l3off, allocate); + case 2: + iblk = doindir(fd, iblk, l2off, allocate); + case 1: + if(iblk == 0) + return 0; + p = cbread(iblk); + pblk = p[l1off]; + if(pblk == 0 && allocate) { + pblk = allocblock(); + cbclean(pblk); + p[l1off] = pblk; + cbwrite(pblk); + brelease(pblk); + cbwrite(iblk); + } + brelease(iblk); + break; + case 0: + pblk = iblk; + if(pblk == 0 && allocate) { + pblk = allocblock(); + cbclean(pblk); + setmetaint(midx, "dblock", nil, pblk); + cbwrite(pblk); + brelease(pblk); + } + break; + } + if(pblk < super.firstdat || pblk >= super.nblk) { + fprint(2, "Bogus block number found in locate: index:%ulld\n", iblk); + return 0; + } + return pblk; +} + +void +freedata(uvlong 
midx) +{ + uvlong *index1, *index2, *index3; + uvlong iblk; + int i, j, k; + + if(getmetaint(-1, midx, "dblindir", &iblk) == MTint) { + if(iblk == 0) + return; + if(iblk < super.firstdat) { + fprint(2,"Bogus dblindir block in freedat: %ulld\n", iblk); + return; + } + index3 = cbread(iblk); + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(index3[i] >= super.firstdat && index3[i] < super.nblk) { + index2 = cbread(index3[i]); + for(j = 0; j < BlkSize / sizeof(uvlong); ++j) { + if(index2[j] >= super.firstdat && index2[j] < super.nblk) { + index1 = cbread(index2[j]); + for(k = 0; k < BlkSize / sizeof(uvlong); ++k) + if(index1[k] != 0) + freeblock(index1[k]); + brelease(index2[j]); + freeblock(index2[j]); + } + } + brelease(index3[i]); + freeblock(index3[i]); + } + } + brelease(iblk); + freeblock(iblk); + } + else if(getmetaint(-1, midx, "indirect", &iblk) == MTint) { + if(iblk == 0) + return; + if(iblk < super.firstdat) { + fprint(2, "Bogus indirect block in freedat: %ulld\n", iblk); + return; + } + index2 = cbread(iblk); + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(index2[i] >= super.firstdat && index2[i] < super.nblk) { + index1 = cbread(index2[i]); + for(j = 0; j < BlkSize / sizeof(uvlong); ++j) + if(index1[j] != 0) + freeblock(index1[j]); + brelease(index2[i]); + freeblock(index2[i]); + } + } + brelease(iblk); + freeblock(iblk); + } + else if(getmetaint(-1, midx, "index", &iblk) == MTint) { + if(iblk == 0) + return; + if(iblk < super.firstdat) { + fprint(2, "Bogus index block in freedat: %ulld\n", iblk); + return; + } + index1 = cbread(iblk); + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) + if(index1[i] != 0) + freeblock(index1[i]); + brelease(iblk); + freeblock(iblk); + } +} + +void +prmeta(int fd, uvlong qpath) +{ + GMeta buf; + char *p; + uvlong meta, next; + int i, n; + + meta = q2m(-1, qpath, 0); + if(meta == 0) { + fprint(fd, "no metadata\n"); + return; + } + for(next = meta; next; ) { + if(getmstruct(-1, &buf, next) < 0) + break; + 
switch(buf.type) { + case MTnone: + break; + case MTint: + fprint(fd, "%s: %ulld(%016ullx)\n", buf.name, buf.m.val, buf.m.val); + break; + case MTistring: + fprint(fd, "%s: %s\n", buf.name, buf.m.str); + break; + case MTstring: + p = getblob(-1, buf.m.val, nil); + fprint(fd, "%s: %s\n", buf.name, p); + free(p); + break; + case MTblob: + fprint(fd, "%s:", buf.name); + p = getblob(-1, buf.m.val, &n); + for(i = 0; i < n; ++i) + fprint(fd, " %02x", p[i]); + fprint(fd, "\n"); + free(p); + break; + } + next = buf.next; + } +} + +static char mstatbuf[1024]; + +char * +prmstat(void) +{ + char *p, *e; + + p = mstatbuf; + e = p + nelem(mstatbuf); + p = seprint(p, e, "Metadata stats:\n"); + p = seprint(p, e, "getmeta calls: %ulld\n", nget); + p = seprint(p, e, "setmeta calls: %ulld\n", nset); + p = seprint(p, e, "rmmeta calls: %ulld\n", nrm); + p = seprint(p, e, "alloc calls: %ulld\n", nalloc); + p = seprint(p, e, "free calls: %ulld\n", nfree); + seprint(p, e, "misses: %ulld\n", nmiss); + return mstatbuf; +} + +static uvlong +qoffset(ulong bucket) +{ + return BlkSize * (super.nhashblk + 1) + bucket * sizeof(uvlong); +} + +void +recovermeta(int fd) +{ + GMeta mb; + uvlong midx1, midx2, qhnext, qb, n; + int saidit; + + /* First set all the marker flags */ + fprint(fd, "Setting flags\n"); + for(midx1 = 1; midx1 < super.nmeta * GMperBlk; ++midx1) { + getmstruct(-1, &mb, midx1); + mb.type |= 0x80; + savemstruct(&mb, midx1); + } + + /* Go through all the q2m hash table and mark referenced ones in use */ + fprint(fd, "Marking ones referenced from QID hash table\n"); + for(qb = 0; qb < super.nht; ++qb) { + cread(&midx1, sizeof(uvlong), qoffset(qb)); + qhnext = 0; + n = 0; + saidit = 0; + while(midx1 != 0) { + ++n; + getmstruct(-1, &mb, midx1); + mb.type &= 0x7f; + if(mb.type == MTnone && !saidit) { + fprint(fd, "Unexpected null metadatum at %ulld in bucket %ulld\n", midx1, qb); + saidit = 1; + } + midx2 = midx1; + midx1 = mb.next; + if(strcmp(mb.name, "qhnext") == 0 && mb.m.val != 0) 
{ + qhnext = mb.m.val; + fprint(fd, "Warning, QID collision qb:%ulld midx:%ulld\n", qb, midx2); + } + savemstruct(&mb, midx2); + if(midx1 == 0) { + midx1 = qhnext; + qhnext = 0; + } + } + if(n > 128) + fprint(fd, "Unexpected large list at qb %ulld, size %ulld\n", qb, n); + } + + fprint(fd, "Sizing old free list\n"); + n = 0; + for(midx1 = super.ffmeta; midx1 != 0; ) { + ++n; + if(n >= super.nmeta * GMperBlk) { + fprint(fd, "Cycle in old free list?\n"); + break; + } + getmstruct(-1, &mb, midx1); + midx1 = mb.next; + } + fprint(fd, "Old free list has %ulld structures\n", n); + + /* Reclaim the free ones */ + fprint(fd, "Rebuilding free list\n"); + n = 0; + super.ffmeta = 0; + for(midx1 = super.nmeta * GMperBlk - 1; midx1 != 0; --midx1) { + getmstruct(-1, &mb, midx1); + if(mb.type & 0x80) { + memset(&mb, 0, sizeof(GMeta)); + mb.next = super.ffmeta; + super.ffmeta = midx1; + savemstruct(&mb, midx1); + ++n; + } + } + savesuper(); + fprint(fd, "Recovered %ulld free metadata structures\n", n); +} + +static int +markinuse(char *shadow, uvlong blk) +{ + long byt; + int bit, old; + + if(blk < super.firstdat || blk >= super.nblk) + return -1; + byt = blk / 8; + bit = blk % 8; + old = shadow[byt] & (1 << bit); + shadow[byt] &= ~(1 << bit); + return old; +} + +/* Dealing with humans always makes the code ugly. 
*/ +static char *idxnames[] = {"data", "index", "indirect", "dblindir"}; + +static void +chkidxalloc(int fd, char *shadow, uvlong blk, uvlong midx, int lev) +{ + uvlong *iblk; + int i; + + if(blk == 0) + return; + switch(markinuse(shadow, blk)) { + case -1: + fprint(fd, "Bogus %s block: %ulld metadataum %ulld\n", idxnames[lev+1], blk, midx); + return; + case 0: + fprint(fd, "Doubly allocated %s block: %ulld meta %ulld\n", idxnames[lev+1], blk, midx); + break; + } + iblk = cbread(blk); + if(iblk == nil) { + fprint(fd, "unexpected error reading block %ulld\n", blk); + return; + } + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(iblk[i] == 0) + continue; + if(lev > 0) + chkidxalloc(fd, shadow, iblk[i], midx, lev - 1); + else { + switch(markinuse(shadow, iblk[i])) { + case -1: + fprint(fd, "Bogus %s block: %ulld in %s block %ulld meta %ulld\n", + idxnames[lev], iblk[i], idxnames[lev+1], blk, midx); + i = BlkSize / sizeof(uvlong); + break; + case 0: + fprint(fd, "Doubly allocated %s block: %ulld in %s block %ulld meta %ulld\n", + idxnames[lev], iblk[i], idxnames[lev+1], blk, midx); + break; + } + } + } + brelease(blk); +} + +void +checkalloc(int fd) +{ + GMeta mb; + uvlong *hblk; + char *shadow, *fb; + uvlong idx; + long i; + int j, k, l; + + fprint(fd, "Initializing shadow free map\n"); + shadow = θmalloc(super.nfreemap * BlkSize); + for(i = super.firstdat; i < super.nblk; ++i) + shadow[i/8] |= 1 << (i % 8); + for(i = 0, j = 0; j < super.nhashblk; ++j) { + hblk = cbread(j + 1); + for(k = 0; k < BlkSize / sizeof(uvlong) && i < super.nht; ++k, ++i) { + if(i % 100000 == 0) + fprint(fd, "."); + idx = hblk[k]; + while(idx != 0) { + switch(markinuse(shadow, idx)) { + case -1: + fprint(fd, "Bogus block number in hash list for bucket %ld\n", i); + idx = 0; + break; + case 0: + fprint(fd, "Doubly allocated block in hash table: %ulld bucket %ld\n", idx, i); + default: + if(cread(&idx, sizeof(uvlong), idx * BlkSize + (BlkSize - sizeof(uvlong))) < 0) { + fprint(fd, "Error 
reading bucket next link: %ld %ulld\n", i, idx); + idx = 0; + } + if(idx != 0) + fprint(fd, ","); + break; + } + } + } + brelease(j + 1); + } + fprint(fd, "Scanning metadata\n"); + for(idx = 1; idx < super.nmeta * GMperBlk; ++idx) { + if(idx % 100000 == 0) + fprint(fd, "."); + getmstruct(-1, &mb, idx); + if(mb.type != MTint) + continue; + if(strcmp(mb.name, "index") == 0) + chkidxalloc(fd, shadow, mb.m.val, idx, 0); + else if(strcmp(mb.name, "indirect") == 0) + chkidxalloc(fd, shadow, mb.m.val, idx, 1); + else if(strcmp(mb.name, "dblindir") == 0) + chkidxalloc(fd, shadow, mb.m.val, idx, 2); + } + fprint(fd, "Comparing to on-disk free map\n"); + l = 0; + for(j = 0; j < super.nfreemap; ++j) { + fb = cbread(super.freemap + j); + for(k = 0; k < BlkSize; ++k) { + if(fb[k] != shadow[j*BlkSize+k]) { + if(++l < 10) /* don't flood the output with too many */ + fprint(fd, "%d:%02ux-%02ux\n", j*BlkSize+k, fb[k], (uchar)shadow[j*BlkSize+k]); + if((fb[k] & shadow[j*BlkSize+k]) != fb[k]) { + fprint(fd, "Marking in use\n"); + fb[k] &= shadow[j*BlkSize + k]; + cbwrite(super.freemap + j); + } + } + } + brelease(super.freemap + j); + } + free(shadow); +} + +void +mprint(int fd, uvlong idx) +{ + GMeta mb; + char *p; + int i, n; + + getmstruct(-1, &mb, idx); + switch(mb.type) { + case MTnone: + fprint(fd, "Meta:%ulld name:%s type:none next:%ulld val:%ulld\n", idx, mb.name, mb.next, mb.m.val); + break; + case MTint: + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:%ulld(%016ullx)\n", idx, mb.name, mb.type, mb.next, mb.m.val, mb.m.val); + break; + case MTistring: + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:%s\n", idx, mb.name, mb.type, mb.next, mb.m.str); + break; + case MTstring: + p = getblob(-1, mb.m.val, nil); + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:(%ulld)%s\n", idx, mb.name, mb.type, mb.next, mb.m.val, p); + free(p); + break; + case MTblob: + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld ", idx, mb.name, mb.type, mb.next); + p = getblob(-1, 
mb.m.val, &n); + for(i = 0; i < n; ++i) + fprint(fd, " %02x", p[i]); + fprint(fd, "\n"); + free(p); + break; + default: + fprint(fd, "unknown Meta:%ulld type%d next%ulld\n", idx, mb.type, mb.next); + break; + } +} + +void +mpred(int fd, uvlong idx) +{ + GMeta mb; + uvlong i; + + for(i = 1; i < super.nmeta * GMperBlk; ++i) { + getmstruct(-1, &mb, i); + if(mb.type != MTnone && mb.next == idx) { + fprint(fd, "Meta:%ulld predecessor:%ulld\n", idx, i); + mprint(fd, i); + return; + } + } +} + +static void +idxuse(int fd, uvlong iblk, uvlong blk, uvlong midx, int lev) +{ + uvlong *bp; + int i; + + if(iblk == 0) + return; + if(iblk == blk) + fprint(fd, "%s entry meta: %ulld\n", idxnames[lev+1], midx); + bp = cbread(iblk); + if(bp == nil) { + fprint(fd, "error reading block %ulld\n", iblk); + return; + } + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(bp[i] == blk) + fprint(fd, "%s block in %s block %ulld in meta %ulld\n", idxnames[lev], idxnames[lev+1], iblk, midx); + if(lev > 0) + idxuse(fd, bp[i], blk, midx, lev - 1); + } + brelease(iblk); +} + +void +blockuse(int fd, uvlong blk) +{ + GMeta mb; +// PQMap *pq, *pend; +// char *p; + uvlong /* hlist, */ midx; +// long i; + + if(blk == 0) { + fprint(fd, "superblock\n"); + return; + } + if(blk < super.nhashblk + 1) { + fprint(fd, "P2Q hash table\n"); + return; + } + if(blk < 2 * super.nhashblk + 1) { + fprint(fd, "Q2M hash table\n"); + return; + } + if(blk >= super.freemap && blk < super.freemap + super.nfreemap) { + fprint(fd, "free bitmap\n"); + return; + } + if(blk >= super.firstmeta && blk < super.firstmeta + super.nmeta) { + fprint(fd, "metadata structure pool\n"); + return; + } + if(blk >= super.firstblob && blk < super.firstblob + super.nblob) { + fprint(fd, "string/blob pool\n"); + return; + } +#ifdef NOTDEF + for(i = 0; i < super.nht; ++i) { + if(i % 100000 == 0) + fprint(fd, "."); + if(cread(&hlist, sizeof(uvlong), BlkSize + i * sizeof(uvlong)) < 0) { + fprint(fd, "Error reading bucket %ld\n", i); + continue; 
+ } + if(hlist == blk) + fprint(fd, "P2Q hash bucket %ld\n", i); + while(hlist != 0) { + p = cbread(hlist); + if(p == nil) { + fprint(fd, "Error reading hash list block %ulld\n", hlist); + break; + } + pend = (PQMap *)(p + BlkSize); + --pend; +// for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) { +// } + } + } +#endif + for(midx = 1; midx < super.nmeta * GMperBlk; ++midx) { + getmstruct(-1, &mb, midx); + if(mb.type != MTint) + continue; + if(mb.m.val == 0) + continue; + if(strcmp(mb.name, "index") == 0) + idxuse(fd, mb.m.val, blk, midx, 0); + else if(strcmp(mb.name, "indirect") == 0) + idxuse(fd, mb.m.val, blk, midx, 1); + else if(strcmp(mb.name, "dblindir") == 0) + idxuse(fd, mb.m.val, blk, midx, 2); + } +} + +void +fixfamilies(int fd) +{ + GMeta mb; + uvlong midx; + + for(midx = 1; midx < super.nmeta * GMperBlk; ++midx) { + if(getmstruct(-1, &mb, midx) < 0) + continue; + if(mb.type != MTint) + continue; + if(strcmp(mb.name, "child") != 0 && strcmp(mb.name, "sib") != 0) + continue; + if(mb.m.val == 0) + continue; + if(q2m(-1, mb.m.val, 0) == 0) { + fprint(fd, "clearing dangling %s:%ulld in meta struct %ulld\n", mb.name, mb.m.val, midx); + mb.m.val = 0; + savemstruct(&mb, midx); + } + } +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:30 2014 @@ -0,0 +1,34 @@ + +#include +#include +#include +#include <9p.h> +#include +#include "dat.h" + +/* RPC -- RFC 1057 */ +enum { + AUTH_NULL = 0, + AUTH_UNIX, + AUTH_SHORT, + AUTH_DES, + + CALL = 0, + REPLY, + + MSG_ACCEPTED = 0, + MSG_DENIED, + + SUCCESS = 0, + PROG_UNAVAIL, + PROG_MISMATCH, + PROC_UNAVAIL, + GARBAGE_ARGS, + + RPC_MISMATCH = 0, + AUTH_ERROR, + + AUTH_BADCRED = 1, + AUTH_REJECTEDCRED, + AUTH_BADVERF, + AUTH_REJECTEDVERF, + AUTH_TOOWEAK, + + PMAP_PROG = 100000, + PMAP_VERS = 2, + PMAP_PORT = 111, + IPPROTO_TCP = 6, + IPPROTO_UDP = 17, + + PMAPPROC_NULL = 0, + PMAPPROC_SET, + PMAPPROC_UNSET, + PMAPPROC_GETPORT, + PMAPPROC_DUMP, + PMAPPROC_CALLIT, +}; + +/* 
NFSv3 -- RFC 1813 */ +enum { + NFS_PROG = 100003, + NFS_VERS = 3, + NFS_PORT = 2049, + + NFS3_FHSIZE = 64, + NFS3_COOKIEVERFSIZE = 8, + NFS3_CREATEVERFSIZE = 8, + NFS3_WRITEVERFSIZE = 8, + + NFS3_OK = 0, + NFS3ERR_PERM, + NFS3ERR_NOENT, + NFS3ERR_IO = 5, + NFS3ERR_NXIO, + NFS3ERR_ACCES = 13, + NFS3ERR_EXIST = 17, + NFS3ERR_XDEV, + NFS3ERR_NODEV, + NFS3ERR_NOTDIR, + NFS3ERR_ISDIR, + NFS3ERR_INVAL, + NFS3ERR_FBIG = 27, + NFS3ERR_NOSPC, + NFS3ERR_ROFS = 30, + NFS3ERR_MLINK, + NFS3ERR_NAMETOOLONG = 63, + NFS3ERR_NOTEMPTY = 66, + NFS3ERR_DQUOT = 69, + NFS3ERR_STALE, + NFS3ERR_REMOTE, + NFS3ERR_BADHANDLE = 10001, + NFS3ERR_NOT_SYNC, + NFS3ERR_BAD_COOKIE, + NFS3ERR_NOTSUPP, + NFS3ERR_TOOSMALL, + NFS3ERR_SERVERFAULT, + NFS3ERR_BADTYPE, + NFS3ERR_JUKEBOX, + + NF3REG = 1, + NF3DIR, + NF3BLK, + NF3CHR, + NF3LNK, + NF3SOCK, + NF3FIFO, + + DONT_CHANGE = 0, + SET_TO_SERVER_TIME, + SET_TO_CLIENT_TIME, + + NFSPROC3_NULL = 0, + NFSPROC3_GETATTR, + NFSPROC3_SETATTR, + NFSPROC3_LOOKUP, + NFSPROC3_ACCESS, + NFSPROC3_READLINK, + NFSPROC3_READ, + NFSPROC3_WRITE, + NFSPROC3_CREATE, + NFSPROC3_MKDIR, + NFSPROC3_SYMLINK, + NFSPROC3_MKNOD, + NFSPROC3_REMOVE, + NFSPROC3_RMDIR, + NFSPROC3_RENAME, + NFSPROC3_LINK, + NFSPROC3_READDIR, + NFSPROC3_READDIRPLUS, + NFSPROC3_FSSTAT, + NFSPROC3_FSINFO, + NFSPROC3_PATHCONF, + NFSPROC3_COMMIT, + + ACCESS3_READ = 0x0001, + ACCESS3_LOOKUP = 0x0002, + ACCESS3_MODIFY = 0x0004, + ACCESS3_EXTEND = 0x0008, + ACCESS3_DELETE = 0x0010, + ACCESS3_EXECUTE = 0x0020, + + UNSTABLE = 0, + DATA_SYNC, + FILE_SYNC, + + UNCHECKED = 0, + GUARDED, + EXCLUSIVE, + + FSF3_LINK = 0x0001, + FSF3_SYMLINK = 0x0002, + FSF3_HOMOGENEOUS= 0x0008, + FSF3_CANSETTIME = 0x0010, + + MNT_PROG = 100005, + MNT_MIN_VERS = 2, + MNT_MAX_VERS = 3, + MNT_PORT = 4003, + + MNTPATHLEN = 1024, + MNTNAMELEN = 255, + FHSIZE3 = NFS3_FHSIZE, + + MNT3_OK = 0, + MNT3ERR_PERM, + MNT3ERR_NOENT, + MNT3ERR_IO = 5, + MNT3ERR_ACCES = 13, + MNT3ERR_NOTDIR = 20, + MNT3ERR_INVAL = 22, + MNT3ERR_NAMETOOLONG = 63, + 
MNT3ERR_NOTSUPP = 10004, + MNT3ERR_SERVERFAULT = 10006, + + MOUNTPROC3_NULL = 0, + MOUNTPROC3_MNT, + MOUNTPROC3_DUMP, + MOUNTPROC3_UMNT, + MOUNTPROC3_UMNTALL, + MOUNTPROC3_EXPORT, + + NLM_PROG = 100021, + NLM_VERS = 4, + NLM_PORT = 4002, + + NLM4_GRANTED = 0, + NLM4_DENIED, + NLM4_DENIED_NLOCKS, + NLM4_BLOCKED, + NLM4_DENIED_GRACE_PERIOD, + NLM4_DEADLOCK, + NLM4_ROFS, + NLM4_STALE_FH, + NLM4_FBIG, + NLM4_FAILED, + + NLMPROC4_NULL = 0, + NLMPROC4_TEST, + NLMPROC4_LOCK, + NLMPROC4_CANCEL, + NLMPROC4_UNLOCK, + NLMPROC4_GRANTED, + NLMPROC4_TEST_MSG, + NLMPROC4_LOCK_MSG, + NLMPROC4_CANCEL_MSG, + NLMPROC4_UNLOCK_MSG, + NLMPROC4_GRANTED_MSG, + NLMPROC4_TEST_RES, + NLMPROC4_LOCK_RES, + NLMPROC4_CANCEL_RES, + NLMPROC4_UNLOCK_RES, + NLMPROC4_GRANTED_RES, + NLMPROC4_SHARE = 20, + NLMPROC4_UNSHARE, + NLMPROC4_NM_LOCK, + NLMPROC4_FREE_ALL, +}; + +typedef struct Rcb Rcb; + +struct Rcb { + int inuse; + int fd; + Ioproc *io; + ulong myprog; + ulong minver; + ulong maxver; + int (*dispatch)(char *, char *, ulong, char *, char *, ulong); + Rcb *next; +}; + +static Channel *upchan, *tpchan, *mchan, *nchan; +static Rcb *rcbhd; +static int nfstid, mounttid, tmaptid, umaptid; + +int debugnfs; + +static int +round4(int x) +{ + return (x + 3) & ~3; +} + +static char * +rpcputl(char *p, ulong l) +{ + hnputl(p, l); + return p + 4; +} + +static char * +rpcputv(char *p, uvlong v) +{ + hnputv(p, v); + return p + 8; +} + +static char * +getauth(char **pp) +{ + char *a; + int n; + + n = nhgetl(*pp + 4); + a = malloc(n + 8); + memmove(a, *pp, n + 8); + *pp += n + 8; + return a; +} + +static char * +putauth(char *p, char *verf) +{ + int n; + + n = nhgetl(verf + 4); + memmove(p, verf, n + 8); + return p + n + 8; +} + +static char * +initreply(char *buf, ulong xid, ulong stat, void *verf, int rstat) +{ + char *p; + + p = buf; + p = rpcputl(p, xid); + p = rpcputl(p, REPLY); + p = rpcputl(p, stat); + if(stat == MSG_ACCEPTED) + p = putauth(p, verf); + p = rpcputl(p, rstat); + return p; +} + +static 
void +tcprpcreader(void *a) +{ + Rcb *r; + char *buf, *p, *auth, *verf; + ulong xid, mtype, rpcvers, prog, vers, proc; + int n; + + r = a; + buf = malloc(34004); + while(1) { + n = ioreadn(r->io, r->fd, buf, 4); + if(shutdown || n < 4) { + free(buf); + ioclose(r->io, r->fd); + r->inuse = 0; + threadexits(nil); + } + n = nhgetl(buf) & 0x7fffffff; + if(n > 34000) { + fprint(2, "bogus read size: %d\n", n); + continue; + } + n = ioreadn(r->io, r->fd, buf+4, n); + if(n <= 0) { + if(debugnfs) + fprint(2, "leaving tcpreader for prog %uld\n", r->myprog); + free(buf); + ioclose(r->io, r->fd); + r->inuse = 0; + threadexits(nil); + } + /* if we don't at least have the xid and mtype, ignore */ + if(n < 8) + continue; + p = buf+4; + xid = nhgetl(p); + p += 4; + mtype = nhgetl(p); + p += 4; + /* we're only a server - ignore replies */ + if(mtype != CALL) + continue; + rpcvers = nhgetl(p); + p += 4; + prog = nhgetl(p); + p += 4; + vers = nhgetl(p); + p += 4; + proc = nhgetl(p); + p += 4; + if(debugnfs) + fprint(2, "got message in prog %uld len=%d xid=%uld(%ulx) mtype=%uld rpcvers=%uld prog=%uld vers=%uld proc=%uld\n", r->myprog, n, xid, xid, mtype, rpcvers, prog, vers, proc); + if(rpcvers != 2) { + p = initreply(buf+4, xid, MSG_DENIED, nil, RPC_MISMATCH); + p = rpcputl(p, 2); + p = rpcputl(p, 2); + hnputl(buf, (p-(buf+4)) | 0x80000000); + iowrite(r->io, r->fd, buf, p-buf); + continue; + } + auth = getauth(&p); + verf = getauth(&p); + if(prog != r->myprog) { + p = initreply(buf+4, xid, MSG_ACCEPTED, verf, PROG_UNAVAIL); + hnputl(buf, (p-(buf+4)) | 0x80000000); + iowrite(r->io, r->fd, buf, p-buf); + free(auth); + free(verf); + continue; + } + if(vers < r->minver || vers > r->maxver) { + p = initreply(buf+4, xid, MSG_ACCEPTED, verf, PROG_MISMATCH); + p = rpcputl(p, r->minver); + p = rpcputl(p, r->maxver); + hnputl(buf, (p-(buf+4)) | 0x80000000); + iowrite(r->io, r->fd, buf, p-buf); + free(auth); + free(verf); + continue; + } + n = r->dispatch(buf+4, p, xid, auth, verf, proc); + 
if(debugnfs) { + fprint(2, "writing %d bytes in response\n", n); + if(debugnfs > 1) { + int i; + for(i = 0; i < n+4; i += 4) fprint(2, " %ud", nhgetl(buf + i)); + fprint(2, "\n"); + } + } + hnputl(buf, n | 0x80000000); + iowrite(r->io, r->fd, buf, n+4); + free(auth); + free(verf); + } +} + +static void +udprpcreader(void *a) +{ + Rcb *r; + char *buf, *p, *auth, *verf; + ulong xid, mtype, rpcvers, prog, vers, proc; + int n; + + r = a; + buf = malloc(8500); + n = ioread(r->io, r->fd, buf, 8500); + if(shutdown || n <= 0) + goto done2; + /* if we don't at least have the xid and mtype, ignore */ + if(n < 8) + goto done2; + p = buf; + xid = nhgetl(p); + p += 4; + mtype = nhgetl(p); + p += 4; + if(debugnfs) + fprint(2, "got message in prog %uld len=%d xid=%uld(%ulx) mtype=%uld\n", r->myprog, n, xid, xid, mtype); + /* we're only a server - ignore replies */ + if(mtype != CALL) + goto done2; + rpcvers = nhgetl(p); + p += 4; + prog = nhgetl(p); + p += 4; + vers = nhgetl(p); + p += 4; + proc = nhgetl(p); + p += 4; + if(debugnfs) + fprint(2, "rpcvers=%uld prog=%uld vers=%uld proc=%uld\n", rpcvers, prog, vers, proc); + if(rpcvers != 2) { + p = initreply(buf, xid, MSG_DENIED, nil, RPC_MISMATCH); + p = rpcputl(p, 2); + p = rpcputl(p, 2); + iowrite(r->io, r->fd, buf, p-buf); + goto done2; + } + auth = getauth(&p); + verf = getauth(&p); + if(prog != r->myprog) { + p = initreply(buf, xid, MSG_ACCEPTED, verf, PROG_UNAVAIL); + iowrite(r->io, r->fd, buf, p-buf); + goto done1; + } + if(vers < r->minver || vers > r->maxver) { + p = initreply(buf, xid, MSG_ACCEPTED, verf, PROG_MISMATCH); + p = rpcputl(p, r->minver); + p = rpcputl(p, r->maxver); + iowrite(r->io, r->fd, buf, p-buf); + goto done1; + } + n = r->dispatch(buf, p, xid, auth, verf, proc); + if(debugnfs) { + fprint(2, "writing %d bytes in response\n", n); + if(debugnfs > 1) { + int i; + for(i = 0; i < n; i += 4) fprint(2, " %ud", nhgetl(buf + i)); + fprint(2, "\n"); + } + } + iowrite(r->io, r->fd, buf, n); +done1: + free(auth); + 
free(verf); +done2: + free(buf); + ioclose(r->io, r->fd); + r->inuse = 0; + threadexits(nil); +} + +static char * +rpcnull(char *buf, ulong xid, char *verf) +{ + char *rp; + + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, 0); + return rp; +} + +/* + * Fake Port Mapper + */ +static int +pmapdis(char *buf, char *p, ulong xid, char *auth, char *verf, ulong proc) +{ + char *rp; + ulong prog, vers, prot, nproc; + + switch(proc) { + case PMAPPROC_NULL: + rp = rpcnull(buf, xid, verf); + break; + case PMAPPROC_GETPORT: + prog = nhgetl(p); + p += 4; + vers = nhgetl(p); + p += 4; + prot = nhgetl(p); + if(debugnfs) + fprint(2, "In portmap getport prog=%uld vers=%uld prot=%uld\n", prog, vers, prot); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + hnputl(rp, 0); + switch(prog) { + case NFS_PROG: + if(vers == NFS_VERS && prot == IPPROTO_TCP) + hnputl(rp, NFS_PORT); + break; + case MNT_PROG: + if(vers >= MNT_MIN_VERS && vers <= MNT_MAX_VERS && prot == IPPROTO_TCP) + hnputl(rp, MNT_PORT); + break; + case NLM_PROG: + if(vers == NLM_VERS && prot == IPPROTO_TCP) + hnputl(rp, NLM_PORT); + break; + } + rp += 4; + break; + case PMAPPROC_DUMP: + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, NFS_PROG); + rp = rpcputl(rp, NFS_VERS); + rp = rpcputl(rp, IPPROTO_TCP); + rp = rpcputl(rp, NFS_PORT); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, MNT_PROG); + rp = rpcputl(rp, MNT_MAX_VERS); + rp = rpcputl(rp, IPPROTO_TCP); + rp = rpcputl(rp, MNT_PORT); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, NLM_PROG); + rp = rpcputl(rp, NLM_VERS); + rp = rpcputl(rp, IPPROTO_TCP); + rp = rpcputl(rp, NLM_PORT); + rp = rpcputl(rp, 0); + break; + case PMAPPROC_CALLIT: +SET(nproc); +USED(nproc); +USED(auth); +/* + prog = nhgetl(p); + p += 4; + vers = nhgetl(p); + p += 4; + nproc = nhgetl(p); + p += 4; + switch(prog) { + case NFS_PROG: + return nfsdis(buf, p, xid, auth, verf, nproc); + break; + case MNT_PROG: + return mntdis(buf, 
p, xid, auth, verf, nproc); + break; + case NLM_PROG: + return nlmdis(buf, p, xid, auth, verf, nproc); + break; + default: + rp = initreply(buf, xid, MSG_ACCEPTED, verf); + break; + } + break; +*/ + case PMAPPROC_SET: /* not used here for fake port mapper */ + case PMAPPROC_UNSET: + default: + rp = initreply(buf, xid, MSG_ACCEPTED, verf, PROG_UNAVAIL); + rp += 4; + break; + } + return rp - buf; +} + +static char * +domnt(char *buf, char *p, ulong xid, char *, char *verf) +{ + Qid qid; + char *rp, *path; + uvlong meta, x; + int n; + + n = nhgetl(p); + path = malloc(n + 1); + memmove(path, p + 4, n); + path[n] = 0; + if(debugnfs) + fprint(2, "Attempting to mount %s qpath=%ulld\n", path, p2q(-1, path, 0)); + meta = q2m(-1, p2q(-1, path, 0), 0); + free(path); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + if(meta == 0) { + rp = rpcputl(rp, MNT3ERR_NOENT); + return rp; + } + if(getmetaint(-1, meta, "qpath", &x) == MTnone) { + rp = rpcputl(rp, MNT3ERR_IO); + return rp; + } + qid.path = x; + getmetaint(-1, meta, "qvers", &x); + qid.vers = x; + getmetaint(-1, meta, "qtype", &x); + qid.type = x; + if(!(qid.type & QTDIR)) { + rp = rpcputl(rp, MNT3ERR_NOTDIR); + return rp; + } + if(debugnfs) + fprint(2, "meta=%ulld qid=(%ulld,%uld,%d)\n", meta, qid.path, qid.vers, qid.type); + rp = rpcputl(rp, MNT3_OK); + rp = rpcputl(rp, sizeof(Qid)); + memmove(rp, &qid, sizeof(Qid)); + rp += round4(sizeof(Qid)); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, AUTH_UNIX); + return rp; +} + +static int +mntdis(char *buf, char *p, ulong xid, char *auth, char *verf, ulong proc) +{ + char *rp; + + switch(proc) { + case MOUNTPROC3_NULL: + rp = rpcnull(buf, xid, verf); + break; + case MOUNTPROC3_MNT: + rp = domnt(buf, p, xid, auth, verf); + break; + case MOUNTPROC3_DUMP: + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + hnputl(rp, 0); + rp += 4; + break; + case MOUNTPROC3_UMNT: + rp = rpcnull(buf, xid, verf); + break; + case MOUNTPROC3_UMNTALL: + rp = rpcnull(buf, xid, verf); + 
break; + case MOUNTPROC3_EXPORT: + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, 1); + memmove(rp, "/\0\0\0", 4); + rp += 4; + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + break; + default: + rp = initreply(buf, xid, MSG_DENIED, verf, PROC_UNAVAIL); + break; + } + return rp - buf; +} + +static char * +fattr3(int fd, char *rp, Qid *qid) +{ + char *symlink; + uvlong meta, len, mtime, x; + + meta = q2m(fd, qid->path, 0); + if(meta == 0) +{ +fprint(2, "nil meta in fattr3: caller:%p qpath:%ulld fd:%d\n", getcallerpc(&fd), qid->path, fd); + return nil; +} + if(qid->type & QTDIR) + rp = rpcputl(rp, NF3DIR); + else { + if((symlink = getmetastr(fd, meta, "symlink")) != nil) { + rp = rpcputl(rp, NF3LNK); + free(symlink); + } + else if(getmetaint(fd, meta, "nodetype", &x) != MTnone) + rp = rpcputl(rp, x); + else + rp = rpcputl(rp, NF3REG); + } + if(getmetaint(fd, meta, "unixmode", &x) != MTnone) + rp = rpcputl(rp, x); + else if(getmetaint(fd, meta, "mode", &x) != MTnone) + rp = rpcputl(rp, x & 0777); + else + rp = rpcputl(rp, 0777); + rp = rpcputl(rp, 1); /* nlink */ + if(getmetaint(fd, meta, "nuid", &x) == MTnone) /* uid */ + rp = rpcputl(rp, -2); + else + rp = rpcputl(rp, x); + if(getmetaint(fd, meta, "ngid", &x) == MTnone) /* gid */ + rp = rpcputl(rp, -2); + else + rp = rpcputl(rp, x); + if(getmetaint(fd, meta, "length", &len) == MTnone) + len = 0; + rp = rpcputv(rp, len); /* size */ + if(getmetaint(fd, meta, "used", &x) == MTnone) + rp = rpcputv(rp, len); + else + rp = rpcputv(rp, x); + if(getmetaint(fd, meta, "majordev", &x) == MTnone) /* rdev */ + rp = rpcputl(rp, 0); + else + rp = rpcputl(rp, x); + if(getmetaint(fd, meta, "minordev", &x) == MTnone) + rp = rpcputl(rp, 0); + else + rp = rpcputl(rp, x); + rp = rpcputv(rp, 0); /* fsid */ + rp = rpcputv(rp, qid->path); /* fileid */ + if(getmetaint(fd, meta, "atime", &x) == MTnone) + rp = rpcputv(rp, 0); + else { + rp = rpcputl(rp, x / 1000000000LL); + rp = rpcputl(rp, x % 
1000000000LL); + } + if(getmetaint(fd, meta, "mtime", &x) == MTnone) + mtime = 0; + else + mtime = x; + rp = rpcputl(rp, mtime / 1000000000LL); + rp = rpcputl(rp, mtime % 1000000000LL); + if(getmetaint(fd, meta, "ctime", &x) == MTnone) { + rp = rpcputl(rp, mtime / 1000000000LL); + rp = rpcputl(rp, mtime % 1000000000LL); + } + else { + rp = rpcputl(rp, x / 1000000000LL); + rp = rpcputl(rp, x % 1000000000LL); + } + return rp; +} + +static char * +opattr(int fd, char *rp, Qid *qid) +{ + char *trp; + + rp = rpcputl(rp, 1); + trp = fattr3(fd, rp, qid); + if(trp == nil) { +fprint(2, "nil in opattr from %p\n", getcallerpc(&fd)); + rp -= 4; + rp = rpcputl(rp, 0); + return rp; + } + return trp; +} + +static ulong +getperm(int fd, uvlong meta, char *auth) +{ + char *host, *uid, *gid, *s; + uvlong mode, x; + ulong perm; + int n, nuid, ngid; + + if(allow) + return 0007; + + getmetaint(fd, meta, "mode", &mode); + perm = mode & 0007; + host = nil; + switch(nhgetl(auth)) { + case AUTH_UNIX: + auth += 12; + n = nhgetl(auth); + host = emalloc9p(n + 1); + auth += 4; + memmove(host, auth, n); + auth += n; + nuid = nhgetl(auth); + auth += 4; + ngid = nhgetl(auth); + if(rootallow && nhgetl(auth) == 0) { + perm = 0007; + break; + } + if((uid = getmetastr(fd, meta, "uid")) != nil && (s = id2uname(host, nuid))) { + if(strcmp(s, uid) == 0) { + perm = (mode >> 6) & 0007; + free(uid); + break; + } + } + else if(getmetaint(fd, meta, "nuid", &x) != MTnone && x == nuid) { + perm = (mode >> 6) & 0007; + free(uid); + break; + } + if((gid = getmetastr(fd, meta, "gid")) != nil && (s = id2gname(host, ngid))) { + if(strcmp(s, uid) == 0) + perm = (mode >> 3) & 0007; + } + else if(getmetaint(fd, meta, "ngid", &x) != MTnone && x == ngid) + perm = (mode >> 3) & 0007; + free(uid); + free(gid); + break; + case AUTH_NULL: + case AUTH_SHORT: + case AUTH_DES: + default: + break; + } + free(host); + return perm; +} + +static int +prewcc(int fd, uvlong qpath, uvlong *len, uvlong *mtime, uvlong *ctime) +{ + 
uvlong meta, x; + + meta = q2m(fd, qpath, 0); + if(meta == 0) + return -1; + if(getmetaint(fd, meta, "length", &x) == MTnone) + x = 0; + *len = x; + getmetaint(fd, meta, "mtime", &x); + *mtime = x; + if(getmetaint(fd, meta, "ctime", &x) == MTnone) + *ctime = *mtime; + else + *ctime = x; + return 0; +} + +static char * +dowcc(int fd, char *rp, Qid *qid, uvlong prelen, uvlong premtime, uvlong prectime) +{ + rp = rpcputl(rp, 1); + rp = rpcputv(rp, prelen); + rp = rpcputl(rp, premtime / 1000000000LL); + rp = rpcputl(rp, premtime % 1000000000LL); + rp = rpcputl(rp, prectime / 1000000000LL); + rp = rpcputl(rp, prectime % 1000000000LL); + rp = opattr(fd, rp, qid); + return rp; +} + +static char * +mkpath(int fd, uvlong qpath, int len) +{ + char *str, *name, *p; + uvlong meta, parent; + int n; + + if(qpath == 1) { + str = malloc(len + 2); + strcpy(str, "/"); + return str; + } + meta = q2m(fd, qpath, 0); + if(meta == 0) + return nil; + name = getmetastr(fd, meta, "name"); + n = strlen(name); + if(getmetaint(fd, meta, "parent", &parent) == MTnone) { + str = malloc(len + n + 2); + strcpy(str, name); + free(name); + return str; + } + str = mkpath(fd, parent, len + n + 1); + p = str + strlen(str); + *p++ = '/'; + strcpy(p, name); + free(name); + return str; +} + +static char * +dosattr(uvlong meta, char *p) +{ + uvlong now, x; + ulong setit; + + now = nsec(); + setit = nhgetl(p); + p += 4; + if(setit) { + getmetaint(-1, meta, "mode", &x); + setmetaint(meta, "mode", nil, (nhgetl(p) & 0777) | (x & ~0777)); + setmetaint(meta, "unixmode", nil, nhgetl(p)); + p += 4; + } + setit = nhgetl(p); + p += 4; + if(setit) { + setmetaint(meta, "nuid", nil, nhgetl(p)); + p += 4; + } + setit = nhgetl(p); + p += 4; + if(setit) { + setmetaint(meta, "ngid", nil, nhgetl(p)); + p += 4; + } + setit = nhgetl(p); + p += 4; + if(setit) { + setmetaint(meta, "length", nil, nhgetv(p)); + p += 8; + setmetaint(meta, "mtime", nil, now); + } + setit = nhgetl(p); + p += 4; + if(setit == SET_TO_CLIENT_TIME) { + 
setmetaint(meta, "atime", nil, nhgetl(p) * 1000000000LL + nhgetl(p + 4)); + p += 8; + } + setit = nhgetl(p); + p += 4; + if(setit == SET_TO_CLIENT_TIME) { + setmetaint(meta, "mtime", nil, nhgetl(p) * 1000000000LL + nhgetl(p + 4)); + p += 8; + } + setmetaint(meta, "ctime", nil, now); + return p; +} + +static int +opensnap(uvlong qid) +{ + char *sname, *spath; + uvlong meta; + int fd; + + meta = q2m(-1, qid, 0); + if(meta == 0) + return -1; + sname = getmetastr(-1, meta, "snap"); + if(sname == nil) + return -1; + spath = smprint("%s/%s", ddir, sname); + free(sname); + fd = open(spath, OREAD); + free(spath); + return fd; +} + +static char * +nfsgetattr(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + int fd; + + fd = -1; + if(nhgetl(p) != sizeof(Qid)) { + fd = opensnap(nhgetv(p + sizeof(Qid) + 4)); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + a = fattr3(fd, rp, &qid); + if(a == nil) + hnputl(rp-4, NFS3ERR_BADHANDLE); + else + rp = a; + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfssetattr(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp; + uvlong meta, prelen, premeta, prectime; + + if(nhgetl(p) != sizeof(Qid)) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + if(prewcc(-1, qid.path, &prelen, &premeta, &prectime) < 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + meta = q2m(-1, qid.path, 0); + dosattr(meta, p); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, 
NFS3_OK); + rp = dowcc(-1, rp, &qid, prelen, premeta, prectime); + return rp; +} + +static char * +nfslookup(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid, qid2; + char *rp, *name, *path, *sname, *spath; + uvlong meta, qp, x, sqid; + ulong perms; + int n, m, fd,pfd; + + pfd = fd = -1; + sqid = 0; + if(nhgetl(p) != round4(sizeof(Qid))) { + sqid = nhgetv(p + round4(sizeof(Qid)) + 4); + pfd = fd = opensnap(sqid); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + n = nhgetl(p); + p += 4; + name = malloc(n + 1); + if(name == nil) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_SERVERFAULT); + return rp; + } + memmove(name, p, n); + name[n] = 0; + meta = q2m(fd, qid.path, 0); + if(debugnfs) + fprint(2, "in nfslookup: qid=(%ulld,%uld,%ud) name=%s\n", qid.path, qid.vers, qid.type, name); + perms = getperm(fd, meta, auth); + if((perms & DMEXEC) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + if(fd != -1) + close(fd); + return rp; + } + if(strcmp(name, ".") == 0) { + /* don't need to do anything */ + } + else if(strcmp(name, "..") == 0) { + getmetaint(fd, meta, "parent", &qp); + meta = q2m(fd, qp, 0); + } + else { + path = mkpath(fd, qid.path, n + 1); + m = strlen(path); + path[m] = '/'; + strcpy(path + m + 1, name); + x = p2q(fd, path, 0); + meta = q2m(fd, x, 0); + if(meta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOENT); + rp = rpcputl(rp, 0); + return rp; + } + free(path); + sname = getmetastr(fd, meta, "snap"); + if(sname) { + spath = smprint("%s/%s", ddir, sname); + free(sname); + fd = open(spath, OREAD); + free(spath); + sqid = x; + meta = q2m(fd, p2q(fd, "/", 0), 0); + } + } + rp = initreply(buf, xid, MSG_ACCEPTED, verf, 
SUCCESS); + rp = rpcputl(rp, NFS3_OK); + getmetaint(fd, meta, "qpath", &x); + qid2.path = x; + getmetaint(fd, meta, "qvers", &x); + qid2.vers = x; + getmetaint(fd, meta, "qtype", &x); + qid2.type = x; + if(fd == -1) + m = round4(sizeof(Qid)); + else + m = round4(sizeof(Qid)) + sizeof(uvlong); + rp = rpcputl(rp, m); + memmove(rp, &qid2, sizeof(Qid)); + rp += round4(sizeof(Qid)); + if(fd != -1) + rp = rpcputv(rp, sqid); + rp = opattr(fd, rp, &qid2); + rp = opattr(pfd, rp, &qid); + free(name); + if(fd != -1) + close(fd); + if(pfd != -1 && pfd != fd) + close(pfd); + return rp; +} + +static char * +nfsaccess(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp, *a; + uvlong meta; + ulong reqacc, rspacc, perms; + int fd; + + fd = -1; + if(nhgetl(p) != round4(sizeof(Qid))) { + fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4)); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + reqacc = nhgetl(p); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + if(fd != -1) + close(fd); + return rp; + } + perms = getperm(fd, meta, auth); + rspacc = 0; + if(perms & DMREAD) + rspacc |= ACCESS3_READ; + if(perms & DMWRITE) + rspacc |= ACCESS3_MODIFY | ACCESS3_EXTEND; + if(perms & DMEXEC) + rspacc |= ACCESS3_LOOKUP | ACCESS3_EXECUTE; + rspacc &= reqacc; + rp = rpcputl(rp, NFS3_OK); + a = fattr3(fd, rp + 4, &qid); + if(a == nil) { + hnputl(rp-4, NFS3ERR_BADHANDLE); + rpcputl(rp, 0); + } + else { + rpcputl(rp, 1); + rp = a; + rp = rpcputl(rp, rspacc); + } + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfsreadlink(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *pp; + uvlong meta; + int n, fd; + + fd = -1; + if(nhgetl(p) != round4(sizeof(Qid))) { + fd = 
opensnap(nhgetv(p + round4(sizeof(Qid)) + 4)); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + if((pp = getmetastr(fd, meta, "symlink")) == nil) { + rp = rpcputl(rp, NFS3ERR_INVAL); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + n = strlen(pp); + rp = rpcputl(rp, NFS3_OK); + rp = opattr(fd, rp, &qid); + rp = rpcputl(rp, n); + memmove(rp, pp, n); + rp += round4(n); + free(pp); + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfsread(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp, *a; + uvlong offset, meta, len; + ulong perms; + long count1, count2; + int fd; + + fd = -1; + if(nhgetl(p) != round4(sizeof(Qid))) { + fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4)); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + offset = nhgetv(p); + p += 8; + count1 = nhgetl(p); + if(count1 > 32768) + count1 = 32768; + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + a = rp + 104; + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + perms = getperm(fd, meta, auth); + if((perms & DMREAD) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + if(getmetaint(fd, meta, "length", &len) == MTnone) + len = 0; + count2 = θpread(fd, qid.path, a, count1, offset); + a = fattr3(fd, rp + 8, &qid); + if(a == nil) { 
+ rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + hnputl(rp + 4, 1); + if(count2 < 0) { + hnputl(rp, NFS3ERR_IO); + rp = a; + if(fd != -1) + close(fd); + return rp; + } + hnputl(rp, NFS3_OK); + rp = a; + rp = rpcputl(rp, count2); + if(offset + count2 >= len) + rp = rpcputl(rp, 1); + else + rp = rpcputl(rp, 0); + rp = rpcputl(rp, count2); + rp += round4(count2); + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfswrite(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp; + uvlong offset, meta, prelen, premtime, prectime; + ulong perms; + long count1, count2, stable; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + offset = nhgetv(p); + p += 8; + count1 = nhgetl(p); + p += 4; + stable = nhgetl(p); + p += 8; /* also skip the count at the beginning of the opaque data */ + meta = q2m(-1, qid.path, 0); + if(meta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + return rp; + } + perms = getperm(-1, meta, auth); + if((perms & DMWRITE) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + count2 = θpwrite(qid.path, p, count1, offset, 1); + if(stable != UNSTABLE) { + resetmeta(); + csync(); + } + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + if(count2 < 0) { + rp = rpcputl(rp, NFS3ERR_IO); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + rp = rpcputl(rp, NFS3_OK); + rp = dowcc(-1, rp, &qid, 
prelen, premtime, prectime); + rp = rpcputl(rp, count2); + if(stable == UNSTABLE) + rp = rpcputl(rp, UNSTABLE); + else + rp = rpcputl(rp, FILE_SYNC); + rp = rpcputv(rp, starttime); + return rp; +} + +static char * +mkfile(char *buf, char *p, ulong xid, char *auth, char *verf, int ilk) +{ + Qid qid, nqid; + char *name, *path, *rp; + uvlong meta, pmeta, dirblk, now, x; + uvlong prelen, premeta, prectime; + ulong perms; + int n, m, how, nodetype; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + n = nhgetl(p); + p += 4; + name = malloc(n + 1); + if(name == nil) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_SERVERFAULT); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + memmove(name, p, n); + name[n] = 0; + p += round4(n); + if(ilk == NF3REG) { + how = nhgetl(p); + p += 4; + } + else + how = GUARDED; + if(debugnfs) + fprint(2, "in nfscreate: qid=(%ulld,%uld,%ud) name=%s\n", qid.path, qid.vers, qid.type, name); + if((qid.type & QTDIR) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTDIR); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_EXIST); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } +if(how == EXCLUSIVE) { +rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); +rp = rpcputl(rp, NFS3ERR_NOTSUPP); +rp = rpcputl(rp, 0); +rp = rpcputl(rp, 0); +return rp; +} + pmeta = q2m(-1, qid.path, 0); + perms = getperm(-1, pmeta, auth); + if((perms & DMWRITE) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + path = 
mkpath(-1, qid.path, n + 1); + m = strlen(path); + path[m] = '/'; + strcpy(path + m + 1, name); + nqid.path = p2q(-1, path, 1); + switch(how) { + case UNCHECKED: + break; + case GUARDED: + meta = q2m(-1, nqid.path, 0); + if(meta != 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_EXIST); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + meta = q2m(-1, nqid.path, 1); + free(path); + nqid.vers = 0; + if(ilk == NF3DIR) + nqid.type = QTDIR; + else + nqid.type = QTFILE; + prewcc(-1, qid.path, &prelen, &premeta, &prectime); + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "parent", nil, qid.path); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + getmetaint(-1, pmeta, "mode", &x); + if(ilk == NF3DIR) + setmetaint(meta, "mode", nil, x & 0777 | DMDIR); + else + setmetaint(meta, "mode", nil, x & 0777); + now = nsec(); + setmetaint(pmeta, "mtime", nil, now); + getmetaint(-1, pmeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(pmeta, "child", nil, nqid.path); + nodetype = 0; + switch(ilk) { + case NF3DIR: + setmetaint(meta, "child", nil, 0); + break; + case NF3REG: + dirblk = allocblock(); + cbclean(dirblk); + cbwrite(dirblk); + brelease(dirblk); + setmetaint(meta, "index", nil, dirblk); + break; + case NF3CHR: + nodetype = nhgetl(p); + p += 4; + setmetaint(meta, "nodetype", nil, nodetype); + break; + } + setqhash(nqid.path, meta); + p = dosattr(meta, p); + if(ilk == NF3LNK) { + n = nhgetl(p); + p += 4; + path = malloc(n + 1); + memmove(path, p, n); + path[n] = 0; + setmetastr(meta, "symlink", nil, path, 0); + free(path); + } + else if(ilk == NF3CHR) { + if(nodetype == NF3CHR || nodetype == NF3BLK) { + setmetaint(meta, "majordev", nil, nhgetl(p)); + p += 4; + setmetaint(meta, "minordev", nil, nhgetl(p)); + } + } + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + rp = 
rpcputl(rp, 1); + rp = rpcputl(rp, round4(sizeof(Qid))); + memmove(rp, &nqid, sizeof(Qid)); + rp += round4(sizeof(Qid)); + rp = opattr(-1, rp, &nqid); + rp = dowcc(-1, rp, &qid, prelen, premeta, prectime); + savesuper(); + return rp; +} + +static char * +nfscreate(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3REG); +} + +static char * +nfsmkdir(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3DIR); +} + +static char * +nfssymlink(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3LNK); +} + +static char * +nfsmknod(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3CHR); /* sort out the exact node type in mkfile */ +} + +static char * +nfsremove(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp, *name, *path; + uvlong meta, qpath, pmeta, x; + uvlong prelen, premtime, prectime; + ulong perms; + int n; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + if((qid.type & QTDIR) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTDIR); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + pmeta = q2m(-1, qid.path, 0); + if(pmeta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + perms = getperm(-1, pmeta, auth); + if((perms & DMWRITE) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) { + rp = initreply(buf, xid, 
MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + n = nhgetl(p); + p += 4; + name = malloc(n + 1); + memmove(name, p, n); + name[n] = 0; + path = mkpath(-1, qid.path, n + 1); + n = strlen(path); + path[n] = '/'; + strcpy(path + n + 1, name); + qpath = p2q(-1, path, 0); + meta = q2m(-1, qpath, 0); + if(meta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOENT); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + free(path); + return rp; + } + if(getmetaint(-1, meta, "child", &x) != MTnone && x != 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTEMPTY); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + free(path); + return rp; + } + rmq(qpath, meta); + freedata(meta); + rmdlist(meta, qpath); + freeblock(meta); + rmp(path); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + rp = dowcc(-1, rp, &qid, prelen, premtime, prectime); + free(name); + free(path); + return rp; +} + +static char * +nfsfsstat(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + a = fattr3(-1, rp + 4, &qid); + if(a == nil) { + hnputl(rp - 4, NFS3ERR_BADHANDLE); + hnputl(rp, 0); + return rp; + } + hnputl(rp, 1); + rp = a; + rp = rpcputv(rp, super.nblk * BlkSize); /* tbytes */ + rp = rpcputv(rp, super.nfree * BlkSize); /* fbytes */ + rp = rpcputv(rp, super.nfree * BlkSize); /* abytes */ + rp = rpcputv(rp, super.nht); /* tfiles */ + rp = rpcputv(rp, super.nht); /* ffiles */ + rp = rpcputv(rp, super.nht); /* afiles */ + rp = rpcputl(rp, 0); /* invarsec */ + return rp; +} + +static char * +nfsfsinfo(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, 
verf, SUCCESS); + hnputl(rp, NFS3_OK); + rp += 4; + a = fattr3(-1, rp + 4, &qid); + if(a == nil) { + hnputl(rp - 4, NFS3ERR_BADHANDLE); + hnputl(rp, 0); + return rp; + } + hnputl(rp, 1); + rp = a; + rp = rpcputl(rp, 32768); /* rtmax */ + rp = rpcputl(rp, 32768); /* rtpref */ + rp = rpcputl(rp, 1); /* rtmult */ + rp = rpcputl(rp, 32768); /* wtmax */ + rp = rpcputl(rp, 32768); /* wtpref */ + rp = rpcputl(rp, 1); /* wtmult */ + rp = rpcputl(rp, 8192); /* dtpref */ + rp = rpcputv(rp, 1LL << 55); /* maxfilesize */ + rp = rpcputl(rp, 0); /* time_delta */ + rp = rpcputl(rp, 1); + rp = rpcputl(rp, FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME); /* properties */ + return rp; +} + +static char * +nfspathconf(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + hnputl(rp, NFS3_OK); + rp += 4; + a = fattr3(-1, rp + 4, &qid); + if(a == nil) { + hnputl(rp - 4, NFS3ERR_BADHANDLE); + return rp; + } + hnputl(rp, 1); + rp = a; + rp = rpcputl(rp, 1); + rp = rpcputl(rp, MNTNAMELEN); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 1); + return rp; +} + +static char * +nfsrename(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid fqid, tqid; + char *fname, *tname, *fpath, *tpath, *rp; + uvlong fdmeta, fmeta, tmeta, qpath, now, x; + uvlong fprelen, fpremtime, fprectime, tprelen, tpremtime, tprectime; + ulong perms; + int fn, tn, n; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + fqid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + fn = nhgetl(p); + p += 4; + fname = malloc(fn + 1); + memmove(fname, p, fn); + fname[fn] = 0; + p += round4(fn); + fpath = mkpath(-1, fqid.path, fn + 1); + n = strlen(fpath); + fpath[n] = '/'; + strcpy(fpath + n + 1, fname); + tqid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + 
tn = nhgetl(p); + p += 4; + tname = malloc(tn + 1); + memmove(tname, p, tn); + tname[tn] = 0; + tpath = mkpath(-1, tqid.path, tn + 1); + n = strlen(tpath); + tpath[n] = '/'; + strcpy(tpath + n + 1, tname); + prewcc(-1, fqid.path, &fprelen, &fpremtime, &fprectime); + prewcc(-1, tqid.path, &tprelen, &tpremtime, &tprectime); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + fdmeta = q2m(-1, fqid.path, 0); + if(fdmeta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + goto done; + } + perms = getperm(-1, fdmeta, auth); + if((perms & DMWRITE) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + goto done; + } + qpath = p2q(-1, fpath, 0); + if(qpath == 0) { + rp = rpcputl(rp, NFS3ERR_NOENT); + goto done; + } + if((tqid.type & QTDIR) == 0) { + rp = rpcputl(rp, NFS3ERR_NOTDIR); + goto done; + } + now = nsec(); + fmeta = q2m(-1, qpath, 0); + if(fqid.path != tqid.path) { + tmeta = q2m(-1, tqid.path, 0); + if(tmeta == 0) { + rp = rpcputl(rp, NFS3ERR_NOENT); + goto done; + } + perms = getperm(-1, tmeta, auth); + if((perms & DMWRITE) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + goto done; + } + rmdlist(fmeta, qpath); + setmetaint(fmeta, "parent", nil, tqid.path); + getmetaint(-1, tmeta, "child", &x); + setmetaint(fmeta, "sib", nil, x); + setmetaint(tmeta, "child", nil, qpath); + setmetaint(tmeta, "mtime", nil, now); + setmetaint(tmeta, "atime", nil, now); + } + setmetastr(fmeta, "name", nil, tname, 0); + rehashpath(qpath, fpath, tpath); + setmetaint(fmeta, "ctime", nil, now); + setmetaint(fdmeta, "mtime", nil, now); + setmetaint(fdmeta, "atime", nil, now); + rp = rpcputl(rp, NFS3_OK); +done: + rp = dowcc(-1, rp, &fqid, fprelen, fpremtime, fprectime); + rp = dowcc(-1, rp, &tqid, tprelen, tpremtime, tprectime); + free(fname); + free(fpath); + free(tname); + free(tpath); + return rp; +} + +static char * +nfsreaddir(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp, *a, *xs; + uvlong cookie, meta; + ulong perms; + long count1, count2; + int n, fd; + + 
fd = -1; + if(nhgetl(p) != round4(sizeof(Qid))) { + fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4)); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + cookie = nhgetv(p); + p += 16; + count1 = nhgetl(p); + if(count1 > 8192) + count1 = 8192; + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + perms = getperm(fd, meta, auth); + if((perms & DMREAD) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + if(cookie == 0) { + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + return rp; + } + getmetaint(fd, meta, "child", &cookie); + } + a = rp + 92; + a = rpcputv(a, 0); + count2 = a - rp; + while(cookie != 0) { + meta = q2m(fd, cookie, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_IO); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + xs = getmetastr(fd, meta, "name"); + n = strlen(xs); + if(count2 + round4(n) + 24 + 8 > count1) { + free(xs); + break; + } + a = rpcputl(a, 1); + a = rpcputv(a, cookie); + a = rpcputl(a, n); + memmove(a, xs, n); + a += round4(n); + free(xs); + a = rpcputv(a, cookie); + getmetaint(fd, meta, "sib", &cookie); + count2 += round4(n) + 24; + } + a = fattr3(fd, rp + 8, &qid); + if(a == nil) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + hnputl(rp, NFS3_OK); + hnputl(rp + 4, 1); + rp += count2; + rp = rpcputl(rp, 0); + if(cookie == 0) + rp = rpcputl(rp, 1); + else + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfsreaddirplus(char *buf, char *p, ulong xid, char *auth, 
char *verf) +{ + Qid qid, qid2; + char *rp, *a, *xs; + uvlong cookie, meta, x, sqid; + ulong perms; + long count1, count2; + int n, m, fd; + + fd = -1; + sqid = 0; + if(nhgetl(p) != round4(sizeof(Qid))) { + sqid = nhgetv(p + round4(sizeof(Qid)) + 4); + fd = opensnap(sqid); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + cookie = nhgetv(p); + p += 16; + p += 4; /* use maxcount instead of dircount */ + count1 = nhgetl(p); + if(count1 > 8192) + count1 = 8192; + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + perms = getperm(fd, meta, auth); + if((perms & DMREAD) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + if(cookie == 0) { + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + getmetaint(fd, meta, "child", &cookie); + } + a = rp + 92; + a = rpcputv(a, 0); /* cookieverf */ + count2 = a - rp; + while(cookie != 0) { + meta = q2m(fd, cookie, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_IO); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + xs = getmetastr(fd, meta, "name"); + n = strlen(xs); + getmetaint(fd, meta, "qpath", &x); + qid2.path = x; + getmetaint(fd, meta, "qvers", &x); + qid2.vers = x; + getmetaint(fd, meta, "qtype", &x); + qid2.type = x; + if(fd == -1) + m = round4(sizeof(Qid)); + else + m = round4(sizeof(Qid)) + sizeof(uvlong); + if(count2 + 4 + 8 + 4 + round4(n) + 8 + 88 + 4 + 4 + m + 8 > count1) { + free(xs); + break; + } + a = rpcputl(a, 1); + a = rpcputv(a, cookie); /* fileid */ + a = rpcputl(a, n); /* name */ + 
memmove(a, xs, n); + a += round4(n); + free(xs); + a = rpcputv(a, cookie); /* cookie */ + a = opattr(fd, a, &qid2); /* name_attributes */ + a = rpcputl(a, 1); /* name_handle */ + a = rpcputl(a, m); + memmove(a, &qid2, sizeof(Qid)); + a += round4(sizeof(Qid)); + if(fd != -1) + rp = rpcputv(rp, sqid); + getmetaint(fd, meta, "sib", &cookie); + count2 += 4 + 8 + 4 + round4(n) + 8 + 88 + 4 + 4+ m; + } + a = fattr3(fd, rp + 8, &qid); + if(a == nil) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + hnputl(rp, NFS3_OK); + hnputl(rp + 4, 1); + rp += count2; + rp = rpcputl(rp, 0); /* no more entries */ + if(cookie == 0) /* eof? */ + rp = rpcputl(rp, 1); + else + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfscommit(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp; + uvlong prelen, premtime, prectime; + + if(*((long *)p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + resetmeta(); + csync(); + rp = rpcputl(rp, NFS3_OK); + rp = dowcc(-1, rp, &qid, prelen, premtime, prectime); + rp = rpcputv(rp, starttime); + return rp; +} + +static char * +nfsunsupp(char *buf, ulong xid, char *verf) +{ + char *rp; + + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTSUPP); + rp = rpcputl(rp, 0); + return rp; +} + +static int +nfsdis(char *buf, char *p, ulong xid, char *auth, char *verf, ulong proc) +{ + char *rp; + + switch(proc) { + case NFSPROC3_NULL: + rp = rpcnull(buf, xid, verf); + break; + case NFSPROC3_GETATTR: + rp = nfsgetattr(buf, p, xid, verf); + break; + case 
NFSPROC3_SETATTR: + rp = nfssetattr(buf, p, xid, verf); + break; + case NFSPROC3_LOOKUP: + rp = nfslookup(buf, p, xid, auth, verf); + break; + case NFSPROC3_ACCESS: + rp = nfsaccess(buf, p, xid, auth, verf); + break; + case NFSPROC3_READLINK: + rp = nfsreadlink(buf, p, xid, verf); + break; + case NFSPROC3_READ: + rp = nfsread(buf, p, xid, auth, verf); + break; + case NFSPROC3_WRITE: + rp = nfswrite(buf, p, xid, auth, verf); + break; + case NFSPROC3_CREATE: + rp = nfscreate(buf, p, xid, auth, verf); + break; + case NFSPROC3_MKDIR: + rp = nfsmkdir(buf, p, xid, auth, verf); + break; + case NFSPROC3_SYMLINK: + rp = nfssymlink(buf, p, xid, auth, verf); + break; + case NFSPROC3_MKNOD: + rp = nfsmknod(buf, p, xid, auth, verf); + break; + case NFSPROC3_REMOVE: + case NFSPROC3_RMDIR: + rp = nfsremove(buf, p, xid, auth, verf); + break; + case NFSPROC3_RENAME: + rp = nfsrename(buf, p, xid, auth, verf); + break; + case NFSPROC3_LINK: + rp = nfsunsupp(buf, xid, verf); /* $ */ + break; + case NFSPROC3_READDIR: + rp = nfsreaddir(buf, p, xid, auth, verf); + break; + case NFSPROC3_READDIRPLUS: + rp = nfsreaddirplus(buf, p, xid, auth, verf); + break; + case NFSPROC3_FSSTAT: + rp = nfsfsstat(buf, p, xid, verf); + break; + case NFSPROC3_FSINFO: + rp = nfsfsinfo(buf, p, xid, verf); + break; + case NFSPROC3_PATHCONF: + rp = nfspathconf(buf, p, xid, verf); + break; + case NFSPROC3_COMMIT: + rp = nfscommit(buf, p, xid, verf); + break; + default: + rp = initreply(buf, xid, MSG_DENIED, verf, PROC_UNAVAIL); + break; + } + return rp - buf; +} + +static void +tpstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(tpchan, &fd)) { + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = PMAP_PROG; + r->minver = PMAP_VERS; + r->maxver = PMAP_VERS; + r->dispatch = pmapdis; + threadcreate(tcprpcreader, r, 8192); + } + threadexits(nil); +} + 
+static void +tportmapper(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("tcp!*!%d", PMAP_PORT); + acfd = announce(s, adir); + if(acfd < 0) + fprint(2, "error in announce: %r\n"); + if(debugnfs) + fprint(2, "announce in tcp port mapper got dir: %s:%r\n", adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(lcfd < 0) + fprint(2, "error in listen: %r\n"); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in tcp port mapper: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(tpchan, &fd); + } +} + +static void +upstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(upchan, &fd)) { + if(shutdown) + break; + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = PMAP_PROG; + r->minver = PMAP_VERS; + r->maxver = PMAP_VERS; + r->dispatch = pmapdis; + threadcreate(udprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +uportmapper(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("udp!*!%d", PMAP_PORT); + acfd = announce(s, adir); + if(acfd < 0) + fprint(2, "error in announce: %r\n"); + if(debugnfs) + fprint(2, "announce in udp port mapper got dir: %s:%r\n", adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(lcfd < 0) + fprint(2, "error in listen: %r\n"); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in udp port mapper: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(upchan, &fd); + } +} + +static void +mountstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(mchan, &fd)) { + if(shutdown) + break; + for(r = rcbhd; r && r->inuse; r 
= r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = MNT_PROG; + r->minver = MNT_MIN_VERS; + r->maxver = MNT_MAX_VERS; + r->dispatch = mntdis; + threadcreate(tcprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +mountd(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("tcp!*!%d", MNT_PORT); + acfd = announce(s, adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in mountd: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(mchan, &fd); + } +} + +static void +nfsdstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(nchan, &fd)) { + if(shutdown) + break; + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = NFS_PROG; + r->minver = NFS_VERS; + r->maxver = NFS_VERS; + r->dispatch = nfsdis; + threadcreate(tcprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +nfsd(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("tcp!*!%d", NFS_PORT); + acfd = announce(s, adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in nfsd: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(nchan, &fd); + } +} + +static int +regport(void) +{ + char *buf, *p; + int fd; +int n, i; + + /* + * On Plan 9, don't even bother trying to see if we have + * a local portmap running. 
+ */ + if(access("/net/ipselftab", AREAD) == 0) + return 0; + /* + * Take a crack at using a locks instance of portmap/ + * rpcbind. If we succeed, we don't need to bother + * starting out build-in one. If + */ + fd = dial("udp!127.1!111", nil, nil, nil); + if(fd < 0) + return 0; +fprint(2, "Got portmap connection open\n"); + buf = malloc(1500); + p = buf; + p = rpcputl(p, 42); /* xid */ + p = rpcputl(p, CALL); /* mtype */ + p = rpcputl(p, 2); /* rpcvers */ + p = rpcputl(p, PMAP_PROG); /* prog */ + p = rpcputl(p, PMAP_VERS); /* vers */ + p = rpcputl(p, PMAPPROC_SET); /* proc */ + p = rpcputl(p, 0); /* auth */ + p = rpcputl(p, 0); + p = rpcputl(p, 0); /* verf */ + p = rpcputl(p, 0); + p = rpcputl(p, NFS_PROG); /* prog */ + p = rpcputl(p, NFS_VERS); /* vers */ + p = rpcputl(p, IPPROTO_TCP); /* prot */ + p = rpcputl(p, NFS_PORT); /* port */ + write(fd, buf, p - buf); + n = read(fd, buf, 1500); +for(i = 0; i < n; ++i) fprint(2, "%02x ", buf[i]); +fprint(2, "\n"); + close(fd); + fd = dial("udp!127.1!111", nil, nil, nil); + if(fd < 0) { + free(buf); + return 0; + } + p = buf; + p = rpcputl(p, 42); /* xid */ + p = rpcputl(p, CALL); /* mtype */ + p = rpcputl(p, 2); /* rpcvers */ + p = rpcputl(p, PMAP_PROG); /* prog */ + p = rpcputl(p, PMAP_VERS); /* vers */ + p = rpcputl(p, PMAPPROC_SET); /* proc */ + p = rpcputl(p, 0); /* auth */ + p = rpcputl(p, 0); + p = rpcputl(p, 0); /* verf */ + p = rpcputl(p, 0); + p = rpcputl(p, MNT_PROG); /* prog */ + p = rpcputl(p, MNT_MAX_VERS); /* vers */ + p = rpcputl(p, IPPROTO_TCP); /* prot */ + p = rpcputl(p, MNT_PORT); /* port */ + write(fd, buf, p - buf); + n = read(fd, buf, 1500); +for(i = 0; i < n; ++i) fprint(2, "%02x ", buf[i]); +fprint(2, "\n"); + close(fd); + free(buf); + return 1; +} + +void +initnfs(void) +{ + if(!regport()) { + upchan = chancreate(sizeof(ulong), 1); + threadcreate(upstarter, nil, 1024); + umaptid = proccreate(uportmapper, nil, 8192); + tpchan = chancreate(sizeof(ulong), 1); + threadcreate(tpstarter, nil, 1024); + 
tmaptid = proccreate(tportmapper, nil, 8192); + } + mchan = chancreate(sizeof(ulong), 1); + threadcreate(mountstarter, nil, 1024); + mounttid = proccreate(mountd, nil, 8192); + nchan = chancreate(sizeof(ulong), 1); + threadcreate(nfsdstarter, nil, 1024); + nfstid = proccreate(nfsd, nil, 8192); +} + +void +haltnfs(void) +{ + Rcb *r; + + if(upchan == nil) + return; +/* + if(upchan) { + chanclose(upchan); + chanclose(tpchan); + } + chanclose(mchan); + chanclose(nchan); +*/ + for(r = rcbhd; r; r = r->next) { + if(r->io) { + iointerrupt(r->io); + closeioproc(r->io); + } + } +/* + if(upchan) { + threadkill(umaptid); + threadkill(tmaptid); + } + threadkill(mounttid); + threadkill(nfstid); +*/ +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:30 2014 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +uvlong +devsize(char *dev) +{ + Dir *d; + uvlong len; + + d = dirstat(dev); + if(d == nil) + return ~0ULL; + len = d->length; + free(d); + return len; +} + +static void +consthread(void *) +{ + docons(nil); + threadexits(nil); +} + +void +conspost(int cfd[], int pfd[]) +{ + if(pipe(pfd) < 0) + sysfatal("pipe: %r"); + postfd("θfscons", pfd[0]); + postfd("θfsctl", cfd[0]); + threadcreate(consthread, nil, 8192); +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:29 2014 @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +uvlong +devsize(char *dev) +{ + struct stat stbuf; + struct extvtoc vtoc; + int fd, n; + + if(stat(dev, &stbuf) == 0) + return stbuf.st_size; + fd = open(dev, OREAD); + if(fd <= 0) + return ~0ULL; + n = read_extvtoc(fd, &vtoc); + close(fd); + if(n < 0) + return ~0ULL; + return vtoc.v_sectorsz * vtoc.v_part[n].p_size; +} + +int +threadpid(int) +{ + return getpid(); +} + +static void +startcons(void *x) +{ + int *pfd; + char *ns, *path; + int acfd, lcfd; + char adir[40], ldir[40]; + + pfd = x; + ns = getns(); + path = smprint("unix!%s/thetafscons", ns); + acfd = announce(path, adir); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(lcfd < 0) + break; + pfd[0] = lcfd; + pfd[1] = lcfd; + docons(nil); + } + threadexits(nil); +} + +void +conspost(int cfd[], int pfd[]) +{ + threadcreate(startcons, pfd, 8192); + if(post9pservice(cfd[0], "thetafsctl", nil) < 0) + fprint(2, "post9pservice failed:%r\n"); +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:29 2014 @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2013, Coraid, Inc. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +enum { + Nht = 67108859, +}; + +static ulong primes[] = {1048573, 2097143, 4194301, 8388593, + 16777213, 33554393, 67108859, 134217689, 268435399}; + +static Lock slock; + +Super super; + +void +loadsuper(void) +{ + Super *sp; + + lock(&slock); + sp = cbread(0); + if(sp->magic != Magicθ) + sysfatal("Bad super magic"); + memmove(&super, sp, sizeof(Super)); + brelease(0); + if(super.nht == 0) { + super.nht = Nht; + super.nhashblk = (super.nht + NPerBlk - 1) / NPerBlk; + } + if(super.snaptime == 0) + super.snaptime = (3 * 60 + 15) * 60; + initfree(); + unlock(&slock); +} + +static char supbuf[1024]; + +char * +prsuper(void) +{ + char *p, *e; + + p = supbuf; + e = p + nelem(supbuf); + p = seprint(p, e, "Superblock:\n"); + p = seprint(p, e, "magic: %ulld(0x%ullx)\n", super.magic, super.magic); + p = seprint(p, e, "qgen: %ulld(0x%ullx)\n", super.qgen, super.qgen); + p = seprint(p, e, "nblk: %ulld(0x%ullx)\n", super.nblk, super.nblk); + p = seprint(p, e, "nfreemap: %ulld(0x%ullx)\n", super.nfreemap, super.nfreemap); + p = seprint(p, e, "freemap: %ulld(0x%ullx)\n", super.freemap, super.freemap); + p = seprint(p, e, "stat: %ulld(0x%ullx)\n", super.state, super.state); + p = seprint(p, e, "firstdat: %ulld(0x%ullx)\n", super.firstdat, super.firstdat); + p = seprint(p, e, "nfree: %ulld(0x%ullx)\n", super.nfree, super.nfree); + p = seprint(p, e, "firstlun: %ulld(0x%ullx)\n", super.firstlun, super.firstlun); + p = seprint(p, e, "nmeta: %ulld(0x%ullx)\n", super.nmeta, super.nmeta); + p = seprint(p, e, "firstmeta: %ulld(0x%ullx)\n", super.firstmeta, super.firstmeta); + p = seprint(p, e, "ffmeta: %ulld(0x%ullx)\n", super.ffmeta, super.ffmeta); + p = seprint(p, e, "nblob: %ulld(0x%ullx)\n", super.nblob, super.nblob); + p = seprint(p, e, "firstblob: %ulld(0x%ullx)\n", super.firstblob, super.firstblob); + p = seprint(p, e, "ffblob: %ulld(0x%ullx)\n", super.ffblob, super.ffblob); + p = seprint(p, e, 
"lfblob: %ulld(0x%ullx)\n", super.lfblob, super.lfblob); + p = seprint(p, e, "nht: %ulld(0x%ullx)\n", super.nht, super.nht); + seprint(p, e, "nhashblk: %ulld(0x%ullx)\n", super.nhashblk, super.nhashblk); + return supbuf; +} + +void +savesuper(void) +{ + char *p; + + lock(&slock); + p = cbread(0); + memset(p, 0, BlkSize); + memmove(p, &super, sizeof(Super)); + cbwrite(0); + brelease(0); + unlock(&slock); +} + +void +ream(char *dev) +{ + Qid rootqid; + char *me; + uchar *bigbuf; + uvlong meta, firstnon, lastnon, i; + vlong bperb; + vlong now; + int j, k, sfd; +int ndot = 0; + +fprint(2, "reaming %s\n", dev); + sfd = open(dev, ORDWR); + if(sfd < 0) + sysfatal("Couldn't open device for write: %r"); + /* + * Init superblock + */ + super.magic = Magicθ; + super.version = 1; + super.qgen = 1 | ((uvlong)TFile << 60); + i = devsize(dev); + if(i == ~0ULL) + sysfatal("couldn't get device size:%r\n"); + super.nblk = i / BlkSize; + for(i = 0; i < nelem(primes) - 1 && super.nblk > primes[i]; ++i) ; + super.nht = primes[i]; + super.nhashblk = (super.nht + NPerBlk - 1) / NPerBlk; + bperb = 8 * BlkSize; + super.nfreemap = (super.nblk + bperb - 1) / bperb; + super.freemap = 2 * super.nhashblk + 1; + super.nmeta = super.nblk / 200; + super.firstmeta = super.freemap + super.nfreemap; + super.ffmeta = 1; + super.nblob = super.nblk / 200; + super.firstblob = super.firstmeta + super.nmeta; + super.ffblob = super.firstblob * BlkSize; + super.lfblob = super.ffblob + (super.nblob - 1) * BlkSize + BlkSize/2; + super.state = FSClean; + super.firstdat = super.firstblob + super.nblob; + super.nfree = super.nblk - super.firstdat; +fprint(2, "writing superblock: freemap=%ulld nfreemap=%ulld firstdat=%ulld nmeta=%ulld firstmeta=%ulld\n", super.freemap, super.nfreemap, super.firstdat, super.nmeta, super.firstmeta); + savesuper(); + /* + * Clear hash tables + */ + bigbuf = malloc(1024*1024); + j = (1024 * 1024) / BlkSize; + memset(bigbuf, 0, 1024 * 1024); + for(i = 1; i < super.freemap; i += j) +{ 
+fprint(2, "."); +if(++ndot % 60 == 0) fprint(2, "\n"); + pwrite(sfd, bigbuf, 1024 * 1024, i * BlkSize); +} +fprint(2, "\n"); + /* + * Init free bit map + */ + firstnon = super.firstdat / (BlkSize * 8); + lastnon = super.nblk / (BlkSize * 8); + memset(bigbuf, 0, BlkSize); + for(i = 0; i < firstnon; ++i) + pwrite(sfd, bigbuf, BlkSize, (super.freemap + i) * BlkSize); + for(i = firstnon; i <= lastnon; ++i) { + memset(bigbuf, 0xff, BlkSize); + if(i == firstnon) { + j = super.firstdat % (BlkSize * 8); + k = j % 8; + memset(bigbuf, 0, j/8); + bigbuf[j/8] = ~((1 << k) - 1); + } + if(i == lastnon) { + j = super.nblk % (BlkSize * 8); + k = j % 8; + bigbuf[j/8] = (1 << k) - 1; + memset(bigbuf + j/8 + 1, 0, BlkSize - (j/8 + 1)); + } + pwrite(sfd, bigbuf, BlkSize, (super.freemap + i) * BlkSize); + } + memset(bigbuf, 0, BlkSize); + for(i = lastnon + 1; i < super.nfreemap; ++i) + pwrite(sfd, bigbuf, BlkSize, (super.freemap + i) * BlkSize); + free(bigbuf); + loadsuper(); + /* + * Initialize the metadata regions + */ + reammeta(sfd); + /* + * Create root directory + */ + rootqid.path = p2q(-1, "/", 1); + meta = q2m(-1, rootqid.path, 1); + setmetastr(meta, "name", nil, "/", 0); + rootqid.vers = 0; + rootqid.type = QTDIR; + setmetaint(meta, "qpath", nil, rootqid.path); + setmetaint(meta, "qvers", nil, rootqid.vers); + setmetaint(meta, "qtype", nil, rootqid.type); + setmetaint(meta, "mode", nil, DMDIR | 0775); + now = nsec(); + setmetaint(meta, "atime", nil, now); + setmetaint(meta, "mtime", nil, now); + setmetaint(meta, "length", nil, 0); + me = getuser(); + setmetastr(meta, "uid", nil, me, 0); + setmetastr(meta, "gid", nil, me, 0); + setmetastr(meta, "muid", nil, me, 0); + setmetaint(meta, "child", nil, 0); + setqhash(rootqid.path, meta); + savesuper(); +fprint(2, "Done with ream\n"); +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:28 2014 @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +#define MAXFIELDS 100 + +typedef struct Filebuf Filebuf; +typedef struct P9user P9user; +typedef struct Unixgroup Unixgroup; +typedef struct Unixsys Unixsys; +typedef struct Unixuser Unixuser; + +struct Filebuf +{ + char *buf; + char **lines; + int nlines; +}; + +struct P9user +{ + char *id; + char *name; + char *leader; + char **members; + int nmembers; +}; + +struct Unixgroup +{ + char *name; + int id; + char **members; + int nmembers; +}; + +struct Unixsys +{ + char *name; + int nusers, ngroups; + Unixuser *users; + Unixgroup *groups; + Unixsys *next; +}; + +struct Unixuser +{ + char *name; + int id; +}; + +static P9user *p9users; +static int np9users; +static Unixsys *unixhd; + +/* +static void +dumpusers(void) +{ + P9user *u; + int i, j, fd; + + fd = create("/tmp/users.dump", OWRITE, 0666); + for(u = p9users, i = 0; i < np9users; ++u, ++i) { + fprint(fd, "id:%s name:%s leader:%s nmembers:%d\n", + u->id, u->name, u->leader, u->nmembers); + for(j = 0; j < u->nmembers; ++j) + fprint(fd, "%s ", u->members[j]); + fprint(fd, "\n"); + } + close(fd); +} +*/ + +static Filebuf * +loadfile(char *name) +{ + Filebuf *fb; + char *p; + uvlong len; + uvlong qpath, meta; + long n; + + qpath = p2q(-1, name, 0); + if(qpath == 0) + return nil; + meta = q2m(-1, qpath, 0); + if(meta == 0) + return nil; + if(getmetaint(-1, meta, "length", &len) == MTnone) + return nil; + fb = θmalloc(sizeof(Filebuf)); + fb->buf = θmalloc(len); + n = θpread(-1, qpath, fb->buf, len, 0); + if(n < len) { + free(fb->buf); + free(fb); + return nil; + } + for(p = fb->buf; p < fb->buf + n; ++p) + if(*p == '\n') + ++fb->nlines; + fb->lines = θmalloc(fb->nlines * sizeof(char *)); + gettokens(fb->buf, fb->lines, fb->nlines, "\n"); + return fb; +} + +static void +freefile(Filebuf *fb) +{ + free(fb->buf); + free(fb->lines); + free(fb); +} + +static void +loadusers(void) +{ + Filebuf *fb; + char *flds[MAXFIELDS]; + int i, j, n; + 
+ np9users = 0; + fb = loadfile("//adm/users"); + if(fb == nil) { +fprint(2, "unexpected no /adm/users\n"); + return; +} + p9users = θmalloc(fb->nlines * sizeof(P9user)); + for(i = 0; i < fb->nlines; ++i) { + if(fb->lines[i][0] == '#') + continue; + n = getfields(fb->lines[i], flds, MAXFIELDS, 0, ":,"); + if(n < 3) + continue; + if(flds[3] == nil || flds[3][0] == 0) + p9users[np9users].nmembers = 0; + else + p9users[np9users].nmembers = n - 3; + p9users[np9users].id = estrdup9p(flds[0]); + p9users[np9users].name = estrdup9p(flds[1]); + if(flds[2] && flds[2][0] != '\0') + p9users[np9users].leader = estrdup9p(flds[2]); + p9users[np9users].members = θmalloc((n - 2) * sizeof(char *)); + for(j = 3; j < n; ++j) + p9users[np9users].members[j-3] = estrdup9p(flds[j]); + ++np9users; + } + freefile(fb); +} + +static Unixsys * +buildsys(char *toks[3]) +{ + Filebuf *ufb, *gfb; + Unixsys *us; + char *flds[MAXFIELDS]; + int i, j, n; + + ufb = loadfile(toks[1]); + if(ufb == nil) + return nil; + gfb = loadfile(toks[2]); + if(gfb == nil) { + freefile(ufb); + return nil; + } + us = θmalloc(sizeof(Unixsys)); + us->name = estrdup9p(toks[0]); + us->nusers = ufb->nlines; + us->users = θmalloc(us->nusers *sizeof(Unixuser)); + us->ngroups = gfb->nlines; + us->groups = θmalloc(us->ngroups *sizeof(Unixgroup)); + for(i = 0; i < us->nusers; ++i) { + n = getfields(ufb->lines[i], flds, MAXFIELDS, 0, ":"); + if(n < 3) + continue; + us->users[i].name = estrdup9p(flds[0]); + us->users[i].id = atoi(estrdup9p(flds[2])); + } + for(i = 0; i < us->ngroups; ++i) { + n = getfields(gfb->lines[i], flds, MAXFIELDS, 0, ":,"); + if(n < 3) + continue; + us->groups[i].name = estrdup9p(flds[0]); + us->groups[i].id = atoi(estrdup9p(flds[1])); + us->groups[i].nmembers = n - 3; + us->groups[i].members = θmalloc(us->groups[i].nmembers * sizeof(char *)); + for(j = 0; j < us->groups[i].nmembers; ++j) + us->groups[i].members[j] = estrdup9p(flds[j+3]); + } + freefile(ufb); + freefile(gfb); + return us; +} + +void 
+inituid(void) +{ + Filebuf *fb; + Unixsys *us; + char *toks[3]; + int i; + + loadusers(); + +// dumpusers(); + + fb = loadfile("//adm/nfs"); + if(fb != nil) { + for(i = 0; i < fb->nlines; ++i) { + tokenize(fb->lines[i], toks, 3); + us = buildsys(toks); + if(us) { + us->next = unixhd; + unixhd = us; + } + } + freefile(fb); + } +} + +int +ingroup(char *user, char *group) +{ + int i, j; + + if(strcmp(user, group) == 0) + return 1; + for(i = 0; i < np9users && strcmp(group, p9users[i].name); ++i) ; + if(i >= np9users) + return 0; + for(j = 0; j < p9users[i].nmembers && strcmp(user, p9users[i].members[j]); ++j) ; + if(j >= p9users[i].nmembers) + return 0; + return 1; +} + +int +isleader(char *user, char *group) +{ + int i; + + for(i = 0; i < np9users && strcmp(group, p9users[i].name); ++i) ; + if(i >= np9users) + return 0; + if(p9users[i].leader == nil) + return 0; + if(strcmp(user, p9users[i].leader) == 0) + return 1; + return 0; +} + +char * +id2uname(char *sys, int id) +{ + Unixsys *s; + int i; + + for(s = unixhd; s && strcmp(s->name, sys) != 0; s = s->next) ; + if(s == nil) + return nil; + for(i = 0; i < s->nusers && s->users[i].id != id; ++i) ; + if(i >= s->nusers) + return nil; + return s->users[i].name; +} + +char * +id2gname(char *sys, int id) +{ + Unixsys *s; + int i; + + for(s = unixhd; s && strcmp(s->name, sys) != 0; s = s->next) ; + if(s == nil) + return nil; + for(i = 0; i < s->ngroups && s->groups[i].id != id; ++i) ; + if(i >= s->ngroups) + return nil; + return s->groups[i].name; +} --- /sys/src/cmd/θfs Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs Thu Feb 20 02:17:27 2014 @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +/* + * Version of pread that's careful to always work on sector boundaries + */ +long +spread(int fd, void *a, long n, uvlong off) +{ + char *buf; + uvlong aoff, boff; + long an, rn; + + boff = off % 512; + if(n % 512 == 0 && boff == 0) + return pread(fd, a, n, off); + aoff = off & ~511; + an = (n + boff + 511) & ~511; + buf = θmalloc(an); + rn = pread(fd, buf, an, aoff); + if(rn <= 0) { + free(buf); + return rn; + } + rn -= boff; + if(rn > n) + rn = n; + memmove(a, buf + boff, rn); + free(buf); + return rn; +} + +long +θpread(int fd, uvlong qpath, void *a, long n, uvlong off) +{ + uvlong fblk, meta, now, len; + ulong m, tot, boff; + + meta = q2m(fd, qpath, 0); + if(meta == 0) + return -1; + if(getmetaint(fd, meta, "length", &len) == MTnone) + len = 0; + if(off >= len) + n = 0; + else if(off + n > len) + n = len - off; + tot = 0; + while(n > 0) { + fblk = locate(fd, meta, off / BlkSize, 0); + boff = off % BlkSize; + if(boff + n > BlkSize) + m = BlkSize - boff; + else + m = n; + if(fblk != 0) { + if(fd == -1) + m = cread((char *)a + tot, m, fblk * BlkSize + boff); + else + m = spread(fd, (char *)a + tot, m, fblk * BlkSize + boff); + } + else + memset((char *)a + tot, 0, m); + n -= m; + off += m; + tot += m; + } + if(fd == -1 && doatimes) { + now = nsec(); + setmetaint(meta, "atime", nil, now); + } + return tot; +} + +long +θpwrite(uvlong qpath, void *a, long n, uvlong off, int grow) +{ + uvlong fblk, meta, woff, now, len, qvers; + ulong m, tot, boff; + + meta = q2m(-1, qpath, 0); + if(meta == 0) + return -1; + if(getmetaint(-1, meta, "length", &len) == MTnone) + len = 0; + if(grow == 0) { + if(off >= len) + n = 0; + else if(off + n > len) + n = len - off; + } + else if (grow == 2) + off = len; + woff = off; + tot = 0; + while(n > 0) { + fblk = locate(-1, meta, woff / BlkSize, 1); + if(fblk == 0) + break; + boff = woff % BlkSize; + if(boff + n > BlkSize) + m = BlkSize - boff; + else 
+ m = n; + m = cwrite((char *)a + tot, m, fblk * BlkSize + boff); + woff += m; + n -= m; + tot += m; + } + if(grow) { + if(off + tot > len) + setmetaint(meta, "length", nil, off + tot); + } + now = nsec(); + setmetaint(meta, "mtime", nil, now); + setmetaint(meta, "atime", nil, now); + if(getmetaint(-1, meta, "qvers", &qvers) != MTnone) + qvers++; + setmetaint(meta, "qvers", nil, qvers); + return tot; +} + +void +rmdlist(uvlong meta, uvlong myqid) +{ + uvlong sibqid, pqid, predqid; + uvlong pmeta, qvers; + + getmetaint(-1, meta, "sib", &sibqid); + getmetaint(-1, meta, "parent", &pqid); + pmeta = q2m(-1, pqid, 0); + if(pmeta == 0) { + fprint(2, "warning: no parent?!?!\n"); + return; + } + if(getmetaint(-1, pmeta, "qvers", &qvers) != MTnone) + setmetaint(pmeta, "qvers", nil, qvers + 1); + getmetaint(-1, pmeta, "child", &predqid); + if(predqid == myqid) { + setmetaint(pmeta, "child", nil, sibqid); + return; + } + do { + pmeta = q2m(-1, predqid, 0); + if(pmeta == 0) + return; + getmetaint(-1, pmeta, "sib", &predqid); + } while(predqid != myqid); + setmetaint(pmeta, "sib", nil, sibqid); +} + +void * +θmalloc(ulong x) +{ + if(x > 6553600) { + fprint(2, "$%p", getcallerpc(&x)); + return nil; + } + else + return emalloc9p(x); +} + --- /sys/src/cmd/θfs/nfs.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/nfs.c Thu Feb 20 02:17:37 2014 @@ -0,0 +1,2536 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include <9p.h> +#include +#include "dat.h" + +/* RPC -- RFC 1057 */ +enum { + AUTH_NULL = 0, + AUTH_UNIX, + AUTH_SHORT, + AUTH_DES, + + CALL = 0, + REPLY, + + MSG_ACCEPTED = 0, + MSG_DENIED, + + SUCCESS = 0, + PROG_UNAVAIL, + PROG_MISMATCH, + PROC_UNAVAIL, + GARBAGE_ARGS, + + RPC_MISMATCH = 0, + AUTH_ERROR, + + AUTH_BADCRED = 1, + AUTH_REJECTEDCRED, + AUTH_BADVERF, + AUTH_REJECTEDVERF, + AUTH_TOOWEAK, + + PMAP_PROG = 100000, + PMAP_VERS = 2, + PMAP_PORT = 111, + IPPROTO_TCP = 6, + IPPROTO_UDP = 17, + + PMAPPROC_NULL = 0, + PMAPPROC_SET, + PMAPPROC_UNSET, + PMAPPROC_GETPORT, + PMAPPROC_DUMP, + PMAPPROC_CALLIT, +}; + +/* NFSv3 -- RFC 1813 */ +enum { + NFS_PROG = 100003, + NFS_VERS = 3, + NFS_PORT = 2049, + + NFS3_FHSIZE = 64, + NFS3_COOKIEVERFSIZE = 8, + NFS3_CREATEVERFSIZE = 8, + NFS3_WRITEVERFSIZE = 8, + + NFS3_OK = 0, + NFS3ERR_PERM, + NFS3ERR_NOENT, + NFS3ERR_IO = 5, + NFS3ERR_NXIO, + NFS3ERR_ACCES = 13, + NFS3ERR_EXIST = 17, + NFS3ERR_XDEV, + NFS3ERR_NODEV, + NFS3ERR_NOTDIR, 
+ NFS3ERR_ISDIR, + NFS3ERR_INVAL, + NFS3ERR_FBIG = 27, + NFS3ERR_NOSPC, + NFS3ERR_ROFS = 30, + NFS3ERR_MLINK, + NFS3ERR_NAMETOOLONG = 63, + NFS3ERR_NOTEMPTY = 66, + NFS3ERR_DQUOT = 69, + NFS3ERR_STALE, + NFS3ERR_REMOTE, + NFS3ERR_BADHANDLE = 10001, + NFS3ERR_NOT_SYNC, + NFS3ERR_BAD_COOKIE, + NFS3ERR_NOTSUPP, + NFS3ERR_TOOSMALL, + NFS3ERR_SERVERFAULT, + NFS3ERR_BADTYPE, + NFS3ERR_JUKEBOX, + + NF3REG = 1, + NF3DIR, + NF3BLK, + NF3CHR, + NF3LNK, + NF3SOCK, + NF3FIFO, + + DONT_CHANGE = 0, + SET_TO_SERVER_TIME, + SET_TO_CLIENT_TIME, + + NFSPROC3_NULL = 0, + NFSPROC3_GETATTR, + NFSPROC3_SETATTR, + NFSPROC3_LOOKUP, + NFSPROC3_ACCESS, + NFSPROC3_READLINK, + NFSPROC3_READ, + NFSPROC3_WRITE, + NFSPROC3_CREATE, + NFSPROC3_MKDIR, + NFSPROC3_SYMLINK, + NFSPROC3_MKNOD, + NFSPROC3_REMOVE, + NFSPROC3_RMDIR, + NFSPROC3_RENAME, + NFSPROC3_LINK, + NFSPROC3_READDIR, + NFSPROC3_READDIRPLUS, + NFSPROC3_FSSTAT, + NFSPROC3_FSINFO, + NFSPROC3_PATHCONF, + NFSPROC3_COMMIT, + + ACCESS3_READ = 0x0001, + ACCESS3_LOOKUP = 0x0002, + ACCESS3_MODIFY = 0x0004, + ACCESS3_EXTEND = 0x0008, + ACCESS3_DELETE = 0x0010, + ACCESS3_EXECUTE = 0x0020, + + UNSTABLE = 0, + DATA_SYNC, + FILE_SYNC, + + UNCHECKED = 0, + GUARDED, + EXCLUSIVE, + + FSF3_LINK = 0x0001, + FSF3_SYMLINK = 0x0002, + FSF3_HOMOGENEOUS= 0x0008, + FSF3_CANSETTIME = 0x0010, + + MNT_PROG = 100005, + MNT_MIN_VERS = 2, + MNT_MAX_VERS = 3, + MNT_PORT = 4003, + + MNTPATHLEN = 1024, + MNTNAMELEN = 255, + FHSIZE3 = NFS3_FHSIZE, + + MNT3_OK = 0, + MNT3ERR_PERM, + MNT3ERR_NOENT, + MNT3ERR_IO = 5, + MNT3ERR_ACCES = 13, + MNT3ERR_NOTDIR = 20, + MNT3ERR_INVAL = 22, + MNT3ERR_NAMETOOLONG = 63, + MNT3ERR_NOTSUPP = 10004, + MNT3ERR_SERVERFAULT = 10006, + + MOUNTPROC3_NULL = 0, + MOUNTPROC3_MNT, + MOUNTPROC3_DUMP, + MOUNTPROC3_UMNT, + MOUNTPROC3_UMNTALL, + MOUNTPROC3_EXPORT, + + NLM_PROG = 100021, + NLM_VERS = 4, + NLM_PORT = 4002, + + NLM4_GRANTED = 0, + NLM4_DENIED, + NLM4_DENIED_NLOCKS, + NLM4_BLOCKED, + NLM4_DENIED_GRACE_PERIOD, + NLM4_DEADLOCK, + 
NLM4_ROFS,
+	NLM4_STALE_FH,
+	NLM4_FBIG,
+	NLM4_FAILED,
+
+	NLMPROC4_NULL = 0,
+	NLMPROC4_TEST,
+	NLMPROC4_LOCK,
+	NLMPROC4_CANCEL,
+	NLMPROC4_UNLOCK,
+	NLMPROC4_GRANTED,
+	NLMPROC4_TEST_MSG,
+	NLMPROC4_LOCK_MSG,
+	NLMPROC4_CANCEL_MSG,
+	NLMPROC4_UNLOCK_MSG,
+	NLMPROC4_GRANTED_MSG,
+	NLMPROC4_TEST_RES,
+	NLMPROC4_LOCK_RES,
+	NLMPROC4_CANCEL_RES,
+	NLMPROC4_UNLOCK_RES,
+	NLMPROC4_GRANTED_RES,
+	NLMPROC4_SHARE = 20,
+	NLMPROC4_UNSHARE,
+	NLMPROC4_NM_LOCK,
+	NLMPROC4_FREE_ALL,
+};
+
+typedef struct Rcb Rcb;
+
+/*
+ * Per-connection RPC control block handed to the reader threads:
+ * the descriptor and Ioproc to use, the program number and version
+ * range served, and the procedure dispatcher for that program.
+ */
+struct Rcb {
+	int inuse;
+	int fd;
+	Ioproc *io;
+	ulong myprog;
+	ulong minver;
+	ulong maxver;
+	int (*dispatch)(char *, char *, ulong, char *, char *, ulong);
+	Rcb *next;
+};
+
+static Channel *upchan, *tpchan, *mchan, *nchan;
+static Rcb *rcbhd;
+static int nfstid, mounttid, tmaptid, umaptid;
+
+int debugnfs;
+
+enum {
+	Maxauthlen = 400,	/* RPC limit on an opaque_auth body */
+};
+
+/* Round x up to the next multiple of 4, the XDR alignment unit. */
+static int
+round4(int x)
+{
+	return (x + 3) & ~3;
+}
+
+/* Store l big-endian at p; return the address just past it. */
+static char *
+rpcputl(char *p, ulong l)
+{
+	hnputl(p, l);
+	return p + 4;
+}
+
+/* Store v big-endian at p; return the address just past it. */
+static char *
+rpcputv(char *p, uvlong v)
+{
+	hnputv(p, v);
+	return p + 8;
+}
+
+/*
+ * Copy the opaque_auth at *pp (flavor, length, body) into a fresh
+ * allocation and advance *pp past it, including XDR padding.
+ * A body longer than Maxauthlen is not copied: the caller gets an
+ * empty AUTH_NULL instead, so a bogus length cannot make us read
+ * past the message.  The caller frees the result.
+ */
+static char *
+getauth(char **pp)
+{
+	char *a;
+	ulong n;
+
+	n = nhgetl(*pp + 4);
+	if(n > Maxauthlen) {
+		a = emalloc9p(8);
+		hnputl(a, AUTH_NULL);
+		hnputl(a + 4, 0);
+		*pp += 8;
+		return a;
+	}
+	a = emalloc9p(n + 8);
+	memmove(a, *pp, n + 8);
+	*pp += round4(n) + 8;
+	return a;
+}
+
+/*
+ * Append the opaque_auth verf (as returned by getauth) at p, zero
+ * padding the body to a multiple of 4.  Returns the next free byte.
+ */
+static char *
+putauth(char *p, char *verf)
+{
+	ulong n;
+
+	n = nhgetl(verf + 4);
+	memmove(p, verf, n + 8);
+	p += n + 8;
+	for(; n & 3; n++)
+		*p++ = 0;
+	return p;
+}
+
+/*
+ * Start an RPC reply in buf: xid, REPLY, reply stat, then for
+ * MSG_ACCEPTED the verifier, and finally rstat (the accept or reject
+ * status).  verf is only used when stat is MSG_ACCEPTED.
+ * Returns the next free byte of the reply.
+ */
+static char *
+initreply(char *buf, ulong xid, ulong stat, void *verf, int rstat)
+{
+	char *p;
+
+	p = buf;
+	p = rpcputl(p, xid);
+	p = rpcputl(p, REPLY);
+	p = rpcputl(p, stat);
+	if(stat == MSG_ACCEPTED)
+		p = putauth(p, verf);
+	p = rpcputl(p, rstat);
+	return p;
+}
+
+/*
+ * Thread body serving RPC calls on one stream connection described
+ * by the Rcb passed in a.  Each message is preceded by a 4-byte
+ * record mark whose low 31 bits give its length.
+ */
+static void
+tcprpcreader(void *a)
+{
+	Rcb *r;
+	char *buf, *p, *auth, *verf;
+	ulong xid, mtype, rpcvers, prog, vers, proc;
+	int n;
+
+	r = a;
+	buf = malloc(34004);
+	while(1) {
+		n = ioreadn(r->io, r->fd, buf, 4);
+		if(shutdown || n < 4) {
+			free(buf);
+			ioclose(r->io, r->fd);
+			r->inuse = 0;
+			threadexits(nil);
+		}
+		n = nhgetl(buf) & 0x7fffffff;
+		if(n > 34000)
{
+			/*
+			 * The oversized record is still in the stream; reading
+			 * on would treat its payload as record marks, so the
+			 * connection is dropped instead.
+			 */
+			fprint(2, "bogus read size: %d\n", n);
+			free(buf);
+			ioclose(r->io, r->fd);
+			r->inuse = 0;
+			threadexits(nil);
+		}
+		n = ioreadn(r->io, r->fd, buf+4, n);
+		if(n <= 0) {
+			if(debugnfs)
+				fprint(2, "leaving tcpreader for prog %uld\n", r->myprog);
+			free(buf);
+			ioclose(r->io, r->fd);
+			r->inuse = 0;
+			threadexits(nil);
+		}
+		/* if we don't at least have the xid and mtype, ignore */
+		if(n < 8)
+			continue;
+		p = buf+4;
+		xid = nhgetl(p);
+		p += 4;
+		mtype = nhgetl(p);
+		p += 4;
+		/* we're only a server - ignore replies */
+		if(mtype != CALL)
+			continue;
+		/* a call has six header words and two opaque_auth headers */
+		if(n < 40)
+			continue;
+		rpcvers = nhgetl(p);
+		p += 4;
+		prog = nhgetl(p);
+		p += 4;
+		vers = nhgetl(p);
+		p += 4;
+		proc = nhgetl(p);
+		p += 4;
+		if(debugnfs)
+			fprint(2, "got message in prog %uld len=%d xid=%uld(%ulx) mtype=%uld rpcvers=%uld prog=%uld vers=%uld proc=%uld\n", r->myprog, n, xid, xid, mtype, rpcvers, prog, vers, proc);
+		if(rpcvers != 2) {
+			p = initreply(buf+4, xid, MSG_DENIED, nil, RPC_MISMATCH);
+			p = rpcputl(p, 2);
+			p = rpcputl(p, 2);
+			hnputl(buf, (p-(buf+4)) | 0x80000000);
+			iowrite(r->io, r->fd, buf, p-buf);
+			continue;
+		}
+		auth = getauth(&p);
+		verf = getauth(&p);
+		if(prog != r->myprog) {
+			p = initreply(buf+4, xid, MSG_ACCEPTED, verf, PROG_UNAVAIL);
+			hnputl(buf, (p-(buf+4)) | 0x80000000);
+			iowrite(r->io, r->fd, buf, p-buf);
+			free(auth);
+			free(verf);
+			continue;
+		}
+		if(vers < r->minver || vers > r->maxver) {
+			p = initreply(buf+4, xid, MSG_ACCEPTED, verf, PROG_MISMATCH);
+			p = rpcputl(p, r->minver);
+			p = rpcputl(p, r->maxver);
+			hnputl(buf, (p-(buf+4)) | 0x80000000);
+			iowrite(r->io, r->fd, buf, p-buf);
+			free(auth);
+			free(verf);
+			continue;
+		}
+		/* the reply is built in place, after the 4-byte record mark */
+		n = r->dispatch(buf+4, p, xid, auth, verf, proc);
+		if(debugnfs) {
+			fprint(2, "writing %d bytes in response\n", n);
+			if(debugnfs > 1) {
+				int i;
+				for(i = 0; i < n+4; i += 4) fprint(2, " %ud", nhgetl(buf + i));
+				fprint(2, "\n");
+			}
+		}
+		hnputl(buf, n | 0x80000000);
+		iowrite(r->io, r->fd, buf, n+4);
+		free(auth);
+		free(verf);
+	}
+}
+
+/*
+ * Thread body serving a single RPC call read from the descriptor in
+ * the Rcb passed in a.  One message is read, answered in the same
+ * buffer, and then the descriptor is closed and the thread exits.
+ */
+static void
+udprpcreader(void *a)
+{
+	Rcb *r;
+	char *buf, *p, *auth, *verf;
+	ulong xid, mtype, rpcvers, prog, vers, proc;
+	int n;
+
+	r = a;
+	buf = malloc(8500);
+	n = ioread(r->io, r->fd, buf, 8500);
+	if(shutdown || n <= 0)
+		goto done2;
+	/* if we don't at least have the xid and mtype, ignore */
+	if(n < 8)
+		goto done2;
+	p = buf;
+	xid = nhgetl(p);
+	p += 4;
+	mtype = nhgetl(p);
+	p += 4;
+	if(debugnfs)
+		fprint(2, "got message in prog %uld len=%d xid=%uld(%ulx) mtype=%uld\n", r->myprog, n, xid, xid, mtype);
+	/* we're only a server - ignore replies */
+	if(mtype != CALL)
+		goto done2;
+	/* a call has six header words and two opaque_auth headers */
+	if(n < 40)
+		goto done2;
+	rpcvers = nhgetl(p);
+	p += 4;
+	prog = nhgetl(p);
+	p += 4;
+	vers = nhgetl(p);
+	p += 4;
+	proc = nhgetl(p);
+	p += 4;
+	if(debugnfs)
+		fprint(2, "rpcvers=%uld prog=%uld vers=%uld proc=%uld\n", rpcvers, prog, vers, proc);
+	if(rpcvers != 2) {
+		p = initreply(buf, xid, MSG_DENIED, nil, RPC_MISMATCH);
+		p = rpcputl(p, 2);
+		p = rpcputl(p, 2);
+		iowrite(r->io, r->fd, buf, p-buf);
+		goto done2;
+	}
+	auth = getauth(&p);
+	verf = getauth(&p);
+	if(prog != r->myprog) {
+		p = initreply(buf, xid, MSG_ACCEPTED, verf, PROG_UNAVAIL);
+		iowrite(r->io, r->fd, buf, p-buf);
+		goto done1;
+	}
+	if(vers < r->minver || vers > r->maxver) {
+		p = initreply(buf, xid, MSG_ACCEPTED, verf, PROG_MISMATCH);
+		p = rpcputl(p, r->minver);
+		p = rpcputl(p, r->maxver);
+		iowrite(r->io, r->fd, buf, p-buf);
+		goto done1;
+	}
+	n = r->dispatch(buf, p, xid, auth, verf, proc);
+	if(debugnfs) {
+		fprint(2, "writing %d bytes in response\n", n);
+		if(debugnfs > 1) {
+			int i;
+			for(i = 0; i < n; i += 4) fprint(2, " %ud", nhgetl(buf + i));
+			fprint(2, "\n");
+		}
+	}
+	iowrite(r->io, r->fd, buf, n);
+done1:
+	free(auth);
+	free(verf);
+done2:
+	free(buf);
+	ioclose(r->io, r->fd);
+	r->inuse = 0;
+	threadexits(nil);
+}
+
+/*
+ * Build the reply used for procedures that return nothing of
+ * interest: an accepted SUCCESS header followed by one zero word.
+ */
+static char *
+rpcnull(char *buf, ulong xid, char *verf)
+{
+	char *rp;
+
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	rp = rpcputl(rp, 0);
+	return rp;
+}
+
+/*
+ * Fake Port Mapper
+ */
+static int
+pmapdis(char *buf, char *p, ulong xid, char
*auth, char *verf, ulong proc)
+{
+	char *rp;
+	ulong prog, vers, prot, nproc;
+
+	/*
+	 * Dispatcher for the port mapper program: buf receives the
+	 * reply, p points at the procedure arguments.  Only NULL,
+	 * GETPORT and DUMP are answered, and only TCP ports are
+	 * advertised.  Returns the reply length in bytes.
+	 */
+	switch(proc) {
+	case PMAPPROC_NULL:
+		rp = rpcnull(buf, xid, verf);
+		break;
+	case PMAPPROC_GETPORT:
+		prog = nhgetl(p);
+		p += 4;
+		vers = nhgetl(p);
+		p += 4;
+		prot = nhgetl(p);
+		if(debugnfs)
+			fprint(2, "In portmap getport prog=%uld vers=%uld prot=%uld\n", prog, vers, prot);
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		/* port 0 means "not registered" unless overwritten below */
+		hnputl(rp, 0);
+		switch(prog) {
+		case NFS_PROG:
+			if(vers == NFS_VERS && prot == IPPROTO_TCP)
+				hnputl(rp, NFS_PORT);
+			break;
+		case MNT_PROG:
+			if(vers >= MNT_MIN_VERS && vers <= MNT_MAX_VERS && prot == IPPROTO_TCP)
+				hnputl(rp, MNT_PORT);
+			break;
+		case NLM_PROG:
+			if(vers == NLM_VERS && prot == IPPROTO_TCP)
+				hnputl(rp, NLM_PORT);
+			break;
+		}
+		rp += 4;
+		break;
+	case PMAPPROC_DUMP:
+		/* a list of (prog, vers, prot, port), each preceded by 1, ended by 0 */
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, 1);
+		rp = rpcputl(rp, NFS_PROG);
+		rp = rpcputl(rp, NFS_VERS);
+		rp = rpcputl(rp, IPPROTO_TCP);
+		rp = rpcputl(rp, NFS_PORT);
+		rp = rpcputl(rp, 1);
+		rp = rpcputl(rp, MNT_PROG);
+		rp = rpcputl(rp, MNT_MAX_VERS);
+		rp = rpcputl(rp, IPPROTO_TCP);
+		rp = rpcputl(rp, MNT_PORT);
+		rp = rpcputl(rp, 1);
+		rp = rpcputl(rp, NLM_PROG);
+		rp = rpcputl(rp, NLM_VERS);
+		rp = rpcputl(rp, IPPROTO_TCP);
+		rp = rpcputl(rp, NLM_PORT);
+		rp = rpcputl(rp, 0);
+		break;
+	case PMAPPROC_CALLIT:
+SET(nproc);
+USED(nproc);
+USED(auth);
+/*
+		prog = nhgetl(p);
+		p += 4;
+		vers = nhgetl(p);
+		p += 4;
+		nproc = nhgetl(p);
+		p += 4;
+		switch(prog) {
+		case NFS_PROG:
+			return nfsdis(buf, p, xid, auth, verf, nproc);
+			break;
+		case MNT_PROG:
+			return mntdis(buf, p, xid, auth, verf, nproc);
+			break;
+		case NLM_PROG:
+			return nlmdis(buf, p, xid, auth, verf, nproc);
+			break;
+		default:
+			rp = initreply(buf, xid, MSG_ACCEPTED, verf);
+			break;
+		}
+		break;
+*/
+	case PMAPPROC_SET:	/* not used here for fake port mapper */
+	case PMAPPROC_UNSET:
+	default:
+		/*
+		 * Unsupported procedure: the accepted reply carries
+		 * PROC_UNAVAIL and no result words.
+		 */
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, PROC_UNAVAIL);
+		break;
+	}
+	return rp - buf;
+}
+
+/*
+ * MOUNTPROC3_MNT: p points at the counted path to mount.  On success
+ * the reply holds MNT3_OK, a file handle that is the directory's Qid,
+ * and a one-entry auth flavor list (AUTH_UNIX); otherwise a MNT3ERR_
+ * status.  Returns the next free byte of the reply.
+ */
+static char *
+domnt(char *buf, char *p, ulong xid, char *, char *verf)
+{
+	Qid qid;
+	char *rp, *path;
+	uvlong meta, x;
+	ulong n;
+
+	n = nhgetl(p);
+	/* bound the client-supplied length before allocating or copying */
+	if(n > MNTPATHLEN) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, MNT3ERR_NAMETOOLONG);
+		return rp;
+	}
+	path = malloc(n + 1);
+	if(path == nil) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, MNT3ERR_SERVERFAULT);
+		return rp;
+	}
+	memmove(path, p + 4, n);
+	path[n] = 0;
+	if(debugnfs)
+		fprint(2, "Attempting to mount %s qpath=%ulld\n", path, p2q(-1, path, 0));
+	meta = q2m(-1, p2q(-1, path, 0), 0);
+	free(path);
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	if(meta == 0) {
+		rp = rpcputl(rp, MNT3ERR_NOENT);
+		return rp;
+	}
+	if(getmetaint(-1, meta, "qpath", &x) == MTnone) {
+		rp = rpcputl(rp, MNT3ERR_IO);
+		return rp;
+	}
+	qid.path = x;
+	getmetaint(-1, meta, "qvers", &x);
+	qid.vers = x;
+	getmetaint(-1, meta, "qtype", &x);
+	qid.type = x;
+	if(!(qid.type & QTDIR)) {
+		rp = rpcputl(rp, MNT3ERR_NOTDIR);
+		return rp;
+	}
+	if(debugnfs)
+		fprint(2, "meta=%ulld qid=(%ulld,%uld,%d)\n", meta, qid.path, qid.vers, qid.type);
+	rp = rpcputl(rp, MNT3_OK);
+	rp = rpcputl(rp, sizeof(Qid));
+	memmove(rp, &qid, sizeof(Qid));
+	rp += round4(sizeof(Qid));
+	rp = rpcputl(rp, 1);
+	rp = rpcputl(rp, AUTH_UNIX);
+	return rp;
+}
+
+/*
+ * Dispatcher for the mount program.  buf receives the reply and p
+ * points at the procedure arguments.  Returns the reply length.
+ */
+static int
+mntdis(char *buf, char *p, ulong xid, char *auth, char *verf, ulong proc)
+{
+	char *rp;
+
+	switch(proc) {
+	case MOUNTPROC3_NULL:
+		rp = rpcnull(buf, xid, verf);
+		break;
+	case MOUNTPROC3_MNT:
+		rp = domnt(buf, p, xid, auth, verf);
+		break;
+	case MOUNTPROC3_DUMP:
+		/* empty mount list */
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		hnputl(rp, 0);
+		rp += 4;
+		break;
+	case MOUNTPROC3_UMNT:
+		rp = rpcnull(buf, xid, verf);
+		break;
+	case MOUNTPROC3_UMNTALL:
+		rp = rpcnull(buf, xid, verf);
+		break;
+	case MOUNTPROC3_EXPORT:
+		/* a single export, "/", with an empty group list */
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, 1);
+		rp = rpcputl(rp, 1);
+		memmove(rp, "/\0\0\0", 4);
+		rp += 4;
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		break;
+	default:
+		/*
+		 * PROC_UNAVAIL is an accept status, so the call must be
+		 * reported as accepted; a denied reply has no such code.
+		 */
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, PROC_UNAVAIL);
+		break;
+	}
+	return rp - buf;
+}
+
+static char *
+fattr3(int fd, char *rp, Qid *qid)
+{
+	char *symlink;
+	uvlong meta, len, mtime, x;
+
+	/*
+	 * Append the NFSv3 attributes of qid at rp, using metadata read
+	 * through fd (-1 for the live file system).  Missing metadata
+	 * falls back to defaults: regular file, mode 0777, uid/gid -2,
+	 * length 0, ctime equal to mtime.  Times are stored in
+	 * nanoseconds and sent as seconds/nanoseconds pairs.
+	 * Returns the next free byte, or nil if qid has no metadata.
+	 */
+	meta = q2m(fd, qid->path, 0);
+	if(meta == 0)
+{
+fprint(2, "nil meta in fattr3: caller:%p qpath:%ulld fd:%d\n", getcallerpc(&fd), qid->path, fd);
+		return nil;
+}
+	if(qid->type & QTDIR)
+		rp = rpcputl(rp, NF3DIR);
+	else {
+		if((symlink = getmetastr(fd, meta, "symlink")) != nil) {
+			rp = rpcputl(rp, NF3LNK);
+			free(symlink);
+		}
+		else if(getmetaint(fd, meta, "nodetype", &x) != MTnone)
+			rp = rpcputl(rp, x);
+		else
+			rp = rpcputl(rp, NF3REG);
+	}
+	if(getmetaint(fd, meta, "unixmode", &x) != MTnone)
+		rp = rpcputl(rp, x);
+	else if(getmetaint(fd, meta, "mode", &x) != MTnone)
+		rp = rpcputl(rp, x & 0777);
+	else
+		rp = rpcputl(rp, 0777);
+	rp = rpcputl(rp, 1);	/* nlink */
+	if(getmetaint(fd, meta, "nuid", &x) == MTnone)	/* uid */
+		rp = rpcputl(rp, -2);
+	else
+		rp = rpcputl(rp, x);
+	if(getmetaint(fd, meta, "ngid", &x) == MTnone)	/* gid */
+		rp = rpcputl(rp, -2);
+	else
+		rp = rpcputl(rp, x);
+	if(getmetaint(fd, meta, "length", &len) == MTnone)
+		len = 0;
+	rp = rpcputv(rp, len);	/* size */
+	if(getmetaint(fd, meta, "used", &x) == MTnone)
+		rp = rpcputv(rp, len);
+	else
+		rp = rpcputv(rp, x);
+	if(getmetaint(fd, meta, "majordev", &x) == MTnone)	/* rdev */
+		rp = rpcputl(rp, 0);
+	else
+		rp = rpcputl(rp, x);
+	if(getmetaint(fd, meta, "minordev", &x) == MTnone)
+		rp = rpcputl(rp, 0);
+	else
+		rp = rpcputl(rp, x);
+	rp = rpcputv(rp, 0);	/* fsid */
+	rp = rpcputv(rp, qid->path);	/* fileid */
+	if(getmetaint(fd, meta, "atime", &x) == MTnone)
+		rp = rpcputv(rp, 0);
+	else {
+		rp = rpcputl(rp, x / 1000000000LL);
+		rp = rpcputl(rp, x % 1000000000LL);
+	}
+	if(getmetaint(fd, meta, "mtime", &x) == MTnone)
+		mtime = 0;
+	else
+		mtime = x;
+	rp = rpcputl(rp, mtime / 1000000000LL);
+	rp = rpcputl(rp, mtime % 1000000000LL);
+	if(getmetaint(fd, meta, "ctime", &x) == MTnone) {
+		rp = rpcputl(rp, mtime / 1000000000LL);
+		rp = rpcputl(rp, mtime % 1000000000LL);
+	}
+	else {
+		rp = rpcputl(rp, x / 1000000000LL);
+		rp = rpcputl(rp, x % 1000000000LL);
+	}
+	return rp;
+}
+
+/*
+ * Append a post_op_attr for qid: a 1 followed by the attributes, or
+ * a lone 0 when fattr3 cannot produce them.
+ */
+static char *
+opattr(int fd, char *rp, Qid *qid)
+{
+	char *trp;
+
+	rp = rpcputl(rp, 1);
+	trp = fattr3(fd, rp, qid);
+	if(trp == nil) {
+fprint(2, "nil in opattr from %p\n", getcallerpc(&fd));
+		rp -= 4;
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	return trp;
+}
+
+/*
+ * Return the three permission bits (read 4, write 2, exec 1) that the
+ * caller described by the credential auth has on the file at meta.
+ * With the global allow set everything is permitted.  Otherwise the
+ * "other" bits of "mode" apply unless an AUTH_UNIX credential matches
+ * the file's owner or group, by name through the id tables of the
+ * credential's host when both sides have a name, else by the numeric
+ * "nuid"/"ngid".  With rootallow set, uid 0 gets everything.
+ */
+static ulong
+getperm(int fd, uvlong meta, char *auth)
+{
+	char *host, *uid, *gid, *s;
+	uvlong mode, x;
+	ulong perm, n, nuid, ngid;
+
+	if(allow)
+		return 0007;
+
+	if(getmetaint(fd, meta, "mode", &mode) == MTnone)
+		mode = 0;
+	perm = mode & 0007;
+	if(nhgetl(auth) != AUTH_UNIX)
+		return perm;
+	auth += 12;		/* flavor, body length, stamp */
+	n = nhgetl(auth);
+	auth += 4;
+	if(n > 255)		/* machine name is at most 255 bytes */
+		return perm;
+	host = emalloc9p(n + 1);
+	memmove(host, auth, n);
+	host[n] = 0;
+	auth += round4(n);	/* the name is padded to a word boundary */
+	nuid = nhgetl(auth);
+	auth += 4;
+	ngid = nhgetl(auth);
+	uid = nil;
+	gid = nil;
+	if(rootallow && nuid == 0) {
+		perm = 0007;
+		goto done;
+	}
+	uid = getmetastr(fd, meta, "uid");
+	if(uid != nil && (s = id2uname(host, nuid)) != nil) {
+		if(strcmp(s, uid) == 0) {
+			perm = (mode >> 6) & 0007;
+			goto done;
+		}
+	}
+	else if(getmetaint(fd, meta, "nuid", &x) != MTnone && x == nuid) {
+		perm = (mode >> 6) & 0007;
+		goto done;
+	}
+	gid = getmetastr(fd, meta, "gid");
+	if(gid != nil && (s = id2gname(host, ngid)) != nil) {
+		/* compare against the file's group, not its owner */
+		if(strcmp(s, gid) == 0)
+			perm = (mode >> 3) & 0007;
+	}
+	else if(getmetaint(fd, meta, "ngid", &x) != MTnone && x == ngid)
+		perm = (mode >> 3) & 0007;
+done:
+	free(uid);
+	free(gid);
+	free(host);
+	return perm;
+}
+
+/*
+ * Capture the pre-operation size, mtime and ctime of qpath for a
+ * later wcc_data.  A missing length or mtime reads as 0 and a missing
+ * ctime as the mtime.  Returns -1 if the file has no metadata.
+ */
+static int
+prewcc(int fd, uvlong qpath, uvlong *len, uvlong *mtime, uvlong *ctime)
+{
+	uvlong meta, x;
+
+	meta = q2m(fd, qpath, 0);
+	if(meta == 0)
+		return -1;
+	if(getmetaint(fd, meta, "length", &x) == MTnone)
+		x = 0;
+	*len = x;
+	if(getmetaint(fd, meta, "mtime", &x) == MTnone)
+		x = 0;
+	*mtime = x;
+	if(getmetaint(fd, meta, "ctime", &x) == MTnone)
+		*ctime = *mtime;
+	else
+		*ctime = x;
+	return 0;
+}
+
+static char *
+dowcc(int fd, char *rp, Qid *qid,
uvlong prelen, uvlong premtime, uvlong prectime)
+{
+	/*
+	 * Append a wcc_data at rp: the pre-operation size, mtime and
+	 * ctime captured earlier, then the current attributes of qid.
+	 */
+	rp = rpcputl(rp, 1);
+	rp = rpcputv(rp, prelen);
+	rp = rpcputl(rp, premtime / 1000000000LL);
+	rp = rpcputl(rp, premtime % 1000000000LL);
+	rp = rpcputl(rp, prectime / 1000000000LL);
+	rp = rpcputl(rp, prectime % 1000000000LL);
+	rp = opattr(fd, rp, qid);
+	return rp;
+}
+
+/*
+ * Build the path name of qpath by walking "parent" links up to qid
+ * path 1, which yields "/".  len is the number of extra bytes the
+ * caller wants to be able to append; each level adds room for its
+ * own "/name" on the way up.
+ * Returns a malloc'd string, or nil if a component has no metadata
+ * or name, or if memory runs out.
+ */
+static char *
+mkpath(int fd, uvlong qpath, int len)
+{
+	char *str, *name, *p;
+	uvlong meta, parent;
+	int n;
+
+	if(qpath == 1) {
+		str = malloc(len + 2);
+		if(str != nil)
+			strcpy(str, "/");
+		return str;
+	}
+	meta = q2m(fd, qpath, 0);
+	if(meta == 0)
+		return nil;
+	name = getmetastr(fd, meta, "name");
+	if(name == nil)
+		return nil;
+	n = strlen(name);
+	if(getmetaint(fd, meta, "parent", &parent) == MTnone) {
+		str = malloc(len + n + 2);
+		if(str != nil)
+			strcpy(str, name);
+		free(name);
+		return str;
+	}
+	str = mkpath(fd, parent, len + n + 1);
+	if(str == nil) {
+		free(name);
+		return nil;
+	}
+	p = str + strlen(str);
+	*p++ = '/';
+	strcpy(p, name);
+	free(name);
+	return str;
+}
+
+/*
+ * Apply the sattr3 at p to the metadata at meta: optional mode, uid,
+ * gid and size, then the atime and mtime selectors.  Setting the size
+ * also sets mtime to now; ctime is always set to now.
+ * Returns the address just past the sattr3.
+ */
+static char *
+dosattr(uvlong meta, char *p)
+{
+	uvlong now, x;
+	ulong setit;
+
+	now = nsec();
+	setit = nhgetl(p);
+	p += 4;
+	if(setit) {
+		/* keep the non-permission bits of the stored mode */
+		if(getmetaint(-1, meta, "mode", &x) == MTnone)
+			x = 0;
+		setmetaint(meta, "mode", nil, (nhgetl(p) & 0777) | (x & ~0777));
+		setmetaint(meta, "unixmode", nil, nhgetl(p));
+		p += 4;
+	}
+	setit = nhgetl(p);
+	p += 4;
+	if(setit) {
+		setmetaint(meta, "nuid", nil, nhgetl(p));
+		p += 4;
+	}
+	setit = nhgetl(p);
+	p += 4;
+	if(setit) {
+		setmetaint(meta, "ngid", nil, nhgetl(p));
+		p += 4;
+	}
+	setit = nhgetl(p);
+	p += 4;
+	if(setit) {
+		setmetaint(meta, "length", nil, nhgetv(p));
+		p += 8;
+		setmetaint(meta, "mtime", nil, now);
+	}
+	setit = nhgetl(p);
+	p += 4;
+	if(setit == SET_TO_CLIENT_TIME) {
+		setmetaint(meta, "atime", nil, nhgetl(p) * 1000000000LL + nhgetl(p + 4));
+		p += 8;
+	}
+	else if(setit == SET_TO_SERVER_TIME)
+		setmetaint(meta, "atime", nil, now);
+	setit = nhgetl(p);
+	p += 4;
+	if(setit == SET_TO_CLIENT_TIME) {
+		setmetaint(meta, "mtime", nil, nhgetl(p) * 1000000000LL + nhgetl(p + 4));
+		p += 8;
+	}
+	else if(setit == SET_TO_SERVER_TIME)
+		setmetaint(meta, "mtime", nil, now);
+	setmetaint(meta, "ctime", nil, now);
+	return p;
+}
+
+static int
+opensnap(uvlong qid)
+{
+	char *sname,
*spath;
+	uvlong meta;
+	int fd;
+
+	/*
+	 * Open the snapshot image named by the "snap" attribute of the
+	 * live entry qid, under ddir.  Returns the descriptor, or -1 if
+	 * the entry is unknown, is not a snapshot, or cannot be opened.
+	 */
+	meta = q2m(-1, qid, 0);
+	if(meta == 0)
+		return -1;
+	sname = getmetastr(-1, meta, "snap");
+	if(sname == nil)
+		return -1;
+	spath = smprint("%s/%s", ddir, sname);
+	free(sname);
+	fd = open(spath, OREAD);
+	free(spath);
+	return fd;
+}
+
+/*
+ * NFSPROC3_GETATTR.  A file handle is a Qid; a longer handle also
+ * carries the qid path of the snapshot the file lives in, which is
+ * opened for the duration of the call.
+ */
+static char *
+nfsgetattr(char *buf, char *p, ulong xid, char *verf)
+{
+	Qid qid;
+	char *rp, *a;
+	int fd;
+
+	fd = -1;
+	/* same handle-length test as the other procedures */
+	if(nhgetl(p) != round4(sizeof(Qid))) {
+		fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4));
+		if(fd == -1) {
+			rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+			rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+			rp = rpcputl(rp, 0);
+			rp = rpcputl(rp, 0);
+			return rp;
+		}
+	}
+	qid = *((Qid *)(p + 4));
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	rp = rpcputl(rp, NFS3_OK);
+	a = fattr3(fd, rp, &qid);
+	if(a == nil)
+		hnputl(rp-4, NFS3ERR_BADHANDLE);	/* overwrite the status just written */
+	else
+		rp = a;
+	if(fd != -1)
+		close(fd);
+	return rp;
+}
+
+/*
+ * NFSPROC3_SETATTR.  Only plain (non-snapshot) handles are accepted.
+ * The reply carries the attributes from before and after the change.
+ */
+static char *
+nfssetattr(char *buf, char *p, ulong xid, char *verf)
+{
+	Qid qid;
+	char *rp;
+	uvlong meta, prelen, premtime, prectime;
+
+	if(nhgetl(p) != sizeof(Qid)) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	qid = *((Qid *)(p + 4));
+	p += nhgetl(p) + 4;
+	if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	meta = q2m(-1, qid.path, 0);
+	dosattr(meta, p);
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	rp = rpcputl(rp, NFS3_OK);
+	rp = dowcc(-1, rp, &qid, prelen, premtime, prectime);
+	return rp;
+}
+
+/*
+ * NFSPROC3_LOOKUP: look name up in the directory given by the handle.
+ * The caller needs exec permission on the directory.  Looking up an
+ * entry with a "snap" attribute opens that snapshot and answers with
+ * its root; handles inside a snapshot carry the snapshot's qid path
+ * after the Qid.  Every failure answers with the status and an empty
+ * post_op_attr, and releases the name, path and descriptors.
+ */
+static char *
+nfslookup(char *buf, char *p, ulong xid, char *auth, char *verf)
+{
+	Qid qid, qid2;
+	char *rp, *name, *path, *sname, *spath;
+	uvlong meta, qp, x, sqid;
+	ulong perms, status;
+	int n, m, fd,pfd;
+
+	pfd = fd = -1;
+	sqid = 0;
+	name = nil;
+	path = nil;
+	if(nhgetl(p) != round4(sizeof(Qid))) {
+		sqid = nhgetv(p + round4(sizeof(Qid)) + 4);
+		pfd = fd = opensnap(sqid);
+		if(fd == -1) {
+			status = NFS3ERR_BADHANDLE;
+			goto fail;
+		}
+	}
+	qid = *((Qid *)(p + 4));
+	p += nhgetl(p) + 4;
+	n = nhgetl(p);
+	p += 4;
+	/* no request is larger than 34000 bytes, so neither is a name */
+	if(n < 0 || n > 34000) {
+		status = NFS3ERR_NAMETOOLONG;
+		goto fail;
+	}
+	name = malloc(n + 1);
+	if(name == nil) {
+		status = NFS3ERR_SERVERFAULT;
+		goto fail;
+	}
+	memmove(name, p, n);
+	name[n] = 0;
+	meta = q2m(fd, qid.path, 0);
+	if(debugnfs)
+		fprint(2, "in nfslookup: qid=(%ulld,%uld,%ud) name=%s\n", qid.path, qid.vers, qid.type, name);
+	if(meta == 0) {
+		status = NFS3ERR_BADHANDLE;
+		goto fail;
+	}
+	perms = getperm(fd, meta, auth);
+	if((perms & DMEXEC) == 0) {
+		status = NFS3ERR_ACCES;
+		goto fail;
+	}
+	if(strcmp(name, ".") == 0) {
+		/* don't need to do anything */
+	}
+	else if(strcmp(name, "..") == 0) {
+		/* a directory without a parent is its own ".." */
+		if(getmetaint(fd, meta, "parent", &qp) != MTnone)
+			meta = q2m(fd, qp, 0);
+	}
+	else {
+		path = mkpath(fd, qid.path, n + 1);
+		if(path == nil) {
+			status = NFS3ERR_BADHANDLE;
+			goto fail;
+		}
+		m = strlen(path);
+		path[m] = '/';
+		strcpy(path + m + 1, name);
+		x = p2q(fd, path, 0);
+		free(path);
+		path = nil;
+		meta = q2m(fd, x, 0);
+		if(meta == 0) {
+			status = NFS3ERR_NOENT;
+			goto fail;
+		}
+		sname = getmetastr(fd, meta, "snap");
+		if(sname) {
+			spath = smprint("%s/%s", ddir, sname);
+			free(sname);
+			fd = open(spath, OREAD);
+			free(spath);
+			/* don't fall back to the live tree for a snapshot */
+			if(fd == -1) {
+				status = NFS3ERR_IO;
+				goto fail;
+			}
+			sqid = x;
+			meta = q2m(fd, p2q(fd, "/", 0), 0);
+		}
+	}
+	if(meta == 0) {
+		status = NFS3ERR_NOENT;
+		goto fail;
+	}
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	rp = rpcputl(rp, NFS3_OK);
+	getmetaint(fd, meta, "qpath", &x);
+	qid2.path = x;
+	getmetaint(fd, meta, "qvers", &x);
+	qid2.vers = x;
+	getmetaint(fd, meta, "qtype", &x);
+	qid2.type = x;
+	if(fd == -1)
+		m = round4(sizeof(Qid));
+	else
+		m = round4(sizeof(Qid)) + sizeof(uvlong);
+	rp = rpcputl(rp, m);
+	memmove(rp, &qid2, sizeof(Qid));
+	rp += round4(sizeof(Qid));
+	if(fd != -1)
+		rp = rpcputv(rp, sqid);
+	rp = opattr(fd, rp, &qid2);
+	rp = opattr(pfd, rp, &qid);
+	free(name);
+	if(fd != -1)
+		close(fd);
+	if(pfd != -1 && pfd != fd)
+		close(pfd);
+	return rp;
+
+fail:
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	rp = rpcputl(rp, status);
+	rp = rpcputl(rp, 0);		/* no directory attributes */
+	free(path);
+	free(name);
+	if(fd != -1)
+		close(fd);
+	if(pfd != -1 && pfd != fd)
+		close(pfd);
+	return rp;
+}
+
+/*
+ * NFSPROC3_ACCESS: report which of the requested access bits the
+ * caller holds, derived from getperm.  Failure replies carry the
+ * status followed by an empty post_op_attr.
+ */
+static char *
+nfsaccess(char *buf, char *p, ulong xid, char *auth, char *verf)
+{
+	Qid qid;
+	char *rp, *a;
+	uvlong meta;
+	ulong reqacc, rspacc, perms;
+	int fd;
+
+	fd = -1;
+	if(nhgetl(p) != round4(sizeof(Qid))) {
+		fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4));
+		if(fd == -1) {
+			rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+			rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+			rp = rpcputl(rp, 0);
+			rp = rpcputl(rp, 0);
+			return rp;
+		}
+	}
+	qid = *((Qid *)(p + 4));
+	p += nhgetl(p) + 4;
+	reqacc = nhgetl(p);
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	meta = q2m(fd, qid.path, 0);
+	if(meta == 0) {
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	perms = getperm(fd, meta, auth);
+	rspacc = 0;
+	if(perms & DMREAD)
+		rspacc |= ACCESS3_READ;
+	if(perms & DMWRITE)
+		rspacc |= ACCESS3_MODIFY | ACCESS3_EXTEND;
+	if(perms & DMEXEC)
+		rspacc |= ACCESS3_LOOKUP | ACCESS3_EXECUTE;
+	rspacc &= reqacc;
+	rp = rpcputl(rp, NFS3_OK);
+	/* attributes go after the "attributes follow" word at rp */
+	a = fattr3(fd, rp + 4, &qid);
+	if(a == nil) {
+		hnputl(rp-4, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+	}
+	else {
+		rpcputl(rp, 1);
+		rp = a;
+		rp = rpcputl(rp, rspacc);
+	}
+	if(fd != -1)
+		close(fd);
+	return rp;
+}
+
+static char *
+nfsreadlink(char *buf, char *p, ulong xid, char *verf)
+{
+	Qid qid;
+	char *rp, *pp;
+	uvlong meta;
+	int n, fd;
+
+	fd = -1;
+	if(nhgetl(p) != round4(sizeof(Qid))) {
+		fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4));
+		if(fd == -1) {
+			rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+			rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+			rp = rpcputl(rp, 0);
+			rp = rpcputl(rp, 0);
+			return rp;
+		}
+	}
+	qid = *((Qid *)(p + 4));
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	meta = q2m(fd, qid.path, 0);
+	
if(meta == 0) {
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	if((pp = getmetastr(fd, meta, "symlink")) == nil) {
+		rp = rpcputl(rp, NFS3ERR_INVAL);
+		rp = rpcputl(rp, 0);
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	n = strlen(pp);
+	rp = rpcputl(rp, NFS3_OK);
+	rp = opattr(fd, rp, &qid);
+	rp = rpcputl(rp, n);
+	memmove(rp, pp, n);
+	rp += round4(n);
+	free(pp);
+	if(fd != -1)
+		close(fd);
+	return rp;
+}
+
+/*
+ * NFSPROC3_READ: read at most 32768 bytes.  The caller needs read
+ * permission.  The data is read straight into its final place in the
+ * reply, 104 bytes past the status word (status, attribute flag,
+ * 84 bytes of attributes, count, eof and data length).
+ */
+static char *
+nfsread(char *buf, char *p, ulong xid, char *auth, char *verf)
+{
+	Qid qid;
+	char *rp, *a;
+	uvlong offset, meta, len;
+	ulong perms;
+	long count1, count2;
+	int fd;
+
+	fd = -1;
+	if(nhgetl(p) != round4(sizeof(Qid))) {
+		fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4));
+		if(fd == -1) {
+			rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+			rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+			rp = rpcputl(rp, 0);
+			rp = rpcputl(rp, 0);
+			return rp;
+		}
+	}
+	qid = *((Qid *)(p + 4));
+	p += nhgetl(p) + 4;
+	offset = nhgetv(p);
+	p += 8;
+	count1 = nhgetl(p);
+	/* a count above 2^31 is negative where long is 32 bits */
+	if(count1 < 0 || count1 > 32768)
+		count1 = 32768;
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	a = rp + 104;
+	meta = q2m(fd, qid.path, 0);
+	if(meta == 0) {
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	perms = getperm(fd, meta, auth);
+	if((perms & DMREAD) == 0) {
+		rp = rpcputl(rp, NFS3ERR_ACCES);
+		rp = rpcputl(rp, 0);
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	if(getmetaint(fd, meta, "length", &len) == MTnone)
+		len = 0;
+	count2 = θpread(fd, qid.path, a, count1, offset);
+	a = fattr3(fd, rp + 8, &qid);
+	if(a == nil) {
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	hnputl(rp + 4, 1);
+	if(count2 < 0) {
+		hnputl(rp, NFS3ERR_IO);
+		rp = a;
+		if(fd != -1)
+			close(fd);
+		return rp;
+	}
+	hnputl(rp, NFS3_OK);
+	rp = a;
+	rp = rpcputl(rp, count2);
+	if(offset + count2 >= len)
+		rp = rpcputl(rp, 1);
+	else
+		rp = rpcputl(rp, 0);
+	rp = rpcputl(rp, count2);
+	rp += round4(count2);
+	if(fd != -1)
+		close(fd);
+	return rp;
+}
+
+/*
+ * NFSPROC3_WRITE.  Only plain handles can be written; a snapshot
+ * handle is refused with NFS3ERR_ACCES.  The caller needs write
+ * permission.  Anything but an UNSTABLE write is followed by
+ * resetmeta and csync and reported as FILE_SYNC.  Every failure
+ * reply carries an empty wcc_data: two zero words after the status.
+ */
+static char *
+nfswrite(char *buf, char *p, ulong xid, char *auth, char *verf)
+{
+	Qid qid;
+	char *rp;
+	uvlong offset, meta, prelen, premtime, prectime;
+	ulong perms, dlen;
+	long count1, count2, stable;
+
+	if(nhgetl(p) != round4(sizeof(Qid))) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, NFS3ERR_ACCES);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	qid = *((Qid *)(p + 4));
+	p += nhgetl(p) + 4;
+	offset = nhgetv(p);
+	p += 8;
+	count1 = nhgetl(p);
+	p += 4;
+	stable = nhgetl(p);
+	p += 4;
+	dlen = nhgetl(p);	/* length of the opaque data that follows */
+	p += 4;
+	/* never write more than the client actually sent */
+	if(count1 < 0 || count1 > 32768)
+		count1 = 32768;
+	if((ulong)count1 > dlen)
+		count1 = dlen;
+	meta = q2m(-1, qid.path, 0);
+	if(meta == 0) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	perms = getperm(-1, meta, auth);
+	if((perms & DMWRITE) == 0) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, NFS3ERR_ACCES);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) {
+		rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+		rp = rpcputl(rp, NFS3ERR_BADHANDLE);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	count2 = θpwrite(qid.path, p, count1, offset, 1);
+	if(stable != UNSTABLE) {
+		resetmeta();
+		csync();
+	}
+	rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS);
+	if(count2 < 0) {
+		rp = rpcputl(rp, NFS3ERR_IO);
+		rp = rpcputl(rp, 0);
+		rp = rpcputl(rp, 0);
+		return rp;
+	}
+	rp = rpcputl(rp, NFS3_OK);
+	rp = dowcc(-1, rp, &qid, prelen, premtime, prectime);
+	rp = rpcputl(rp, count2);
+	if(stable == UNSTABLE)
+		rp = rpcputl(rp, UNSTABLE);
+	else
+		rp = rpcputl(rp, FILE_SYNC);
+	rp = rpcputv(rp, starttime);	/* write verifier */
+	return rp;
+}
+
+static char *
+mkfile(char *buf, char *p, ulong xid, char *auth, char *verf, int ilk)
+{
+	Qid qid, nqid;
+	char *name, *path, *rp;
+	uvlong meta,
pmeta, dirblk, now, x; + uvlong prelen, premeta, prectime; + ulong perms; + int n, m, how, nodetype; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + n = nhgetl(p); + p += 4; + name = malloc(n + 1); + if(name == nil) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_SERVERFAULT); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + /* name is heap-allocated: every return from here on must free it */ + memmove(name, p, n); + name[n] = 0; + p += round4(n); + if(ilk == NF3REG) { + how = nhgetl(p); + p += 4; + } + else + how = GUARDED; + if(debugnfs) + fprint(2, "in nfscreate: qid=(%ulld,%uld,%ud) name=%s\n", qid.path, qid.vers, qid.type, name); + if((qid.type & QTDIR) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTDIR); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + return rp; + } + if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_EXIST); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + return rp; + } +if(how == EXCLUSIVE) { +rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); +rp = rpcputl(rp, NFS3ERR_NOTSUPP); +rp = rpcputl(rp, 0); +rp = rpcputl(rp, 0); +free(name); +return rp; +} + pmeta = q2m(-1, qid.path, 0); + /* same stale-handle check nfsremove makes on the parent directory */ + if(pmeta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + return rp; + } + perms = getperm(-1, pmeta, auth); + if((perms & DMWRITE) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + return rp; + } + path = mkpath(-1, qid.path, n + 1); + m = strlen(path); + path[m] = '/'; + strcpy(path + m + 1, name); + nqid.path = p2q(-1, path, 1); + switch(how) { + case UNCHECKED: + break; + case GUARDED: + meta = q2m(-1, nqid.path, 0); + if(meta != 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_EXIST); + rp = rpcputl(rp, 0); + rp 
= rpcputl(rp, 0); + free(name); + free(path); + return rp; + } + } + meta = q2m(-1, nqid.path, 1); + free(path); + nqid.vers = 0; + if(ilk == NF3DIR) + nqid.type = QTDIR; + else + nqid.type = QTFILE; + prewcc(-1, qid.path, &prelen, &premeta, &prectime); + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "parent", nil, qid.path); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + getmetaint(-1, pmeta, "mode", &x); + if(ilk == NF3DIR) + setmetaint(meta, "mode", nil, x & 0777 | DMDIR); + else + setmetaint(meta, "mode", nil, x & 0777); + now = nsec(); + setmetaint(pmeta, "mtime", nil, now); + getmetaint(-1, pmeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(pmeta, "child", nil, nqid.path); + nodetype = 0; + switch(ilk) { + case NF3DIR: + setmetaint(meta, "child", nil, 0); + break; + case NF3REG: + dirblk = allocblock(); + cbclean(dirblk); + cbwrite(dirblk); + brelease(dirblk); + setmetaint(meta, "index", nil, dirblk); + break; + case NF3CHR: + nodetype = nhgetl(p); + p += 4; + setmetaint(meta, "nodetype", nil, nodetype); + break; + } + setqhash(nqid.path, meta); + p = dosattr(meta, p); + if(ilk == NF3LNK) { + n = nhgetl(p); + p += 4; + path = malloc(n + 1); + memmove(path, p, n); + path[n] = 0; + setmetastr(meta, "symlink", nil, path, 0); + free(path); + } + else if(ilk == NF3CHR) { + if(nodetype == NF3CHR || nodetype == NF3BLK) { + setmetaint(meta, "majordev", nil, nhgetl(p)); + p += 4; + setmetaint(meta, "minordev", nil, nhgetl(p)); + } + } + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, round4(sizeof(Qid))); + memmove(rp, &nqid, sizeof(Qid)); + rp += round4(sizeof(Qid)); + rp = opattr(-1, rp, &nqid); + rp = dowcc(-1, rp, &qid, prelen, premeta, prectime); + free(name); /* last use was the setmetastr above */ + savesuper(); + return rp; +} + +static char * +nfscreate(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, 
verf, NF3REG); +} + +static char * +nfsmkdir(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3DIR); +} + +static char * +nfssymlink(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3LNK); +} + +static char * +nfsmknod(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + return mkfile(buf, p, xid, auth, verf, NF3CHR); /* sort out the exact node type in mkfile */ +} + +static char * +nfsremove(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp, *name, *path; + uvlong meta, qpath, pmeta, x; + uvlong prelen, premtime, prectime; + ulong perms; + int n; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + if((qid.type & QTDIR) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTDIR); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + pmeta = q2m(-1, qid.path, 0); + if(pmeta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + perms = getperm(-1, pmeta, auth); + if((perms & DMWRITE) == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + n = nhgetl(p); + p += 4; + name = malloc(n + 1); + memmove(name, p, n); + name[n] = 0; + path = mkpath(-1, qid.path, n + 1); + n = strlen(path); + path[n] = '/'; + strcpy(path + n + 1, name); + qpath = p2q(-1, path, 0); + meta = 
q2m(-1, qpath, 0); + if(meta == 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOENT); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + free(path); + return rp; + } + if(getmetaint(-1, meta, "child", &x) != MTnone && x != 0) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTEMPTY); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + free(name); + free(path); + return rp; + } + rmq(qpath, meta); + freedata(meta); + rmdlist(meta, qpath); + freeblock(meta); + rmp(path); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + rp = dowcc(-1, rp, &qid, prelen, premtime, prectime); + free(name); + free(path); + return rp; +} + +static char * +nfsfsstat(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3_OK); + a = fattr3(-1, rp + 4, &qid); + if(a == nil) { + hnputl(rp - 4, NFS3ERR_BADHANDLE); + hnputl(rp, 0); + return rp; + } + hnputl(rp, 1); + rp = a; + rp = rpcputv(rp, super.nblk * BlkSize); /* tbytes */ + rp = rpcputv(rp, super.nfree * BlkSize); /* fbytes */ + rp = rpcputv(rp, super.nfree * BlkSize); /* abytes */ + rp = rpcputv(rp, super.nht); /* tfiles */ + rp = rpcputv(rp, super.nht); /* ffiles */ + rp = rpcputv(rp, super.nht); /* afiles */ + rp = rpcputl(rp, 0); /* invarsec */ + return rp; +} + +static char * +nfsfsinfo(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + hnputl(rp, NFS3_OK); + rp += 4; + a = fattr3(-1, rp + 4, &qid); + if(a == nil) { + hnputl(rp - 4, NFS3ERR_BADHANDLE); + hnputl(rp, 0); + return rp; + } + hnputl(rp, 1); + rp = a; + rp = rpcputl(rp, 32768); /* rtmax */ + rp = rpcputl(rp, 32768); /* rtpref */ + rp = rpcputl(rp, 1); /* rtmult */ + rp = rpcputl(rp, 32768); /* wtmax */ + rp = 
rpcputl(rp, 32768); /* wtpref */ + rp = rpcputl(rp, 1); /* wtmult */ + rp = rpcputl(rp, 8192); /* dtpref */ + rp = rpcputv(rp, 1LL << 55); /* maxfilesize */ + rp = rpcputl(rp, 0); /* time_delta */ + rp = rpcputl(rp, 1); + rp = rpcputl(rp, FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME); /* properties */ + return rp; +} + +static char * +nfspathconf(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp, *a; + + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + hnputl(rp, NFS3_OK); + rp += 4; + a = fattr3(-1, rp + 4, &qid); + if(a == nil) { + hnputl(rp - 4, NFS3ERR_BADHANDLE); + return rp; + } + hnputl(rp, 1); + rp = a; + rp = rpcputl(rp, 1); + rp = rpcputl(rp, MNTNAMELEN); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, 1); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 1); + return rp; +} + +static char * +nfsrename(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid fqid, tqid; + char *fname, *tname, *fpath, *tpath, *rp; + uvlong fdmeta, fmeta, tmeta, qpath, now, x; + uvlong fprelen, fpremtime, fprectime, tprelen, tpremtime, tprectime; + ulong perms; + int fn, tn, n; + + if(nhgetl(p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + fqid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + fn = nhgetl(p); + p += 4; + fname = malloc(fn + 1); + memmove(fname, p, fn); + fname[fn] = 0; + p += round4(fn); + fpath = mkpath(-1, fqid.path, fn + 1); + n = strlen(fpath); + fpath[n] = '/'; + strcpy(fpath + n + 1, fname); + tqid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + tn = nhgetl(p); + p += 4; + tname = malloc(tn + 1); + memmove(tname, p, tn); + tname[tn] = 0; + tpath = mkpath(-1, tqid.path, tn + 1); + n = strlen(tpath); + tpath[n] = '/'; + strcpy(tpath + n + 1, tname); + prewcc(-1, fqid.path, &fprelen, &fpremtime, &fprectime); + prewcc(-1, tqid.path, &tprelen, &tpremtime, &tprectime); + rp = initreply(buf, xid, 
MSG_ACCEPTED, verf, SUCCESS); + fdmeta = q2m(-1, fqid.path, 0); + if(fdmeta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + goto done; + } + perms = getperm(-1, fdmeta, auth); + if((perms & DMWRITE) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + goto done; + } + qpath = p2q(-1, fpath, 0); + if(qpath == 0) { + rp = rpcputl(rp, NFS3ERR_NOENT); + goto done; + } + if((tqid.type & QTDIR) == 0) { + rp = rpcputl(rp, NFS3ERR_NOTDIR); + goto done; + } + now = nsec(); + fmeta = q2m(-1, qpath, 0); + if(fqid.path != tqid.path) { + tmeta = q2m(-1, tqid.path, 0); + if(tmeta == 0) { + rp = rpcputl(rp, NFS3ERR_NOENT); + goto done; + } + perms = getperm(-1, tmeta, auth); + if((perms & DMWRITE) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + goto done; + } + rmdlist(fmeta, qpath); + setmetaint(fmeta, "parent", nil, tqid.path); + getmetaint(-1, tmeta, "child", &x); + setmetaint(fmeta, "sib", nil, x); + setmetaint(tmeta, "child", nil, qpath); + setmetaint(tmeta, "mtime", nil, now); + setmetaint(tmeta, "atime", nil, now); + } + setmetastr(fmeta, "name", nil, tname, 0); + rehashpath(qpath, fpath, tpath); + setmetaint(fmeta, "ctime", nil, now); + setmetaint(fdmeta, "mtime", nil, now); + setmetaint(fdmeta, "atime", nil, now); + rp = rpcputl(rp, NFS3_OK); +done: + rp = dowcc(-1, rp, &fqid, fprelen, fpremtime, fprectime); + rp = dowcc(-1, rp, &tqid, tprelen, tpremtime, tprectime); + free(fname); + free(fpath); + free(tname); + free(tpath); + return rp; +} + +static char * +nfsreaddir(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid; + char *rp, *a, *xs; + uvlong cookie, meta; + ulong perms; + long count1, count2; + int n, fd; + + fd = -1; + if(nhgetl(p) != round4(sizeof(Qid))) { + fd = opensnap(nhgetv(p + round4(sizeof(Qid)) + 4)); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + cookie = 
nhgetv(p); + p += 16; + count1 = nhgetl(p); + if(count1 > 8192) + count1 = 8192; + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + perms = getperm(fd, meta, auth); + if((perms & DMREAD) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + if(cookie == 0) { + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + return rp; + } + getmetaint(fd, meta, "child", &cookie); + } + a = rp + 92; + a = rpcputv(a, 0); + count2 = a - rp; + while(cookie != 0) { + meta = q2m(fd, cookie, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_IO); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + xs = getmetastr(fd, meta, "name"); + n = strlen(xs); + if(count2 + round4(n) + 24 + 8 > count1) { + free(xs); + break; + } + a = rpcputl(a, 1); + a = rpcputv(a, cookie); + a = rpcputl(a, n); + memmove(a, xs, n); + a += round4(n); + free(xs); + a = rpcputv(a, cookie); + getmetaint(fd, meta, "sib", &cookie); + count2 += round4(n) + 24; + } + a = fattr3(fd, rp + 8, &qid); + if(a == nil) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + hnputl(rp, NFS3_OK); + hnputl(rp + 4, 1); + rp += count2; + rp = rpcputl(rp, 0); + if(cookie == 0) + rp = rpcputl(rp, 1); + else + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfsreaddirplus(char *buf, char *p, ulong xid, char *auth, char *verf) +{ + Qid qid, qid2; + char *rp, *a, *xs; + uvlong cookie, meta, x, sqid; + ulong perms; + long count1, count2; + int n, m, fd; + + fd = -1; + sqid = 0; + if(nhgetl(p) != round4(sizeof(Qid))) { + sqid = nhgetv(p + round4(sizeof(Qid)) + 4); + fd = opensnap(sqid); + if(fd == -1) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, 
SUCCESS); + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + } + qid = *((Qid *)(p + 4)); + p += nhgetl(p) + 4; + cookie = nhgetv(p); + p += 16; + p += 4; /* use maxcount instead of dircount */ + count1 = nhgetl(p); + if(count1 > 8192) + count1 = 8192; + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + perms = getperm(fd, meta, auth); + if((perms & DMREAD) == 0) { + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + if(cookie == 0) { + meta = q2m(fd, qid.path, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + getmetaint(fd, meta, "child", &cookie); + } + a = rp + 92; + a = rpcputv(a, 0); /* cookieverf */ + count2 = a - rp; + while(cookie != 0) { + meta = q2m(fd, cookie, 0); + if(meta == 0) { + rp = rpcputl(rp, NFS3ERR_IO); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + xs = getmetastr(fd, meta, "name"); + n = strlen(xs); + getmetaint(fd, meta, "qpath", &x); + qid2.path = x; + getmetaint(fd, meta, "qvers", &x); + qid2.vers = x; + getmetaint(fd, meta, "qtype", &x); + qid2.type = x; + if(fd == -1) + m = round4(sizeof(Qid)); + else + m = round4(sizeof(Qid)) + sizeof(uvlong); + if(count2 + 4 + 8 + 4 + round4(n) + 8 + 88 + 4 + 4 + m + 8 > count1) { + free(xs); + break; + } + a = rpcputl(a, 1); + a = rpcputv(a, cookie); /* fileid */ + a = rpcputl(a, n); /* name */ + memmove(a, xs, n); + a += round4(n); + free(xs); + a = rpcputv(a, cookie); /* cookie */ + a = opattr(fd, a, &qid2); /* name_attributes */ + a = rpcputl(a, 1); /* name_handle */ + a = rpcputl(a, m); + memmove(a, &qid2, sizeof(Qid)); + a += round4(sizeof(Qid)); + /* the snapshot id is the tail of this entry's handle, so it goes at the entry cursor a; rp still marks the reply status word */ + if(fd != -1) + a = rpcputv(a, sqid); + getmetaint(fd, meta, "sib", &cookie); + 
count2 += 4 + 8 + 4 + round4(n) + 8 + 88 + 4 + 4+ m; + } + a = fattr3(fd, rp + 8, &qid); + if(a == nil) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; + } + hnputl(rp, NFS3_OK); + hnputl(rp + 4, 1); + rp += count2; + rp = rpcputl(rp, 0); /* no more entries */ + if(cookie == 0) /* eof? */ + rp = rpcputl(rp, 1); + else + rp = rpcputl(rp, 0); + if(fd != -1) + close(fd); + return rp; +} + +static char * +nfscommit(char *buf, char *p, ulong xid, char *verf) +{ + Qid qid; + char *rp; + uvlong prelen, premtime, prectime; + + if(*((long *)p) != round4(sizeof(Qid))) { + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_ACCES); + rp = rpcputl(rp, 0); + return rp; + } + qid = *((Qid *)(p + 4)); + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + if(prewcc(-1, qid.path, &prelen, &premtime, &prectime) < 0) { + rp = rpcputl(rp, NFS3ERR_BADHANDLE); + rp = rpcputl(rp, 0); + rp = rpcputl(rp, 0); + return rp; + } + resetmeta(); + csync(); + rp = rpcputl(rp, NFS3_OK); + rp = dowcc(-1, rp, &qid, prelen, premtime, prectime); + rp = rpcputv(rp, starttime); + return rp; +} + +static char * +nfsunsupp(char *buf, ulong xid, char *verf) +{ + char *rp; + + rp = initreply(buf, xid, MSG_ACCEPTED, verf, SUCCESS); + rp = rpcputl(rp, NFS3ERR_NOTSUPP); + rp = rpcputl(rp, 0); + return rp; +} + +static int +nfsdis(char *buf, char *p, ulong xid, char *auth, char *verf, ulong proc) +{ + char *rp; + + switch(proc) { + case NFSPROC3_NULL: + rp = rpcnull(buf, xid, verf); + break; + case NFSPROC3_GETATTR: + rp = nfsgetattr(buf, p, xid, verf); + break; + case NFSPROC3_SETATTR: + rp = nfssetattr(buf, p, xid, verf); + break; + case NFSPROC3_LOOKUP: + rp = nfslookup(buf, p, xid, auth, verf); + break; + case NFSPROC3_ACCESS: + rp = nfsaccess(buf, p, xid, auth, verf); + break; + case NFSPROC3_READLINK: + rp = nfsreadlink(buf, p, xid, verf); + break; + case NFSPROC3_READ: + rp = nfsread(buf, p, xid, auth, verf); + 
break; + case NFSPROC3_WRITE: + rp = nfswrite(buf, p, xid, auth, verf); + break; + case NFSPROC3_CREATE: + rp = nfscreate(buf, p, xid, auth, verf); + break; + case NFSPROC3_MKDIR: + rp = nfsmkdir(buf, p, xid, auth, verf); + break; + case NFSPROC3_SYMLINK: + rp = nfssymlink(buf, p, xid, auth, verf); + break; + case NFSPROC3_MKNOD: + rp = nfsmknod(buf, p, xid, auth, verf); + break; + case NFSPROC3_REMOVE: + case NFSPROC3_RMDIR: + rp = nfsremove(buf, p, xid, auth, verf); + break; + case NFSPROC3_RENAME: + rp = nfsrename(buf, p, xid, auth, verf); + break; + case NFSPROC3_LINK: + rp = nfsunsupp(buf, xid, verf); /* $ */ + break; + case NFSPROC3_READDIR: + rp = nfsreaddir(buf, p, xid, auth, verf); + break; + case NFSPROC3_READDIRPLUS: + rp = nfsreaddirplus(buf, p, xid, auth, verf); + break; + case NFSPROC3_FSSTAT: + rp = nfsfsstat(buf, p, xid, verf); + break; + case NFSPROC3_FSINFO: + rp = nfsfsinfo(buf, p, xid, verf); + break; + case NFSPROC3_PATHCONF: + rp = nfspathconf(buf, p, xid, verf); + break; + case NFSPROC3_COMMIT: + rp = nfscommit(buf, p, xid, verf); + break; + default: + rp = initreply(buf, xid, MSG_DENIED, verf, PROC_UNAVAIL); + break; + } + return rp - buf; +} + +static void +tpstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(tpchan, &fd)) { + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = PMAP_PROG; + r->minver = PMAP_VERS; + r->maxver = PMAP_VERS; + r->dispatch = pmapdis; + threadcreate(tcprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +tportmapper(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("tcp!*!%d", PMAP_PORT); + acfd = announce(s, adir); + if(acfd < 0) + fprint(2, "error in announce: %r\n"); + if(debugnfs) + fprint(2, "announce in tcp port mapper got dir: %s:%r\n", adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + 
lcfd = listen(adir, ldir); + if(lcfd < 0) + fprint(2, "error in listen: %r\n"); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in tcp port mapper: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(tpchan, &fd); + } +} + +static void +upstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(upchan, &fd)) { + if(shutdown) + break; + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = PMAP_PROG; + r->minver = PMAP_VERS; + r->maxver = PMAP_VERS; + r->dispatch = pmapdis; + threadcreate(udprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +uportmapper(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("udp!*!%d", PMAP_PORT); + acfd = announce(s, adir); + if(acfd < 0) + fprint(2, "error in announce: %r\n"); + if(debugnfs) + fprint(2, "announce in udp port mapper got dir: %s:%r\n", adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(lcfd < 0) + fprint(2, "error in listen: %r\n"); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in udp port mapper: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(upchan, &fd); + } +} + +static void +mountstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(mchan, &fd)) { + if(shutdown) + break; + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = MNT_PROG; + r->minver = MNT_MIN_VERS; + r->maxver = MNT_MAX_VERS; + r->dispatch = mntdis; + threadcreate(tcprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +mountd(void *) 
+{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("tcp!*!%d", MNT_PORT); + acfd = announce(s, adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in mountd: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(mchan, &fd); + } +} + +static void +nfsdstarter(void *) +{ + Rcb *r; + int fd; + + while(recv(nchan, &fd)) { + if(shutdown) + break; + for(r = rcbhd; r && r->inuse; r = r->next) ; + if(r == nil) { + r = emalloc9p(sizeof(Rcb)); + r->inuse = 1; + r->io = ioproc(); + r->next = rcbhd; + rcbhd = r; + } + r->inuse = 1; + r->fd = fd; + r->myprog = NFS_PROG; + r->minver = NFS_VERS; + r->maxver = NFS_VERS; + r->dispatch = nfsdis; + threadcreate(tcprpcreader, r, 8192); + } + threadexits(nil); +} + +static void +nfsd(void *) +{ + char *s; + int acfd, lcfd, fd; + char adir[40], ldir[40]; + + s = smprint("tcp!*!%d", NFS_PORT); + acfd = announce(s, adir); + free(s); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(shutdown) + threadexits(nil); + if(debugnfs) + fprint(2, "back from listen in nfsd: ldir=%s\n", ldir); + if(lcfd < 0) { + close(acfd); + threadexits(nil); + } + fd = accept(lcfd, ldir); + close(lcfd); + send(nchan, &fd); + } +} + +/* + * regport sends PMAPPROC_SET calls for the NFS and mount programs + * to a portmapper on the loopback address. It returns 1 once both + * calls have been sent, and 0 when the caller should start the + * built-in port mapper instead. + */ +static int +regport(void) +{ + char *buf, *p; + int fd; +int n, i; + + /* + * On Plan 9, don't even bother trying to see if we have + * a local portmap running. + */ + if(access("/net/ipselftab", AREAD) == 0) + return 0; + /* + * Take a crack at using a local instance of portmap/ + * rpcbind. If we succeed, we don't need to bother + * starting our built-in one. 
+ */ + fd = dial("udp!127.1!111", nil, nil, nil); + if(fd < 0) + return 0; +fprint(2, "Got portmap connection open\n"); + buf = malloc(1500); + if(buf == nil) { + /* no request buffer: give up on the external portmapper */ + close(fd); + return 0; + } + p = buf; + p = rpcputl(p, 42); /* xid */ + p = rpcputl(p, CALL); /* mtype */ + p = rpcputl(p, 2); /* rpcvers */ + p = rpcputl(p, PMAP_PROG); /* prog */ + p = rpcputl(p, PMAP_VERS); /* vers */ + p = rpcputl(p, PMAPPROC_SET); /* proc */ + p = rpcputl(p, 0); /* auth */ + p = rpcputl(p, 0); + p = rpcputl(p, 0); /* verf */ + p = rpcputl(p, 0); + p = rpcputl(p, NFS_PROG); /* prog */ + p = rpcputl(p, NFS_VERS); /* vers */ + p = rpcputl(p, IPPROTO_TCP); /* prot */ + p = rpcputl(p, NFS_PORT); /* port */ + write(fd, buf, p - buf); + n = read(fd, buf, 1500); +for(i = 0; i < n; ++i) fprint(2, "%02x ", (uchar)buf[i]); +fprint(2, "\n"); + close(fd); + fd = dial("udp!127.1!111", nil, nil, nil); + if(fd < 0) { + free(buf); + return 0; + } + p = buf; + p = rpcputl(p, 42); /* xid */ + p = rpcputl(p, CALL); /* mtype */ + p = rpcputl(p, 2); /* rpcvers */ + p = rpcputl(p, PMAP_PROG); /* prog */ + p = rpcputl(p, PMAP_VERS); /* vers */ + p = rpcputl(p, PMAPPROC_SET); /* proc */ + p = rpcputl(p, 0); /* auth */ + p = rpcputl(p, 0); + p = rpcputl(p, 0); /* verf */ + p = rpcputl(p, 0); + p = rpcputl(p, MNT_PROG); /* prog */ + p = rpcputl(p, MNT_MAX_VERS); /* vers */ + p = rpcputl(p, IPPROTO_TCP); /* prot */ + p = rpcputl(p, MNT_PORT); /* port */ + write(fd, buf, p - buf); + n = read(fd, buf, 1500); +for(i = 0; i < n; ++i) fprint(2, "%02x ", (uchar)buf[i]); +fprint(2, "\n"); + close(fd); + free(buf); + return 1; +} + +void +initnfs(void) +{ + if(!regport()) { + upchan = chancreate(sizeof(ulong), 1); + threadcreate(upstarter, nil, 1024); + umaptid = proccreate(uportmapper, nil, 8192); + tpchan = chancreate(sizeof(ulong), 1); + threadcreate(tpstarter, nil, 1024); + tmaptid = proccreate(tportmapper, nil, 8192); + } + mchan = chancreate(sizeof(ulong), 1); + threadcreate(mountstarter, nil, 1024); + mounttid = proccreate(mountd, nil, 8192); + nchan = 
chancreate(sizeof(ulong), 1); + threadcreate(nfsdstarter, nil, 1024); + nfstid = proccreate(nfsd, nil, 8192); +} + +void +haltnfs(void) +{ + Rcb *r; + + if(upchan == nil) + return; +/* + if(upchan) { + chanclose(upchan); + chanclose(tpchan); + } + chanclose(mchan); + chanclose(nchan); +*/ + for(r = rcbhd; r; r = r->next) { + if(r->io) { + iointerrupt(r->io); + closeioproc(r->io); + } + } +/* + if(upchan) { + threadkill(umaptid); + threadkill(tmaptid); + } + threadkill(mounttid); + threadkill(nfstid); +*/ +} --- /sys/src/cmd/θfs/aoe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/aoe.c Thu Feb 20 02:17:38 2014 @@ -0,0 +1,1136 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Adapted by Brian L. Stuart from: + * + * vblade — virtual aoe target + * copyright © 2007—2013 erik quanstrom + */ + +#include +#include +#include +#include /* irony */ +#include +#include +#include <9p.h> + +enum { + Eaddrlen = 6, /* only defined in kernel */ +}; +#include "aoe.h" + +#include "dat.h" + +enum { + Fclone, + Fdata, + Flast, + + Nether = 8, + Nvblade = 16, + Nmask = 10, + Nmaxout= 128, + Maxpkt = 10000, + Conflen = 1024, +}; + +typedef struct Vblade Vblade; + +struct Vblade { + vlong maxlba; + uint nmask; + Lock mlk; + uchar *mask; + int shelf; + int slot; + int clen; + char *config; +}; + +static Vblade vblade[Nvblade]; +static int nblade; + +static char *ethertab[Nether] = { + "#l0/ether0", +}; +static int etheridx = 1; +static int efdtab[Nether*Flast]; +static uchar pkttab[Nether][Maxpkt]; +static uchar bctab[Nether][Maxpkt]; +static int mtutab[Nether]; +static Ioproc *ioprocs[Nether]; + +static int +getmtu(char *p) +{ + char buf[50]; + int fd, mtu; + + snprint(buf, sizeof buf, "%s/mtu", p); + if((fd = open(buf, OREAD)) == -1) + return 2; + if(read(fd, buf, 36) < 0) + return 2; + close(fd); + buf[36] = 0; + mtu = strtoul(buf+12, 0, 0)-Aoehsz-Aoeatasz; + return mtu>>9; +} + +static int +aoeopen(Ioproc *io, char *e, int fds[]) +{ + char buf[128], ctl[13]; + int n; + + snprint(buf, sizeof buf, "%s/clone", e); + if((fds[Fclone] = ioopen(io, buf, ORDWR)) == -1) + return -1; + memset(ctl, 0, sizeof ctl); + if(ioread(io, fds[Fclone], ctl, 
sizeof ctl - 1) < 0) + return -1; + n = atoi(ctl); + snprint(buf, sizeof buf, "connect %d", Aoetype); + if(iowrite(io, fds[Fclone], buf, strlen(buf)) != strlen(buf)) + return -1; + snprint(buf, sizeof buf, "%s/%d/data", e, n); + fds[Fdata] = ioopen(io, buf, ORDWR); + return fds[Fdata]; +} + +static void +replyhdr(Aoehdr *h, Vblade *vblade) +{ + uchar ea[Eaddrlen]; + + memmove(ea, h->dst, Eaddrlen); + memmove(h->dst, h->src, Eaddrlen); + memmove(h->src, ea, Eaddrlen); + + hnputs(h->major, vblade->shelf); + h->minor = vblade->slot; + h->verflag |= AFrsp; +} + +static int +servebad(uchar *pkt, Vblade*, int) +{ + Aoehdr *h; + + h = (Aoehdr*)pkt; + h->verflag |= AFerr; + h->error = AEcmd; + + return Aoehsz; +} + +static uchar nilea[Eaddrlen]; + +/* +static void +savemask(Vblade *vb) +{ + uvlong qpath, meta; + + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + meta = q2m(-1, qpath, 0); + if(meta == 0) + return; + setmetaint(meta, "nmask", nil, vb->nmask); + if(vb->mask) + setmetablob(meta, "mask", nil, vb->mask, vb->nmask * Eaddrlen, 0); + else + setmetastr(meta, "mask", nil, "", 0); +} +*/ + +/* + * servemask handles a mask request for vb. Medit applies the + * add/delete directives in the packet to a scratch copy of the + * address list and, if none fails, installs the result; it then + * falls through to Mread, which writes the current list into the + * packet. Returns the reply length, or -1 to drop the request. + */ +static int +servemask(uchar *pkt, Vblade *vb, int mtu) +{ + int i, j, k, r, e; + uchar mx[Nmask*Eaddrlen], *p; + Aoem *m; + Aoemd *d; + + m = (Aoem*)(pkt + Aoehsz); + if(m->mcnt > (mtu - Aoehsz - Aoemsz)/Aoemdsz) + return -1; + + if(!canlock(&vb->mlk)) + return -1; /* drop */ + + switch(m->mcmd){ + default: + unlock(&vb->mlk); + return servebad(pkt, vb, mtu); + case Medit: + /* pack the non-nil addresses into mx; j counts them */ + j = 0; + for(i = 0; i < vb->nmask && j < Nmask; i++){ + p = vb->mask + i*Eaddrlen; + if(memcmp(p, nilea, Eaddrlen) != 0) + memcpy(mx + j++*Eaddrlen, p, Eaddrlen); + } + e = 0; + p = pkt + Aoehsz + Aoemsz; + for(i = 0; i < m->mcnt && e == 0; i++){ + d = (Aoemd*)(p + i*Aoemdsz); + switch(d->dcmd){ + default: + e = MEunk; + break; + case MDnop: + break; + case MDadd: + /* scan with k: i is the directive index */ + for(k = 0; k < j; k++) + if(memcmp(d->ea, mx + k*Eaddrlen, Eaddrlen) == 0) + break; + if(k < j) + break; /* already listed */ + if(j == Nmask) + e = MEfull; + else + 
memcpy(mx + j++*Eaddrlen, d->ea, Eaddrlen); + break; + case MDdel: + for(k = 0; k < j; k++) + if(memcmp(d->ea, mx + k*Eaddrlen, Eaddrlen) == 0) + break; + if(k < j){ + /* close the gap left by entry k */ + memmove(mx + k*Eaddrlen, mx + (k+1)*Eaddrlen, (j-k-1)*Eaddrlen); + j--; + } + break; + } + } + + if(e != 0){ + m->merr = e; + r = Aoehsz + Aoemsz; + break; + } + + /* an empty list is kept as nil, so malloc(0) is never relied on */ + p = nil; + if(j > 0){ + p = malloc(j*Eaddrlen); + if(p == nil){ + r = -1; + break; + } + memcpy(p, mx, j*Eaddrlen); + } + free(vb->mask); + vb->nmask = j; + vb->mask = p; + /* fall through: reply with the updated list */ + case Mread: + m->mcnt = vb->nmask; + m->merr = 0; + p = pkt + Aoehsz + Aoemsz; + for(i = 0; i < m->mcnt; i++){ + d = (Aoemd*)(p + i*Aoemdsz); + d->dres = 0; + d->dcmd = MDnop; + memcpy(d->ea, vb->mask + i*Eaddrlen, Eaddrlen); + } + r = Aoehsz + Aoemsz + m->mcnt * Aoemdsz; + break; + } + + unlock(&vb->mlk); + return r; +} + +static void +saveconfig(Vblade *vb) +{ + uvlong qpath, meta; + + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + meta = q2m(-1, qpath, 0); + if(meta == 0) + return; + if(vb->config) + setmetastr(meta, "config", nil, vb->config, 0); + else + setmetastr(meta, "config", nil, "", 0); +} + +static int +serveconfig(uchar *pkt, Vblade *vb, int mtu) +{ + char *cfg; + int cmd, reqlen, len; + Aoehdr *h; + Aoecfg *q; + + h = (Aoehdr*)pkt; + q = (Aoecfg*)(pkt + Aoehsz); + + if(memcmp(h->src, h->dst, Eaddrlen) == 0) + return -1; + + reqlen = nhgets(q->cslen); + len = vb->clen; + cmd = q->verccmd&0xf; + cfg = (char*)(pkt + Aoehsz + Aoecfgsz); + + switch(cmd){ + case AQCtest: + if(reqlen != len) + return -1; + case AQCprefix: + if(reqlen > len) + return -1; + if(memcmp(vb->config, cfg, reqlen) != 0) + return -1; + case AQCread: + break; + case AQCset: + /* refuse only when a config already exists and differs; an empty one may always be set */ + if(len && (len != reqlen || memcmp(vb->config, cfg, reqlen) != 0)){ + h->verflag |= AFerr; + h->error = AEcfg; + break; + } + case AQCfset: + if(reqlen > Conflen){ + h->verflag |= AFerr; + h->error = AEarg; + break; + } + free(vb->config); + vb->config = θmalloc(reqlen + 1); + memmove(vb->config, cfg, reqlen); + vb->clen = len = 
reqlen; + saveconfig(vb); + break; + default: + h->verflag |= AFerr; + h->error = AEarg; + break; + } + + if(vb->config) + memmove(cfg, vb->config, len); + hnputs(q->cslen, len); + hnputs(q->bufcnt, Nmaxout); + q->scnt = mtu; + hnputs(q->fwver, 2323); + q->verccmd = Aoever<<4 | cmd; + + return len; +} + +static ushort ident[256] = { + [47] 0x8000, + [49] 0x0200, + [50] 0x4000, + [83] 0x5400, + [84] 0x4000, + [86] 0x1400, + [87] 0x4000, + [93] 0x400b, +}; + +static void +idmoveto(char *a, int idx, int len, char *s) +{ + char *p; + + p = a+idx*2; + for(; len > 0; len -= 2) { + if(*s == 0) + p[1] = ' '; + else + p[1] = *s++; + if (*s == 0) + p[0] = ' '; + else + p[0] = *s++; + p += 2; + } +} + +static void +lbamoveto(char *p, int idx, int n, vlong lba) +{ + int i; + + p += idx*2; + for(i = 0; i < n; i++) + *p++ = lba>>i*8; +} + +enum { + Crd = 0x20, + Crdext = 0x24, + Cwr = 0x30, + Cwrext = 0x34, + Cid = 0xec, +}; + +static uvlong +getlba(uchar *p) +{ + uvlong v; + + v = p[0]; + v |= p[1]<<8; + v |= p[2]<<16; + v |= p[3]<<24; + v |= (uvlong)p[4]<<32; + v |= (uvlong)p[5]<<40; + return v; +} + +static void +putlba(uchar *p, vlong lba) +{ + p[0] = lba; + p[1] = lba>>8; + p[2] = lba>>16; + p[3] = lba>>24; + p[4] = lba>>32; + p[5] = lba>>40; +} + +static int +serveata(uchar *pkt, Vblade *vb, int mtu) +{ + char *buf; + int rbytes, bytes, len; + vlong lba, off, qpath; + Aoehdr *h; + Aoeata *a; + + h = (Aoehdr*)pkt; + a = (Aoeata*)(pkt + Aoehsz); + + buf = (char*)(pkt + Aoehsz + Aoeatasz); + lba = getlba(a->lba); + len = a->scnt<<9; + off = lba<<9; + + if(a->scnt > mtu || a->scnt == 0){ + h->verflag |= AFerr; + h->error = AEarg; + a->cmdstat = ASdrdy|ASerr; + return 0; + } + + if(a->cmdstat != Cid) + if(lba+a->scnt > vb->maxlba){ + a->errfeat = Eidnf; + a->cmdstat = ASdrdy|ASerr; + return 0; + } + + if(a->cmdstat&0xf0 == 0x20) + lba &= 0xfffffff; + switch(a->cmdstat){ + default: + a->errfeat = Eabrt; + a->cmdstat = ASdrdy|ASerr; + return 0; + case Cid: + memmove(buf, ident, 
sizeof ident); + idmoveto(buf, 27, 40, "Plan 9 Vblade"); + idmoveto(buf, 10, 20, "serial#"); + idmoveto(buf, 23, 8, "2"); + lbamoveto(buf, 60, 4, vb->maxlba); + lbamoveto(buf, 100, 8, vb->maxlba); + a->cmdstat = ASdrdy; + return 512; + break; + case Crd: + case Crdext: + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + bytes = θpread(-1, qpath, buf, len, off); + rbytes = bytes; + break; + case Cwr: + case Cwrext: + qpath = ((uvlong)TLun << 60) | (vb->shelf << 8) | vb->slot; + bytes = θpwrite(qpath, buf, len, off, 0); + rbytes = 0; + break; + } + if(bytes != len){ + a->errfeat = Eabrt; + a->cmdstat = ASdf|ASerr; + putlba(a->lba, lba+(len-bytes)>>9); + return 0; + } + + putlba(a->lba, lba+a->scnt); + a->scnt = 0; + a->errfeat = 0; + a->cmdstat = ASdrdy; + + return rbytes; +} + +static int +myea(Ioproc *io, uchar ea[6], char *p) +{ + char buf[50]; + int fd; + + snprint(buf, sizeof buf, "%s/addr", p); + if((fd = ioopen(io, buf, OREAD)) == -1) + return -1; + if(ioread(io, fd, buf, 12) < 12) + return -1; + ioclose(io, fd); + return parseether(ea, buf); +} + +static int +bcastpkt(uchar *pkt, uint shelf, uint slot, int i) +{ + Aoehdr *h; + + h = (Aoehdr*)pkt; + myea(ioprocs[i], h->dst, ethertab[i]); + memset(h->src, 0xff, Eaddrlen); + hnputs(h->type, Aoetype); + hnputs(h->major, shelf); + h->minor = slot; + h->cmd = ACconfig; + *(u32int*)h->tag = 0; + return Aoehsz + Aoecfgsz; +} + +static int +osdgetattr(Aoeosd *o, int len, uvlong pid, uvlong oid) +{ + MVal x; + uchar *inbuf, *outbuf, *end; + char *name, *strval; + uvlong meta; + int n, nn, typ, tot; + + name = smprint("%016ullx:%016ullx", pid, oid); + meta = q2m(-1, p2q(-1, name, 0), 0); + free(name); + if(meta == 0) { + o->oflag = 0x40; + return 0; + } + len -= 20; + inbuf = θmalloc(len); + memmove(inbuf, o->oaddr, len); + end = inbuf + len; + outbuf = o->opid; + tot = 0; + while(inbuf < end) { + name = (char *)inbuf; /* the compiler's obsession with signed and unsigned is annoying */ + nn = strlen(name); + 
inbuf += nn + 1; + typ = getmeta(-1, meta, name, &x); + switch(typ) { +/* + case MTshort: + if(tot + nn + 4 >= 8192) + goto done; + tot += nn + 4; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'h'; + hnputs(outbuf, *((ushort *)x)); + outbuf += 2; + break; + case MTlong: + if(tot + nn + 6 >= 8192) + goto done; + tot += nn + 6; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'l'; + hnputl(outbuf, *((ulong *)x)); + outbuf += 4; + break; +*/ + case MTint: + if(tot + nn + 10 >= 8192) + goto done; + tot += nn + 10; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'v'; + hnputv(outbuf, x.val); + outbuf += 8; + break; + case MTistring: + n = strlen(x.str) + 1; + if(tot + nn + n + 3 >= 8192) + goto done; + tot += nn + n + 3; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 's'; + strcpy((char *)outbuf, x.str); + outbuf += n; + break; + case MTstring: + strval = getblob(-1, x.val, &n); + if(tot + nn + n + 3 >= 8192) + goto done; + tot += nn + n + 3; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 's'; + strcpy((char *)outbuf, strval); + free(strval); + outbuf += n; + break; + case MTblob: + strval = getblob(-1, x.val, &n); + if(tot + nn + n + 4 >= 8192) + goto done; + tot += nn + n + 4; + strcpy((char *)outbuf, name); + outbuf += nn + 1; + *outbuf++ = 'b'; + hnputs(outbuf, n); + outbuf += 2; + memmove(outbuf, strval, n); + free(strval); + outbuf += n; + break; + } + } +done: + brelease(meta); + return tot; +} + +static int +osdsetattr(Aoeosd *o, int len, uvlong pid, uvlong oid) +{ + uchar *buf, *end; + char *name; + uvlong meta; + int n; + + name = smprint("%016ullx:%016ullx", pid, oid); + meta = q2m(-1, p2q(-1, name, 0), 0); + free(name); + if(meta == 0) { + o->oflag = 0x40; + return 0; + } + buf = o->oaddr; + end = buf + len; + while(buf < end) { + name = (char *)buf; /* the compiler's obsession with signed and unsigned is annoying */ + buf += strlen(name) + 1; + switch(*buf) { + case 
'h': + setmetaint(meta, name, nil, nhgets(buf + 1)); + buf += 3; + break; + case 'l': + setmetaint(meta, name, nil, nhgetl(buf + 1)); + buf += 5; + break; + case 'v': + setmetaint(meta, name, nil, nhgetv(buf + 1)); + buf += 9; + break; + case 's': + setmetastr(meta, name, nil, (char *)(buf + 1), 0); + buf += strlen((char *)(buf + 1)) + 2; + break; + case 'b': + n = *((ushort *)(buf + 1)); + setmetablob(meta, name, nil, buf + 3, n, 0); + buf += n + 1; + break; + } + } + return 0; +} + +static int +serveosd(Ioproc *io, uchar *pkt, int fd, int) +{ + Qid nqid; + Aoehdr *ah; + Aoeosd *o; + uchar *buf; + char *name; + uvlong x; + uvlong pid, oid, addr, meta, pmeta, dirblk, pqpath; + int n, len, rlen; + + ah = (Aoehdr *)pkt; + o = (Aoeosd *)(pkt + Aoehsz); + len = nhgets(o->olen); + /* for some commands, the pid, oid, or addr may be junk */ + pid = nhgetv(o->opid); + oid = nhgetv(o->ooid); + addr = nhgetv(o->oaddr); + + rlen = 0; + o->oflag = 0; +fprint(2, "OSD request: %016ullx:%016ullx len:%d cmd:%x addr:%ulld\n", pid, oid, len, o->ocmd, addr); + switch(o->ocmd) { + case AOCformat: + name = smprint("0000000000000000:0000000000000000"); + nqid.path = p2q(-1, name, 1); + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "pid", nil, 0); + setmetaint(meta, "oid", nil, 0); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "child", nil, 0); + setqhash(nqid.path, meta); + free(name); + savesuper(); + break; + case AOCcreate: + name = smprint("%016ullx:0000000000000000", pid); + pqpath = p2q(-1, name, 0); + pmeta = q2m(-1, pqpath, 0); + free(name); + if(pmeta == 0) { + o->oflag = 0x40; + break; + } + name = smprint("%016ullx:%016ullx", pid, oid); + nqid.path = p2q(-1, name, 1); + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + o->oflag = 0x40; + free(name); 
+ break; + } + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "pid", nil, pid); + setmetaint(meta, "oid", nil, oid); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "length", nil, 0); + setmetaint(meta, "parent", nil, pqpath); + getmetaint(-1, pmeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(pmeta, "child", nil, nqid.path); + dirblk = allocblock(); + if(dirblk != 0) { + cbclean(dirblk); + cbwrite(dirblk); + brelease(dirblk); + } + setmetaint(meta, "index", nil, dirblk); + setqhash(nqid.path, meta); + free(name); + savesuper(); + break; + case AOClist: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + meta = q2m(-1, pqpath, 0); + getmetaint(-1, meta, "child", &pqpath); + buf = o->opid; + while(len > 0 && pqpath != 0) { + meta = q2m(-1, pqpath, 0); + if(meta == 0) + break; + if(pid == 0) + getmetaint(-1, meta, "pid", &x); + else + getmetaint(-1, meta, "oid", &x); + hnputv(buf, x); + buf += 8; + len -= 8; + getmetaint(-1, meta, "sib", &pqpath); + } + rlen = len = buf - o->opid; + hnputs(o->olen, len); + break; + case AOCread: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + buf = o->opid; + len = θpread(-1, pqpath, buf, len, addr); + rlen = len; + free(name); + break; + case AOCwrite: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + buf = o->oaddr + 8; + len = θpwrite(pqpath, buf, len, addr, 1); + free(name); + break; + case AOCappend: + name = smprint("%016ullx:%016ullx", pid, oid); + pqpath = p2q(-1, name, 0); + buf = o->oaddr + 8; + len = θpwrite(pqpath, buf, len, 0, 2); + free(name); + break; + case AOCflush: + resetmeta(); + csync(); + break; + case AOCremove: + name = smprint("%016ullx:%016ullx", pid, oid); + meta = q2m(-1, p2q(-1, name, 0), 0); + if(meta == 0) { + o->oflag = 0x40; + free(name); + break; + } + getmetaint(-1, meta, 
"qpath", &x); + rmdlist(meta, x); + rmq(x, meta); + rmp(name); + free(name); + break; + case AOCpcreate: + name = smprint("0000000000000000:0000000000000000"); + pqpath = p2q(-1, name, 0); + pmeta = q2m(-1, pqpath, 0); + free(name); + if(pmeta == 0) { + o->oflag = 0x40; + break; + } + name = smprint("%016ullx:0000000000000000", pid); + nqid.path = p2q(-1, name, 1); + nqid.vers = 0; + nqid.type = QTFILE; + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + o->oflag = 0x40; + free(name); + break; + } + setmetastr(meta, "name", nil, name, 0); + setmetaint(meta, "pid", nil, pid); + setmetaint(meta, "oid", nil, 0); + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "parent", nil, pqpath); + setmetaint(meta, "child", nil, 0); + getmetaint(-1, pmeta, "child", &x); + setmetaint(meta, "sib", nil, x); + setmetaint(pmeta, "child", nil, nqid.path); + setqhash(nqid.path, meta); + free(name); + savesuper(); + break; + case AOCpremove: + name = smprint("%016ullx:0000000000000000", pid); + meta = q2m(-1, p2q(-1, name, 0), 0); + if(meta == 0) { + o->oflag = 0x40; + free(name); + break; + } + getmetaint(-1, meta, "child", &x); + if(x == 0) { + rmdlist(meta, x); + rmq(x, meta); + freeblock(meta); + rmp(name); + } + else { + o->oflag |= 0x40; + } + free(name); + break; + case AOCgetattr: + rlen = len = osdgetattr(o, len, pid, oid); + break; + case AOCsetattr: + rlen = len = osdsetattr(o, len, pid, oid); + break; + case AOCccreate: + case AOCcremove: + case AOCclist: + default: + o->oflag = 0x40; + break; + } + memmove(ah->dst, ah->src, Eaddrlen); + ah->verflag |= AFrsp; + if(o->oflag & 0x40) { + ah->verflag |= AFerr; + ah->error = AEarg; + } + o->oflag |= 0x80; + hnputs(o->olen, len); + n = rlen + 4 + sizeof(Aoehdr); + if(n < 60) + n = 60; + if(iowrite(io, fd, pkt, n) != n) { + fprint(2, "response write failed: %r\n"); + return -1; + } + return 0; +} + +static int +bladereply(Vblade 
*v, int i, int fd, uchar *pkt) +{ + int n; + Aoehdr *h; + + h = (Aoehdr*)pkt; + switch(h->cmd){ + case ACata: + n = serveata(pkt, v, mtutab[i]); + n += Aoehsz+Aoeatasz; + break; + case ACconfig: + n = serveconfig(pkt, v, mtutab[i]); + if(n >= 0) + n += Aoehsz+Aoecfgsz; + break; + case ACmask: + n = servemask(pkt, v, mtutab[i]); + break; + case ACosd: + if(v == vblade) + return serveosd(ioprocs[i], pkt, fd, mtutab[i]); + else + return 0; + break; + default: + n = servebad(pkt, v, mtutab[i]); + break; + } + if(n == -1) + return -1; + replyhdr(h, v); + if(n < 60){ + memset(pkt+n, 0, 60-n); + n = 60; + } + if(iowrite(ioprocs[i], fd, h, n) != n){ + fprint(2, "vblade: write to %s failed: %r\n", ethertab[i]); + return -1; + } + return 0; +} + +static int +filter(Vblade *v, uchar *ea) +{ + int i; + uchar *u; + + if(v->nmask == 0) + return 0; + + u = v->mask; + for(i = 0; i < v->nmask; i++) + if(memcmp(u + i*Eaddrlen, ea, Eaddrlen) == 0) + return 0; + return -1; +} + +static void +serve(void *a) +{ + int i, j, popcnt, vec, n, s, efd; + uchar *pkt, *bcpkt; + Aoehdr *h; + Vblade *v; + + i = (int)(uintptr)a; + + efd = efdtab[i*Flast+Fdata]; + pkt = pkttab[i]; + bcpkt = bctab[i]; + + n = 60; + h = (Aoehdr*)pkt; + bcastpkt(pkt, 0xffff, 0xff, i); + goto start; + + for(;;){ + n = ioread(ioprocs[i], efd, pkt, Maxpkt); + start: + if(shutdown) + threadexits(nil); + if(n < 60 || h->verflag & AFrsp) + continue; + s = nhgets(h->major); + popcnt = 0; + vec = 0; + for(j = 0; j < nblade; j++){ + v = vblade+j; + if(v->shelf == s || s == 0xffff) + if(v->slot == h->minor || h->minor == 0xff) + if(v->nmask == 0 || filter(v, h->src) == 0){ + popcnt++; + vec |= 1<0){ + memcpy(bcpkt, pkt, n); + bladereply(vblade + j, i, efd, bcpkt); + }else{ + bladereply(vblade + j, i, efd, pkt); + break; + } + } + } +} + +static void +aoeannounce(Vblade *vb) +{ + uchar *pkt; + int i; + + pkt = θmalloc(Maxpkt); + for(i = 0; i < etheridx; ++i) { + bcastpkt(pkt, 0xffff, 0xff, i); + bladereply(vb, i, 
efdtab[i*Flast+Fdata], pkt); + } +} + +void +starttarget(int major, int minor, uvlong nsect) +{ + Vblade *vp; + + vp = vblade + nblade; + vp->maxlba = nsect; + vp->nmask = 0; + vp->mask = nil; + vp->shelf = major; + vp->slot = minor; + vp->clen = 0; + ++nblade; + aoeannounce(vp); +} + +void +rmtarget(int major, int minor) +{ + int i; + + for(i = 0; i < nblade && (vblade[i].shelf != major || vblade[i].slot != minor); ++i) ; + if(i >= nblade) + return; + for(; i < nblade - 1; ++i) + vblade[i] = vblade[i+1]; + memset(vblade + i, 0, sizeof (Vblade)); + --nblade; +} + +static void +scanluns(void) +{ + uvlong x; + uvlong qpath, meta; + + for(qpath = super.firstlun; qpath; ) { + meta = q2m(-1, qpath, 0); + if(meta == 0) { + fprint(2, "No metadata for %ulld\n", qpath); + break; + } + getmetaint(-1, meta, "length", &x); + vblade[nblade].maxlba = x >> 9; + if(getmetaint(-1, meta, "nmask", &x) == MTnone) + vblade[nblade].nmask = 0; + else + vblade[nblade].nmask = x; + if(vblade[nblade].nmask != 0) { + if(getmeta(-1, meta, "masks", (MVal *)&x) == MTnone) + vblade[nblade].nmask = 0; + else + vblade[nblade].mask = getblob(-1, x, nil); + } + getmetaint(-1, meta, "aoemajor", &x); + vblade[nblade].shelf = x; + getmetaint(-1, meta, "aoeminor", &x); + vblade[nblade].slot = x; + if(vblade[nblade].config = getmetastr(-1, meta, "config")) + vblade[nblade].clen = strlen(vblade[nblade].config); + else + vblade[nblade].clen = 0; + ++nblade; + getmetaint(-1, meta, "nextlun", &qpath); + } +} + +static void +launch(char *tab[], int fdtab[]) +{ + int i; + + for(i = 0; tab[i]; i++){ + ioprocs[i] = ioproc(); + if(aoeopen(ioprocs[i], tab[i], fdtab+Flast*i) < 0) + sysfatal("network open: %r"); + threadcreate(serve, (void*)(uintptr)i, 32*1024); + } +} + +void +initaoe(void) +{ + int i; + + for(i = 0; i < etheridx; i++) + mtutab[i] = getmtu(ethertab[i]); + scanluns(); + launch(ethertab, efdtab); +} + +void +haltaoe(void) +{ + int i; + + for(i = 0; ethertab[i]; ++i) { + if(ioprocs[i]) { + 
iointerrupt(ioprocs[i]); + closeioproc(ioprocs[i]); + } + } +} --- /sys/src/cmd/θfs/hash.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/hash.c Thu Feb 20 02:17:39 2014 @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +static uvlong np2q, nq2m; +static int maxp2q, maxq2m; +static int p2qcoll, q2mcoll; + +/* FNV hash */ +static ulong +pathhash(char *path) +{ + uchar *p; + ulong h; + + h = 2166136261UL; + for(p = (uchar *)path; *p; ++p) + h = (h ^ *p) * 16777619; + return h % super.nht; +} + +static ulong +qidhash(uvlong qpath) +{ + return qpath % super.nht; +} + +static uvlong +qoffset(ulong bucket) +{ + return BlkSize * (super.nhashblk + 1) + bucket * sizeof(uvlong); +} + +static PQMap * +nextpq(PQMap *pq) +{ + uchar *p; + + p = (uchar *)pq; + p += pq->plen + offsetof(PQMap, pname[0]); + return (PQMap *)p; +} + +uvlong +p2q(int fd, char *path, int create) +{ + PQMap *pq, *pend; + uchar *p; + uvlong *uvp; + uvlong hlist, next, qpath; + ulong bucket; + int plen, nsearch, n; + + ++np2q; + plen = strlen(path); + bucket = pathhash(path); + if(fd == -1) + n = cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)); + else + n = spread(fd, &hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)); + if(n < 0) + sysfatal("cread failure: %r"); + if(hlist == 0) { + if(create) { + hlist = allocblock(); + if(hlist == 0) + return 0; + p = cbclean(hlist); + pq = (PQMap *)p; + qpath = super.qgen++ | ((uvlong)TFile << 60); + pq->qpath = qpath; + pq->plen = plen; + memmove(pq->pname, path, plen); + cbwrite(hlist); + brelease(hlist); + uvp = cbread(bucket / NPerBlk + 1); + uvp[bucket % NPerBlk] = hlist; + cbwrite(bucket / NPerBlk + 1); + brelease(bucket / NPerBlk + 1); + return qpath; + } + return 0; + } + nsearch = 1; + p = nil; /* make the compiler happy */ + if(fd != -1) + p = θmalloc(BlkSize); + while(hlist) { + if(fd == -1) { + p = cbread(hlist); + if(p == nil) { + fprint(2, "cbread failed on block %ulld\n", hlist); + return 0; + } + } + else + spread(fd, p, BlkSize, hlist * BlkSize); + pend = (PQMap *)(p + BlkSize); + --pend; + for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) { + 
if(plen == pq->plen && memcmp(path, pq->pname, plen) == 0) + goto found; + ++nsearch; + } + next = *((uvlong *)(p + BlkSize - sizeof(uvlong))); + if(next == 0 && create) + goto addone; + if(fd == -1) + brelease(hlist); + hlist = next; + } + if(fd != -1) + free(p); + return 0; +found: + if(nsearch > maxp2q) + maxp2q = nsearch; + next = pq->qpath; + if(fd == -1) + brelease(hlist); + else + free(p); + if(create) + return 0; + return next; +addone: + for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) ; + if(pq != (PQMap *)p) + ++p2qcoll; + if(pq >= pend) { +fprint(2, "HUH?"); + next = allocblock(); + if(next == 0) + return 0; + *((uvlong *)(p + BlkSize - sizeof(uvlong))) = next; + cbwrite(hlist); + brelease(hlist); + hlist = next; + p = cbclean(hlist); + pq = (PQMap *)p; + } + qpath = super.qgen++ | ((uvlong)TFile << 60); + pq->qpath = qpath; + pq->plen = plen; + memmove(pq->pname, path, plen); + if(hlist != 0) { /* shouldn't be possible, but just to be safe */ + cbwrite(hlist); + brelease(hlist); + } + return qpath; +} + +void +setqhash(uvlong qpath, uvlong midx) +{ + ulong bucket; + + bucket = qidhash(qpath); + cwrite(&midx, sizeof(uvlong), qoffset(bucket)); +} + +uvlong +q2m(int fd, uvlong qpath, int create) +{ + uvlong val; + uvlong first, meta; + ulong bucket; + int nsearch, n; + + if(qpath == 0) + return 0; + ++nq2m; + bucket = qidhash(qpath); + if(fd == -1) + n = cread(&first, sizeof(uvlong), qoffset(bucket)); + else + n= spread(fd, &first, sizeof(uvlong), qoffset(bucket)); + if(n < 0) + sysfatal("cread failure: %r"); + if(first == 0) { + if(create) { + meta = setmetaint(0, "qhnext", nil, 0); + //setqhash(qpath, meta); + return meta; + } + return 0; + } + nsearch = 1; + for(meta = first; meta; ) { + if(getmetaint(fd, meta, "qpath", &val) != MTnone && val == qpath) + break; + if(getmetaint(fd, meta, "qhnext", &meta) == MTnone) + meta = 0; + ++nsearch; + } + if(meta == 0) { + if(create) { + meta = setmetaint(0, "qhnext", nil, first); + 
//setqhash(qpath, meta); + } + } + else + if(nsearch > maxq2m) + maxq2m = nsearch; + return meta; +} + +void +rehashone(uvlong qpath, char *from, char *to) +{ + PQMap *pq, *rend; + uchar *p; + uvlong *uvp; + uvlong hlist, next; + ulong bucket; + int plen; + + rmp(from); + plen = strlen(to); + bucket = pathhash(to); + if(cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)) < 0) + sysfatal("cread failure: %r"); + if(hlist == 0) { + hlist = allocblock(); + if(hlist == 0) + return; + p = cbclean(hlist); + pq = (PQMap *)p; + pq->qpath = qpath; + pq->plen = plen; + memmove(pq->pname, to, plen); + cbwrite(hlist); + brelease(hlist); + uvp = cbread(bucket / NPerBlk + 1); + uvp[bucket % NPerBlk] = hlist; + cbwrite(bucket / NPerBlk + 1); + brelease(bucket / NPerBlk + 1); + return; + } + while(hlist) { + p = cbread(hlist); + rend = (PQMap *)(p + BlkSize); + --rend; + for(pq = (PQMap *)p; pq < rend; pq = nextpq(pq)) + if(plen == pq->plen && memcmp(to, pq->pname, plen) == 0) + goto found; + next = *((uvlong *)(p + BlkSize - sizeof(uvlong))); + if(next == 0) + goto addone; + brelease(hlist); + hlist = next; + } + return; +found: + fprint(2, "Impossible! 
Repath destination exists\n"); + brelease(hlist); + return; +addone: + for(pq = (PQMap *)p; pq < rend && pq->qpath != 0; pq = nextpq(pq)) ; + if(pq >= rend) { + next = allocblock(); + *((uvlong *)(p + BlkSize - sizeof(uvlong))) = next; + cbwrite(hlist); + brelease(hlist); + if(next == 0) + return; + hlist = next; + p = cbclean(hlist); + pq = (PQMap *)p; + } + pq->qpath = qpath; + pq->plen = plen; + memmove(pq->pname, to, plen); + cbwrite(hlist); + brelease(hlist); +} + +void +rehashpath(uvlong qpath, char *from, char *to) +{ + char *f, *t, *name; + uvlong cqid, meta; + + meta = q2m(-1, qpath, 0); + if(meta != 0 && getmetaint(-1, meta, "child", &cqid) != MTnone) { + while(cqid != 0) { + meta = q2m(-1, cqid, 0); + if(meta == 0) + break; + name = getmetastr(-1, meta, "name"); + f = smprint("%s/%s", from, name); + t = smprint("%s/%s", to, name); + free(name); + rehashpath(cqid, f, t); + free(f); + free(t); + if(getmetaint(-1, meta, "sib", &cqid) == MTnone) + break; + } + } + rehashone(qpath, from, to); +} + +static PQMap * +rmpath(PQMap *full, PQMap *victim) +{ + PQMap *next, *last; + PQMap *rend; + int plen; + + rend = (PQMap *)((char *)full + BlkSize - sizeof(uvlong)); + for(last = victim; last < rend - 1 && last->plen > 0; last = nextpq(last)) ; + /* + * last now points to the start of the first empty path/qid map slot + */ + plen = victim->plen + offsetof(PQMap, pname[0]); + next = nextpq(victim); + memmove(victim, next, (char *)rend - (char *)next); + /* + * now last has moved up by plen bytes + */ + last = (PQMap *)((char *)last - plen); + memset(last, 0, (char *)rend - (char *)last); + return last; +} + +void +rmp(char *path) +{ + PQMap *pq, *rend, *last; + uchar *p; + uvlong hlist, next; + ulong bucket; + int plen; + + plen = strlen(path); + if(plen == 0) + return; + bucket = pathhash(path); + if(cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)) < 0) + sysfatal("cread failure: %r"); + while(hlist) { + p = cbread(hlist); + rend = (PQMap *)(p + 
BlkSize); + --rend; + for(pq = (PQMap *)p; pq < rend; pq = nextpq(pq)) + if(plen == pq->plen && memcmp(path, pq->pname, plen) == 0) + goto found; + next = *((uvlong *)(p + BlkSize - sizeof(uvlong))); + brelease(hlist); + hlist = next; + } + return; +found: + while(hlist) { + last = rmpath((PQMap *)p, pq); + next = *((uvlong *)(p + BlkSize - sizeof(uvlong))); + if(next != 0) { + p = cbread(next); + pq = (PQMap *)p; + if(pq->plen == 0 || pq->plen > (char *)rend - (char *)last) { + brelease(next); + next = 0; + } + else + memmove(last, pq, pq->plen + offsetof(PQMap, pname[0])); + } + cbwrite(hlist); + brelease(hlist); + hlist = next; + } +} + +void +rmq(uvlong qpath, uvlong victim) +{ + uvlong prev, meta, next; + ulong bucket; + + if(qpath == 0) + return; + bucket = qidhash(qpath); + if(cread(&meta, sizeof(uvlong), qoffset(bucket)) < 0) + sysfatal("cread failure: %r"); + if(meta == victim) { + if(getmetaint(-1, meta, "qhnext", &next) == MTnone) + next = 0; + if(cwrite(&next, sizeof(uvlong), qoffset(bucket)) < 0) + sysfatal("cwrite failure: %r"); + return; + } + for(prev = meta; prev; ) { + if(getmetaint(-1, prev, "qhnext", &meta) == MTnone) + meta = 0; + if(meta == victim) { + if(getmetaint(-1, victim, "qhnext", &next) == MTnone) + next = 0; + setmetaint(prev, "qhnext", nil, next); + return; + } + prev = meta; + } +} + +static char hstatbuf[1024]; + +char * +prhstat(void) +{ + char *p, *e; + + p = hstatbuf; + e = p + nelem(hstatbuf); + p = seprint(p, e, "Hash stats:\n"); + p = seprint(p, e, "np2q: %ulld\n", np2q); + p = seprint(p, e, "p2qcoll: %ud\n", p2qcoll); + p = seprint(p, e, "maxp2q: %ud\n", maxp2q); + p = seprint(p, e, "nq2m: %ulld\n", nq2m); + p = seprint(p, e, "q2mcoll: %ud\n", q2mcoll); + seprint(p, e, "maxq2m: %ud\n", maxq2m); + return hstatbuf; +} + +void +showphash(int fd, char *path) +{ + uvlong hlist; + ulong bucket; + + bucket = pathhash(path); + cread(&hlist, sizeof(uvlong), BlkSize + bucket * sizeof(uvlong)); + fprint(fd, "%s: bucket:%uld 
hlist:%ulld\n", path, bucket, hlist); +} + +void +fixpaths(int fd) +{ + PQMap *pq, *pend; + uvlong *hb; + uchar *p; + char *path; + uvlong hlist, next; + ulong bucket; + int i, j; + + fprint(fd, "Checking for dangling path names\n"); + for(bucket = 0, i = 0; i < super.nhashblk; ++i) { + hb = cbread(i + 1); + for(j = 0; j < BlkSize / sizeof(uvlong) && bucket < super.nht; ++j, ++bucket) { + if(bucket % 100000 == 0) + fprint(fd, "."); +restart: + hlist = hb[j]; + while(hlist) { + p = cbread(hlist); + if(p == nil) { + fprint(fd, "hlist block read failure in fixpaths\n"); + return; + } + pend = (PQMap *)(p + BlkSize); + --pend; + for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) { + if(q2m(-1, pq->qpath, 0) == 0) { + path = θmalloc(pq->plen + 1); + memmove(path, pq->pname, pq->plen); + fprint(fd, "removing dangling path %s\n", path); + rmp(path); + free(path); + brelease(hlist); + goto restart; + } + } + next = *((uvlong *)(p + BlkSize - sizeof(uvlong))); + brelease(hlist); + hlist = next; + } + } + brelease(i + 1); + } +} --- /sys/src/cmd/θfs/fs.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/fs.c Thu Feb 20 02:17:40 2014 @@ -0,0 +1,1207 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+typedef struct Fdref Fdref;
+typedef struct Fidaux Fidaux;
+typedef struct Lmsg Lmsg;
+typedef struct Srvaux Srvaux;
+typedef struct Uglymap Uglymap;
+/* Fdref: a descriptor kept together with a Ref; Fidaux.store points at one. */
+struct Fdref {
+	Ref ref;
+	int fd;
+};
+/* Fidaux: NOTE(review): by its name, per-fid state; the code that fills it is not in this part of the file — confirm. */
+struct Fidaux {
+	char *path;
+	char *uname;
+	uvlong lsearch;
+	int dirindex;
+	Fdref *store;
+};
+/* Lmsg: one new connection, sent on lchan by threadmain and mylistenproc and received by srvstarter. */
+struct Lmsg {
+	int data;	/* connection descriptor; srvstarter uses it as the Srv's infd and outfd */
+	char *rsys;	/* remote system name; srvstarter stores it in the Srv's addr */
+};
+/* Srvaux: what halt9p expects to find in a Srv's aux pointer. */
+struct Srvaux {
+	Ioproc *io9p;	/* closed with closeioproc by halt9p */
+};
+/* Uglymap: node of the list headed by uhd; halt9p walks it to close every Srv's descriptors. */
+struct Uglymap {
+	Srv *s;
+	uchar *rbuf;	/* NOTE(review): presumably the read buffer identifying s in read9pmsg — confirm */
+	Uglymap *next;
+};
+/* Handlers installed in θsrv below, plus the two thread entry points started from threadmain. */
+static void θattach(Req *);
+static void θcreate(Req *);
+static void θdestroyfid(Fid *);
+static void θend(Srv *);
+static void θflush(Req *);
+static void θopen(Req *);
+static void θread(Req *);
+static void θremove(Req *);
+static void θstat(Req *);
+static void θwalk(Req *);
+static void θwstat(Req *);
+static void θwrite(Req *);
+static void mylistenproc(void *);
+static void srvstarter(void *);
+/* θsrv: template Srv; srvstarter copies it into each Srv it allocates.  θstart has no prototype above — presumably declared in dat.h. */
+Srv θsrv = {
+	.attach = θattach,
+	.auth = auth9p,
+	.open = θopen,
+	.create = θcreate,
+	.read = θread,
+	.write = θwrite,
+	.remove = θremove,
+	.flush = θflush,
+	.stat = θstat,
+	.wstat = θwstat,
+	.walk = θwalk,
+	.destroyfid = θdestroyfid,
+	.start = θstart,
+	.end = θend,
+};
+ +static char *dev; +static char *laddr; +static Uglymap *uhd; +static Channel *lchan; + +char *ddir, *dname; +uvlong starttime; +int doatimes; +int shutdown; +int mainstacksize = 16384; + +static void +usage(void) +{ + fprint(2, "Usage: %s [-anrsACD] [-m nblk] [-p port] device\n", argv0); + threadexits("usage"); +} + +void +threadmain(int argc, char *argv[]) +{ + Lmsg lmsg; + char *lstr, *p; + int doream, postcons, port, poststdin; + int doaoe, donfs, maxcache; + + doream = 0; + postcons = 0; + poststdin = 0; + doaoe = 1; + donfs = 1; + maxcache = 4000; + port = 564; + ARGBEGIN { + case 'a': + doaoe = 0; + break; + case 'm': + maxcache = atoi(EARGF(usage())); + break; + case 'n': + donfs = 0; + break; + case 'p': + port = atoi(EARGF(usage())); + break; + case 'r': + doream = 1; + break; + case 's': + poststdin = 1; + break; + case 'A': + doatimes = 1; + break; + case 'C': + postcons = 1; + break; + case 'D': + ++chatty9p; + break; + default: + usage(); + } ARGEND + if(argc != 1) + usage(); + dev = *argv; + lstr = smprint("tcp!*!%d", port); + starttime = nsec(); + p = strrchr(dev, '/'); + if(p == nil) { + ddir = "."; + dname = strdup(dev); + } + else { + ddir = mallocz(p - dev + 1, 1); + strncpy(ddir, dev, p - dev); + dname = strdup(p+1); + } + initcache(dev, maxcache); + if(doream) + ream(dev); + else + loadsuper(); + inituid(); + if(doaoe) + initaoe(); + if(donfs) + initnfs(); + lchan = chancreate(sizeof(Lmsg), 4); + laddr = lstr; + threadcreate(srvstarter, nil, 8192); + if(poststdin) { + lmsg.data = 1; + lmsg.rsys = estrdup9p("boot"); + send(lchan, &lmsg); + postfd("θfs", 0); + } + /* + * Because the main in libthread runs the thread scheduler in + * the initial process, we can't daemonize in the usual way. + * The backgrounding is no big deal, but we want the parent + * to be able to wait until we're ready for an attach. So we + * don't do the console until almost the end and the parent + * can wait until θfsctl appears in /srv. 
It's not as elegant as + * letting the wait synchronize, but it's better than an arbitrary + * sleep. + */ + initcons(postcons); + proccreate(mylistenproc, nil, 8192); +} + +void +halt9p(void) +{ + Srvaux *sa; + Uglymap *u; + +/* chanclose(lchan); */ + for(u = uhd; u; u = u->next) { + close(u->s->infd); + close(u->s->outfd); + sa = u->s->aux; + closeioproc(sa->io9p); + } +} + +static void +mysrvproc(void *a) +{ + Srv *s; + int data; + + s = a; + data = s->infd; + srv(s); + close(data); + threadexits(nil); +} + +static void +srvstarter(void *) +{ + Lmsg m; + Srv *s; + + while(recv(lchan, &m)) { + if(shutdown) + break; + s = emalloc9p(sizeof(Srv)); + *s = θsrv; + s->addr = m.rsys; + s->infd = s->outfd = m.data; + s->fpool = nil; + s->rpool = nil; + s->rbuf = nil; + s->wbuf = nil; + threadcreate(mysrvproc, s, 32 * 1024); + } + threadexits(nil); +} + +static char* +getremotesys(char *ndir) +{ + char buf[128], *serv, *sys; + int fd, n; + + snprint(buf, sizeof buf, "%s/remote", ndir); + sys = nil; + fd = open(buf, OREAD); + if(fd >= 0) { + n = read(fd, buf, sizeof(buf)-1); + if(n>0) { + buf[n-1] = 0; + serv = strchr(buf, '!'); + if(serv) + *serv = 0; + sys = estrdup9p(buf); + } + close(fd); + } + if(sys == nil) + sys = estrdup9p("unknown"); + return sys; +} + +static void +mylistenproc(void *) +{ + Lmsg m; + char ndir[NETPATHLEN], dir[NETPATHLEN]; + int ctl, data, nctl; + + ctl = announce(laddr, dir); + if(ctl < 0) { + fprint(2, "%s: announce %s: %r", argv0, laddr); + return; + } + + for(;;){ + nctl = listen(dir, ndir); + if(nctl < 0){ + fprint(2, "%s: listen %s: %r", argv0, laddr); + break; + } + + data = accept(ctl, ndir); + if(data < 0){ + fprint(2, "%s: accept %s: %r\n", argv0, ndir); + continue; + } + m.data = data; + m.rsys = getremotesys(ndir); + send(lchan, &m); + } +} + +int +read9pmsg(int fd, void *abuf, uint n) +{ + Srvaux *sa; + Uglymap *um; + Ioproc *io9p; + int m, len; + uchar *buf; + + buf = abuf; + + /* + * Grotesque, but this is research :) + */ + for(um 
= uhd; um && um->rbuf != buf; um = um->next) ; + if(um == nil) { + fprint(2, "no ugly mapping"); + return 0; + } + sa = um->s->aux; + io9p = sa->io9p; + + /* read count */ + m = ioreadn(io9p, fd, buf, BIT32SZ); + if(m != BIT32SZ){ + if(m < 0) + return -1; + return 0; + } + + len = GBIT32(buf); + if(len <= BIT32SZ || len > n){ + werrstr("bad length in 9P2000 message header"); + return -1; + } + len -= BIT32SZ; + m = ioreadn(io9p, fd, buf+BIT32SZ, len); + if(m < len) + return 0; + return BIT32SZ+m; +} + +static int +θhasperm(int fd, uvlong meta, char *uid, int p) +{ + uvlong mode; + char *fuser, *fgroup; + int m; + + if(allow) + return 1; + if(getmetaint(fd, meta, "mode", &mode) == MTnone) + return 1; + m = mode & 7; /* other */ + if((p & m) == p) + return 1; + + if((fuser = getmetastr(fd, meta, "uid")) != nil) { + if(strcmp(fuser, uid) == 0) { + m |= (mode>>6) & 7; + if((p & m) == p) { + free(fuser); + return 1; + } + } + free(fuser); + } + + if((fgroup = getmetastr(fd, meta, "gid")) != nil) { + if(ingroup(uid, fgroup)) { + m |= (mode>>3) & 7; + if((p & m) == p) { + free(fgroup); + return 1; + } + } + free(fgroup); + } + return 0; +} + + +/* + * Thread body for an attach request (a is the Req). The attach name, + * prefixed with '/', is looked up as a root; if it has no metadata the + * request fails with "no root". Otherwise the fid's qid is filled in + * from the root's qpath/qvers/qtype metadata and a Fidaux is hung on the + * fid holding the path, the user name and a fresh Fdref with fd -1. + */ +static void +attacher(void *a) +{ + Req *r; + Fidaux *fa; + char *path; + uvlong rmeta, x; + + r = a; + if(r->ifcall.aname == nil || strlen(r->ifcall.aname) == 0) + path = smprint("/"); + else + path = smprint("/%s", r->ifcall.aname); + rmeta = q2m(-1, p2q(-1, path, 0), 0); + if(rmeta == 0) { + /* path is only kept when it is handed to the Fidaux below */ + free(path); + respond(r, "no root"); + } + else { + getmetaint(-1, rmeta, "qpath", &x); + r->fid->qid.path = x; + getmetaint(-1, rmeta, "qvers", &x); + r->fid->qid.vers = x; + getmetaint(-1, rmeta, "qtype", &x); + r->fid->qid.type = x; + r->ofcall.qid = r->fid->qid; + /* same allocator θclone and _θwalk use for a Fidaux */ + fa = θmalloc(sizeof(Fidaux)); + r->fid->aux = fa; + fa->path = path; + fa->uname = estrdup9p(r->ifcall.uname); + fa->lsearch = 0; + fa->dirindex = 0; + fa->store = θmalloc(sizeof(Fdref)); + incref(&fa->store->ref); + fa->store->fd = -1; + respond(r, nil); + } + threadexits(nil); +} + +static void +θattach(Req *r) +{ +
if(authattach(r) < 0) + return; + threadcreate(attacher, r, 8192); +} + +static void +_θcreate(void *a) +{ + Req *r; + Qid nqid; + Fidaux *fa; + char *npath; + uvlong x; +// uvlong meta, pmeta, dirblk, now; + uvlong meta, pmeta, now; + + r = a; + fa = r->fid->aux; + pmeta = q2m(-1, r->fid->qid.path, 0); + if(θhasperm(fa->store->fd, pmeta, fa->uname, AWRITE) == 0) { + respond(r, "permission denied"); + threadexits(nil); + } + npath = smprint("%s/%s", fa->path, r->ifcall.name); + nqid.path = p2q(-1, npath, 1); + meta = q2m(-1, nqid.path, 1); + if(meta == 0) { + respond(r, "create failure"); + free(npath); + threadexits(nil); + } + setmetastr(meta, "name", nil, r->ifcall.name, 0); + setmetaint(meta, "parent", nil, r->fid->qid.path); + nqid.vers = 0; + nqid.type = 0; + if(r->ifcall.perm & DMDIR) + nqid.type |= QTDIR; + if(r->ifcall.perm & DMAPPEND) + nqid.type |= QTAPPEND; + if(r->ifcall.perm & DMEXCL) + nqid.type |= QTEXCL; + if(r->ifcall.perm & DMTMP) + nqid.type |= QTTMP; + setmetaint(meta, "qpath", nil, nqid.path); + setmetaint(meta, "qvers", nil, nqid.vers); + setmetaint(meta, "qtype", nil, nqid.type); + setmetaint(meta, "mode", nil, r->ifcall.perm); + now = nsec(); + setmetaint(meta, "atime", nil, now); + setmetaint(meta, "mtime", nil, now); + setmetaint(meta, "length", nil, 0); + setmetastr(meta, "uid", nil, fa->uname, 0); + setmetastr(meta, "gid", nil, fa->uname, 0); + setmetastr(meta, "muid", nil, fa->uname, 0); + if(getmetaint(-1, pmeta, "child", &x) == MTint) + setmetaint(meta, "sib", nil, x); + else + setmetaint(meta, "sib", nil, 0); + if(r->ifcall.perm & DMDIR) + setmetaint(meta, "child", nil, 0); + else + setmetaint(meta, "dblock", nil, 0); + setmetaint(pmeta, "child", nil, nqid.path); + if(getmetaint(-1, pmeta, "qvers", &x) != MTnone) + setmetaint(pmeta, "qvers", nil, x+1); + setmetaint(pmeta, "mtime", nil, now); + setmetastr(pmeta, "muid", nil, fa->uname, 0); + setqhash(nqid.path, meta); + free(fa->path); + fa->path = npath; + fa->lsearch = 0; + 
r->fid->qid = nqid; + r->ofcall.qid = nqid; + respond(r, nil); + savesuper(); + threadexits(nil); +} + +static void +θcreate(Req *r) +{ + threadcreate(_θcreate, r, 8192); +} + +/* + * Release everything hanging off a fid. Auth fids are passed to + * authdestroy. For other fids: if the fid was opened with ORCLOSE the + * file's data, metadata and hash entries are removed; then the reference + * on the shared Fdref is dropped (closing its fd and freeing it when the + * count reaches zero) and the Fidaux and its strings are freed. + * A fid with no Fidaux has nothing to release. + */ +static void +θdestroyfid(Fid *fid) +{ + Fidaux *fa; + uvlong meta; + + if(fid->qid.type & QTAUTH) { + authdestroy(fid); + return; + } + fa = fid->aux; + /* check before the ORCLOSE path, which dereferences fa */ + if(fa == nil) + return; + if(fid->omode != -1 && (fid->omode & ORCLOSE)) { + meta = q2m(fa->store->fd, fid->qid.path, 0); + if(meta != 0) { + freedata(meta); + rmdlist(meta, fid->qid.path); + rmq(fid->qid.path, meta); + rmmlist(meta); + rmp(fa->path); + } + } + if(fa->store && decref(&fa->store->ref) == 0) { + if(fa->store->fd != -1) + close(fa->store->fd); + free(fa->store); + } + free(fa->path); + free(fa->uname); + free(fa); +} + +static void +θend(Srv *s) +{ + Srvaux *sa; + Uglymap *um, *u; + + resetmeta(); + csync(); + sa = s->aux; + if(sa) { + if(sa->io9p) + closeioproc(sa->io9p); + free(sa); + } + if(uhd == nil) + return; + if(uhd->s == s) { + um = uhd; + uhd = um->next; + free(um); + return; + } + for(um = uhd; um && um->next && um->next->s != s; um = um->next) ; + if(um && um->next) { + u = um->next; + um->next = u->next; + free(u); + } +} + +static void +θflush(Req *r) +{ + respond(r, nil); +} + +static void +_θopen(void *a) +{ + Fidaux *fa; + Req *r; + Fid *fid; + uvlong meta, x; + ulong need; + + r = a; + fid = r->fid; + fa = fid->aux; + meta = q2m(fa->store->fd, fid->qid.path, 0); + if(meta == 0) { + respond(r, "no file"); + threadexits(nil); + } + switch(r->ifcall.mode & 3) { + case OREAD: + need = AREAD; + break; + case OWRITE: + need = AWRITE; + break; + case ORDWR: + need = AREAD | AWRITE; + break; + case OEXEC: + need = AEXEC; + break; + default: + need = AREAD | AWRITE | AEXEC; + break; + } + if(r->ifcall.mode & OTRUNC) + need |= AWRITE; + if(θhasperm(fa->store->fd, meta, fa->uname, need) == 0) { + respond(r, "permission denied"); + threadexits(nil); + } + if(r->ifcall.mode & ORCLOSE) { + /* check write permission
on parent */ + } + if(r->ifcall.mode & OTRUNC) { + setmetaint(meta, "length", nil, 0LL); + if(getmetaint(fa->store->fd, meta, "qvers", &x) != MTnone) + setmetaint(meta, "qvers", nil, x+1); + } + respond(r, nil); + threadexits(nil); +} + +static void +θopen(Req *r) +{ + threadcreate(_θopen, r, 8192); +} + +static int +lzstat(int fd, uvlong meta, Dir *d) +{ + uvlong x; + + memset(&d->qid, 0, sizeof(Qid)); + if(getmetaint(fd, meta, "qpath", &x) != MTnone) + d->qid.path = x; + if(getmetaint(fd, meta, "qvers", &x) != MTnone) + d->qid.vers = x; + if(getmetaint(fd, meta, "qtype", &x) != MTnone) + d->qid.type = x; + if(getmetaint(fd, meta, "mode", &x) != MTnone) + d->mode = x; + else + d->mode = 0; + if(getmetaint(fd, meta, "atime", &x) != MTnone) + d->atime = x / 1000000000; + else + d->atime = 0; + if(getmetaint(fd, meta, "mtime", &x) != MTnone) + d->mtime = x / 1000000000; + else + d->mtime = 0; + if(getmetaint(fd, meta, "length", &x) != MTnone) + d->length = x; + else + d->length = 0; + if((d->name = getmetastr(fd, meta, "name")) == nil) { + fprint(2, "where the streets have no name\n"); + d->name = estrdup9p(""); + } + /* If this is one of the roots, just call it '/' */ + if(d->name[0] == '/') + d->name[1] = 0; + if((d->uid = getmetastr(fd, meta,"uid")) == nil) + d->uid = estrdup9p("none"); + if((d->gid = getmetastr(fd, meta, "gid")) == nil) + d->gid = estrdup9p("none"); + if((d->muid = getmetastr(fd, meta, "muid")) == nil) + d->muid = estrdup9p("none"); + return 0; +} + +static int +θgen(int n, Dir *dir, void *a) +{ + Fidaux *fa; + Fid *fid; + uvlong meta, x; + int i; + + fid = a; + fa = fid->aux; + if(n == fa->dirindex + 1 && fa->lsearch != 0) { + if(getmetaint(fa->store->fd, fa->lsearch, "sib", &x) == MTint) + meta = q2m(fa->store->fd, x, 0); + else { + meta = 0; + fprint(2, "no sibling in mblock %ulld\n", fa->lsearch); + } + } + else { + meta = q2m(fa->store->fd, fid->qid.path, 0); + if(meta == 0) + return -1; + if(getmetaint(fa->store->fd, meta, "child", &x) != 
MTint) + return -1; + meta = q2m(fa->store->fd, x, 0); + for(i = 0; i < n && meta != 0; ++i) { + getmetaint(fa->store->fd, meta, "sib", &x); + meta = q2m(fa->store->fd, x, 0); + } + } + fa->dirindex = n; + fa->lsearch = meta; + if(meta == 0) + return -1; + i = lzstat(fa->store->fd, meta, dir); + return i; +} + +static void +_θread(void *a) +{ + Fidaux *fa; + Req *r; + ulong tot; + + r = a; + fa = r->fid->aux; + fa->lsearch = 0; + fa->dirindex = 0; + if(r->fid->qid.type & QTDIR) { + dirread9p(r, θgen, r->fid); + respond(r, nil); + threadexits(nil); + } + tot = θpread(fa->store->fd, r->fid->qid.path, r->ofcall.data, r->ifcall.count, r->ifcall.offset); + if(tot == -1) { + respond(r, "no metadata"); + threadexits(nil); + } + r->ofcall.count = tot; + respond(r, nil); + threadexits(nil); +} + +static void +θauthread(void *a) +{ + Req *r; + + r = a; + authread(r); + threadexits(nil); +} + +static void +θread(Req *r) +{ + if(r->fid->qid.type & QTAUTH) { + proccreate(θauthread, r, 8192); + return; + } + threadcreate(_θread, r, 8192); +} + +static void +_θremove(void *a) +{ + static QLock rlock; + Req *r; + Fidaux *fa; + uvlong meta, pmeta, qpath, now; + + /* + * This lock is ugly. Its purpose is to serialize the removes so + * that we don't end up in the process of removing the same + * file more than once concurrently. It comes up when doing + * a mk clean on the kernel. I'm going to give some thought + * to better ways to handle this, but this should get around + * the issue for now. 
+ */ + qlock(&rlock); + r = a; + fa = r->fid->aux; + meta = q2m(-1, r->fid->qid.path, 0); + if(meta == 0) { + qunlock(&rlock); + respond(r, nil); + threadexits(nil); + } + pmeta = 0; + /* check parent permission */ + if(getmetaint(-1, meta, "parent", &qpath) != MTnone && qpath != 0) { + pmeta = q2m(-1, qpath, 0); + if(pmeta != 0) { + if(θhasperm(fa->store->fd, pmeta, fa->uname, AWRITE) == 0) { + qunlock(&rlock); + respond(r, "permission denied"); + threadexits(nil); + } + } + } + if(r->fid->qid.type & QTDIR) { + if(getmetaint(-1, meta, "child", &qpath) != MTnone && qpath != 0) { + qunlock(&rlock); + respond(r, "not empty"); + threadexits(nil); + } + } + now = nsec(); + rmq(r->fid->qid.path, meta); + setmetaint(pmeta, "mtime", nil, now); + setmetastr(pmeta, "muid", nil, fa->uname, 0); + freedata(meta); + rmdlist(meta, r->fid->qid.path); + rmmlist(meta); + rmp(fa->path); + qunlock(&rlock); + respond(r, nil); + threadexits(nil); +} + +static void +θremove(Req *r) +{ + threadcreate(_θremove, r, 8192); +} + +void +θstart(Srv *s) +{ + Srvaux *sa; + Uglymap *um; + + sa = malloc(sizeof(Srvaux)); + sa->io9p = ioproc(); + s->aux = sa; + um = malloc(sizeof(Uglymap)); + um->s = s; + um->rbuf = s->rbuf; + um->next = uhd; + uhd = um; +} + +static void +_θstat(void *a) +{ + Req *r; + Fidaux *fa; + uvlong meta; + int n; + + r = a; + fa = r->fid->aux; + meta = q2m(fa->store->fd, r->fid->qid.path, 0); + if(meta == 0) + respond(r, "no file"); + else { + n = lzstat(fa->store->fd, meta, &r->d); + if(n == 0) + respond(r, nil); + else + respond(r, "errnt"); + } + threadexits(nil); +} + +static void +θstat(Req *r) +{ + threadcreate(_θstat, r, 8192); +} + +static char * +θwalk1(Fid *fid, char *name, void *) +{ + Fidaux *fa; + char *npath, *sname, *spath; + uvlong meta, x; + int fd; + + fa = (Fidaux *)(fid->aux); + npath = smprint("%s/%s", fa->path, name); + meta = q2m(fa->store->fd, p2q(fa->store->fd, npath, 0), 0); + if(meta == 0) + return "does not exit"; + sname = 
getmetastr(fa->store->fd, meta, "snap"); + if(sname == nil) { + free(fa->path); + fa->path = npath; + } + else { + free(npath); + spath = smprint("%s/%s", ddir, sname); + free(sname); + fd = open(spath, OREAD); + if(fd < 0) + return "snap open"; + free(fa->path); + fa->path = estrdup9p("/"); + if(decref(&fa->store->ref) == 0) { + if(fa->store->fd != -1) + close(fa->store->fd); + free(fa->store); + } + fa->store = θmalloc(sizeof(Fdref)); + incref(&fa->store->ref); + fa->store->fd = fd; + meta = q2m(fa->store->fd, p2q(fa->store->fd, "/", 0), 0); + if(meta == 0) + return "no root"; + } + if(getmetaint(fa->store->fd, meta, "qpath", &x) != MTint) + return "no qid"; + fid->qid.path = x; + getmetaint(fa->store->fd, meta, "qvers", &x); + fid->qid.vers = x; + getmetaint(fa->store->fd, meta, "qtype", &x); + fid->qid.type = x; + return nil; +} + +static char * +θclone(Fid *oldfid, Fid *newfid, void *) +{ + Fidaux *ofa, *nfa; + + ofa = (Fidaux *)(oldfid->aux); + nfa = newfid->aux = θmalloc(sizeof(Fidaux)); + *nfa = *ofa; + nfa->path = estrdup9p(ofa->path); + nfa->uname = estrdup9p(ofa->uname); + incref(&nfa->store->ref); + return nil; +} + +static void +_θwalk(void *a) +{ + Req *r; + Fdref *store; + Fidaux *fa; + char *npath, *p, *e; + uvlong qp, meta, x; + int nlen; + int i, fd; + + r = a; + fa = r->fid->aux; + store = fa->store; + fd = store->fd; + if(r->ifcall.nwname == 1 && strcmp(r->ifcall.wname[0], "..") == 0) { + npath = estrdup9p(fa->path); + p = strrchr(npath, '/'); + if(p && p != npath) + *p = 0; + } + else { + nlen = strlen(fa->path); + for(i = 0; i < r->ifcall.nwname; ++i) + nlen += strlen(r->ifcall.wname[i]) + 1; + npath = θmalloc(nlen + 1); + p = npath; + e = npath + nlen + 1; + p = seprint(p, e, "%s", fa->path); + for(i = 0; i < r->ifcall.nwname; ++i) + p = seprint(p, e, "/%s", r->ifcall.wname[i]); + } + /* + * If we can get there directly, do it, otherwise, fall + * back to the one step at a time using walkandclone + */ + meta = q2m(fd, p2q(fd, npath, 0), 0); + 
if(meta == 0) { + walkandclone(r, θwalk1, θclone, nil); + free(npath); + threadexits(nil); + } + if(p = getmetastr(fd, meta, "snap")) { + free(p); + walkandclone(r, θwalk1, θclone, nil); + free(npath); + threadexits(nil); + } + fa = r->newfid->aux; + if(r->fid == r->newfid) + free(fa->path); + else { + fa = r->newfid->aux = θmalloc(sizeof(Fidaux)); + fa->uname = estrdup9p(((Fidaux *)(r->fid->aux))->uname); + fa->store = store; + incref(&store->ref); + } + fa->path = npath; + if(r->ifcall.nwname == 0) { + respond(r, nil); + threadexits(nil); + } + r->ofcall.nwqid = r->ifcall.nwname; + for(i = r->ifcall.nwname - 1; i >= 0; --i) { + if(getmetaint(fd, meta, "qpath", &x) == MTnone) { + respond(r, "errnt"); + threadexits(nil); + } + r->ofcall.wqid[i].path = x; + getmetaint(fd, meta, "qvers", &x); + r->ofcall.wqid[i].vers = x; + getmetaint(fd, meta, "qtype", &x); + r->ofcall.wqid[i].type = x; + getmetaint(fd, meta, "parent", &qp); + meta = q2m(fd, qp, 0); + } + respond(r, nil); + threadexits(nil); +} + +static void +θwalk(Req *r) +{ + threadcreate(_θwalk, r, 8192); +} + +static void +_θwrite(void *a) +{ + Req *r; + ulong tot; + + r = a; + if(r->fid->qid.type & QTAPPEND) + tot = θpwrite(r->fid->qid.path, r->ifcall.data, r->ifcall.count, 0, 2); + else + tot = θpwrite(r->fid->qid.path, r->ifcall.data, r->ifcall.count, r->ifcall.offset, 1); + if(tot == -1) { + respond(r, "no metadata"); + threadexits(nil); + } + r->ofcall.count = tot; + respond(r, nil); + threadexits(nil); +} + +static void +θauthwrite(void *a) +{ + Req *r; + + r = a; + authwrite(r); + threadexits(nil); +} + +static void +θwrite(Req *r) +{ + if(r->fid->qid.type & QTAUTH) { + proccreate(θauthwrite, r, 8192); + return; + } + threadcreate(_θwrite, r, 8192); +} + +/* + * Thread body for a wstat request (a is the Req). Unless the global + * allow flag is set, every permission check is made before anything is + * modified: a rename needs write permission on the parent and an unused + * target path, a length change needs write permission on the file, and + * mode, mtime and gid changes need the owner or the group leader. Only + * then are the requested fields written to the file's metadata. A rename + * also rehashes the path and replaces the fid's stored path. + * Every exit path releases newpath, uid and gid. + */ +static void +_θwstat(void *a) +{ + Req *r; + Fidaux *fa; + Qid nqid; + char *p, *gid, *uid, *newpath; + uvlong meta, pmeta, x, pqpath; + + r = a; + fa = r->fid->aux; + meta = q2m(-1, r->fid->qid.path, 0); + if(meta == 0) { + respond(r, "no metadata"); +
threadexits(nil); + } + p = strrchr(fa->path, '/'); + if(p && fa->path) + newpath = smprint("%.*s/%s", (int)(p - fa->path), fa->path, r->d.name); + else + newpath = estrdup9p(r->d.name); + + if(allow) + goto skipperm; + uid = getmetastr(-1, meta, "uid"); + gid = getmetastr(-1, meta, "gid"); + /* missing owner metadata: fall back to "none", as lzstat reports it, so the checks below never see nil */ + if(uid == nil) + uid = estrdup9p("none"); + if(gid == nil) + gid = estrdup9p("none"); + + /* Because wstat is defined to be all or none, first check all the permissions */ + if(strlen(r->d.name) > 0) { + if(getmetaint(-1, meta, "parent", &pqpath) != MTnone && pqpath != 0) { + pmeta = q2m(-1, pqpath, 0); + if(pmeta != 0) { + if(θhasperm(-1, pmeta, fa->uname, AWRITE) == 0) { + free(newpath); + free(gid); + free(uid); + respond(r, "permission denied"); + threadexits(nil); + } + } + } + if(q2m(-1, p2q(-1, newpath, 0), 0) != 0) { + free(newpath); + free(gid); + free(uid); + respond(r, "file exists"); + threadexits(nil); + } + + } + if(r->d.length != 0xffffffffffffffffLL) { + if((r->fid->qid.type & QTDIR) && r->d.length != 0) { + free(newpath); + free(gid); + free(uid); + respond(r, "non-zero size on directory"); + threadexits(nil); + } + if(θhasperm(-1, meta, fa->uname, AWRITE) == 0) { + free(newpath); + free(gid); + free(uid); + respond(r, "permission denied"); + threadexits(nil); + } + } + if(r->d.mode != 0xffffffff || r->d.mtime != 0xffffffff) { + if(!(strcmp(fa->uname, uid) == 0 || isleader(fa->uname, gid))) { + free(gid); + free(uid); + free(newpath); + respond(r, "not owner"); + threadexits(nil); + } + } + if(strlen(r->d.gid) > 0) { + if(!(strcmp(fa->uname, uid) == 0 && ingroup(fa->uname, gid) || isleader(fa->uname, gid))) { + free(gid); + free(uid); + free(newpath); + respond(r, "not owner"); + threadexits(nil); + } + } + free(gid); + free(uid); + +skipperm: + /* Now that we know we have permission, make all the changes */ + if(r->d.mode != 0xffffffff) { + getmetaint(-1, meta, "qpath", &x); + nqid.path = x; + getmetaint(-1, meta, "qvers", &x); + nqid.vers = x; + getmetaint(-1, meta, "qtype", &x); + nqid.type = x; + x = nqid.type & QTDIR; + if(r->d.mode & DMAPPEND) + x |= QTAPPEND; +
if(r->d.mode & DMEXCL) + x |= QTEXCL; + if(r->d.mode & DMTMP) + x |= QTTMP; + if(x != nqid.type) + setmetaint(meta, "qtype", nil, x); + setmetaint(meta, "mode", nil, r->d.mode); + if(getmetaint(-1, meta, "unixmode", &x) != MTnone) + setmetaint(meta, "unixmode", nil, x & ~0777 | r->d.mode & 0777); + } + if(r->d.mtime != 0xffffffff) + setmetaint(meta, "mtime", nil, r->d.mtime * 1000000000LL); + if(r->d.length != 0xffffffffffffffffLL) + setmetaint(meta, "length", nil, r->d.length); + if(strlen(r->d.name) > 0) { + setmetastr(meta, "name", nil, r->d.name, 0); + rehashpath(r->fid->qid.path, fa->path, newpath); + free(fa->path); + fa->path = newpath; + } + else { + /* no rename requested: the candidate path was never installed */ + free(newpath); + } + if(allow && strlen(r->d.uid) > 0) + setmetastr(meta, "uid", nil, r->d.uid, 0); + if(strlen(r->d.gid) > 0) + setmetastr(meta, "gid", nil, r->d.gid, 0); + respond(r, nil); + threadexits(nil); +} + +static void +θwstat(Req *r) +{ + threadcreate(_θwstat, r, 8192); +} --- /sys/src/cmd/θfs/dat.h Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/dat.h Thu Feb 20 02:17:41 2014 @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +enum { +// BlkSize = 65536, + BlkSize = 32768, + NPerBlk = BlkSize / sizeof(uvlong), + + BlobQuan = 64, /* must be a power of 2 */ + + TFile = 0, + TObject, + TLun, + + Magicθ = 0x4207abcddcba0742LL, + + FSClean = 1, + + MTnone = 0, /* must be 0 */ + MTint = 1, + MTistring = 2, + MTstring = 3, + MTblob = 4, +}; + +typedef struct Blob Blob; +typedef struct GMeta GMeta; +typedef union MVal MVal; +typedef struct PQMap PQMap; +typedef struct Super Super; + +#pragma pack on +struct Blob { + short len; + union { + uvlong next; + char data[1]; + }; +}; + +union MVal { + uvlong val; + char str[8]; +}; + +struct GMeta { + uvlong next; + uchar type; + char name[15]; + MVal m; +}; + +struct PQMap { + uvlong qpath; + ushort plen; + char pname[1]; +}; +#pragma pack off + +struct Super { + uvlong magic; + uvlong version; + uvlong qgen; + uvlong nblk; + uvlong nfreemap; + uvlong freemap; + uvlong state; + uvlong firstdat; + uvlong nfree; + uvlong firstlun; + uvlong nmeta; + uvlong firstmeta; + uvlong ffmeta; + uvlong nblob; + uvlong firstblob; + uvlong ffblob; + uvlong lfblob; + uvlong nht; + uvlong nhashblk; + uvlong snaptime; +}; + +/* aoe.c */ +extern void haltaoe(void); +extern void initaoe(void); +extern void 
rmtarget(int, int); +extern void starttarget(int, int, uvlong); + +/* cache.c */ +extern int brelease(uvlong); +extern void *cbclean(uvlong); +extern void *cbread(uvlong); +extern void cbwrite(uvlong); +extern int ccanfree(uvlong); +extern int cread(void *, int, uvlong); +extern void csync(void); +extern int cwrite(void *, int, uvlong); +extern void haltcache(void); +extern void initcache(char *, int); +extern char *prcstat(void); +extern void resetcache(void); + +/* cons.c */ +extern void docons(void *); +extern void initcons(int); + +extern int allow; +extern int rootallow; + +/* free.c */ +extern uvlong allocblock(void); +extern void freeblock(uvlong); +extern void haltfree(void); +extern void initfree(void); + +/* fs.c */ +extern void halt9p(void); +extern void θstart(Srv *); + +extern char *ddir, *dname; +extern uvlong starttime; +extern int doatimes; +extern int shutdown; + +/* hash.c */ +extern void fixpaths(int); +extern uvlong p2q(int, char *, int); +extern char *prhstat(void); +extern uvlong q2m(int, uvlong, int); +extern void rehashpath(uvlong, char *, char *); +extern void rmp(char *); +extern void rmq(uvlong, uvlong); +extern void setqhash(uvlong, uvlong); +extern void showphash(int, char *); + +/* meta.c */ +extern void blockuse(int, uvlong); +extern void checkalloc(int); +extern void fixfamilies(int); +extern void freedata(uvlong); +extern void *getblob(int, uvlong, int *); +extern int getmeta(int, uvlong, char *, MVal *); +extern int getmetaint(int, uvlong, char *, uvlong *); +extern char *getmetastr(int, uvlong, char *); +extern uvlong locate(int, uvlong, uvlong, int); +extern void mpred(int, uvlong); +extern void mprint(int, uvlong); +extern void prmeta(int, uvlong); +extern char *prmstat(void); +extern void reammeta(int); +extern void recovermeta(int); +extern void resetmeta(void); +extern uvlong rmmeta(uvlong, uvlong); +extern void rmmlist(uvlong); +extern uvlong setblob(void *, int, uvlong); +extern uvlong setmeta(uvlong, char *, char *, 
uvlong, int); +extern uvlong setmetaint(uvlong, char *, char *, uvlong); +extern uvlong setmetablob(uvlong, char *, char *, uchar *, int, uvlong); +extern uvlong setmetastr(uvlong, char *, char *, char *, uvlong); +extern void setmstruct(uvlong, uvlong, char *, int, uvlong); + +/* nfs.c */ +extern void haltnfs(void); +extern void initnfs(void); + +extern int debugnfs; + +/* super.c */ +extern void loadsuper(void); +extern char *prsuper(void); +extern void ream(char *); +extern void savesuper(void); + +extern Super super; + +/* uid.c */ +extern char *id2gname(char *, int); +extern char *id2uname(char *, int); +extern int ingroup(char *, char *); +extern void inituid(void); +extern int isleader(char *, char *); + +/* util.c */ +extern void *θmalloc(ulong); +extern long θpread(int, uvlong, void *, long, uvlong); +extern long θpwrite(uvlong, void *, long, uvlong, int); +extern void rmdlist(uvlong, uvlong); +extern long spread(int, void *, long, uvlong); + +/* platform specific */ +extern void conspost(int [], int []); +extern uvlong devsize(char *); --- /sys/src/cmd/θfs/cons.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/cons.c Thu Feb 20 02:17:42 2014 @@ -0,0 +1,789 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NOTE(review): the <...> header names were missing from this copy of the
+ * patch.  The first five are the standard lib9p prologue; <stdio.h> is
+ * assumed for sscanf -- confirm against the applied tree.
+ */
+#include <u.h>
+#include <libc.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include <stdio.h>
+#include "dat.h"
+
+/* Command indices; each one has a matching row in ctab. */
+enum {
+	CMallow,
+	CMblockuse,
+	CMcheckalloc,
+	CMcstat,
+	CMdisallow,
+	CMfixfamilies,
+	CMfixpaths,
+	CMhalt,
+	CMhelp,
+	CMhstat,
+	CMlcreate,
+	CMlls,
+	CMlmeta,
+	CMlrm,
+	CMmpred,
+	CMmprint,
+	CMmstat,
+	CMnewroot,
+	CMnfsdebug,
+	CMp2q,
+	CMp9debug,
+	CMphash,
+	CMpmeta,
+	CMq2m,
+	CMqmeta,
+	CMrecovermeta,
+	CMrevert,
+	CMrmp,
+	CMrootallow,
+	CMrootdisallow,
+	CMsetmeta,
+	CMsetmstruct,
+	CMsetqhash,
+	CMsnap,
+	CMsuper,
+	CMsync,
+};
+
+enum {
+	SecPerDay = 24 * 60 * 60,
+};
+
+static void θconsread(Req *);
+static void θconswrite(Req *);
+
+static Srv θconssrv = {
+	.start = θstart,
+	.read = θconsread,
+	.write = θconswrite,
+};
+
+static int snapid;
+static Channel *snaptrigger;
+static Ioproc *consio;
+static int pfd[2];
+
+/* index, command word, required field count (0 = any, checked by the handler) */
+static Cmdtab ctab[] = {
+	{CMallow, "allow", 1},
+	{CMblockuse, "blockuse", 2},
+	{CMcheckalloc, "checkalloc", 1},
+	{CMcstat, "cstat", 1},
+	{CMdisallow, "disallow", 1},
+	{CMfixfamilies, "fixfamilies", 1},
+	{CMfixpaths, "fixpaths", 1},
+	{CMhalt, "halt", 1},
+	{CMhelp, "help", 1},
+	{CMhstat, "hstat", 1},
+	{CMlcreate, "lcreate", 3},
+	{CMlls, "lls", 1},
+	{CMlmeta, "lmeta", 2},
+	{CMlrm, "lrm", 2},
+	{CMmpred, "mpred", 2},
+	{CMmprint, "mprint", 2},
+	{CMmstat, "mstat", 1},
+	{CMnewroot, "newroot", 2},
+	{CMnfsdebug, "nfsdebug", 0},
+	{CMp2q, "p2q", 2},
+	{CMp9debug, "p9debug", 2},
+	{CMphash, "phash", 2},
+	{CMpmeta, "pmeta", 2},
+	{CMq2m, "q2m", 2},
+	{CMqmeta, "qmeta", 2},
+	{CMrecovermeta, "recovermeta", 1},
+	{CMrevert, "revert", 2},
+	{CMrmp, "rmp", 2},
+	{CMrootallow, "rootallow", 1},
+	{CMrootdisallow, "rootdisallow", 1},
+	{CMsetmeta, "setmeta", 5},
+	{CMsetmstruct, "setmstruct", 6},
+	{CMsetqhash, "setqhash", 3},
+	{CMsnap, "snap", 1},
+	{CMsuper, "super", 1},
+	{CMsync, "sync", 1},
+};
+
+extern int chatty9p;
+
+int allow;
+int rootallow;
+
+/* Print every command word with its field count on the console. */
+static void
+showhelp(void)
+{
+	int i;
+
+	for(i = 0; i < nelem(ctab); ++i)
+		fprint(pfd[1], "%-15s %d\n", ctab[i].cmd, ctab[i].narg);
+}
+
+/*
+ * Parse a LUN id of the form "major.minor".
+ * The minor shares the low 8 bits of the qid path with the major above it
+ * (see lunqpath), so a minor above 0xff would alias another LUN; the major
+ * is limited to the 16 bits the AoE header carries.
+ * Returns 0 on success; on failure prints a message and returns -1.
+ */
+static int
+parseaoeid(char *aoeid, int *major, int *minor)
+{
+	if(sscanf(aoeid, "%d.%d", major, minor) != 2
+	|| *major < 0 || *major > 0xffff
+	|| *minor < 0 || *minor > 0xff) {
+		fprint(pfd[1], "bad lun id %s: expected major.minor\n", aoeid);
+		return -1;
+	}
+	return 0;
+}
+
+/* Qid path under which the LUN major.minor is stored. */
+static uvlong
+lunqpath(int major, int minor)
+{
+	return ((uvlong)TLun << 60) | (major << 8) | minor;
+}
+
+/*
+ * Fill in the metadata common to every directory made from the console:
+ * name, qid, mode, optional parent, times, zero length and ownership by
+ * the current user.  A parent of 0 means "do not record a parent".
+ * Linking into the sibling/child lists is left to the caller.
+ */
+static void
+initdirmeta(uvlong meta, Qid *q, char *name, uvlong parent)
+{
+	char *me;
+	uvlong now;
+
+	setmetastr(meta, "name", nil, name, 0);
+	setmetaint(meta, "qpath", nil, q->path);
+	setmetaint(meta, "qvers", nil, q->vers);
+	setmetaint(meta, "qtype", nil, q->type);
+	setmetaint(meta, "mode", nil, DMDIR | 0775);
+	if(parent != 0)
+		setmetaint(meta, "parent", nil, parent);
+	now = nsec();
+	setmetaint(meta, "atime", nil, now);
+	setmetaint(meta, "mtime", nil, now);
+	setmetaint(meta, "length", nil, 0);
+	me = getuser();
+	setmetastr(meta, "uid", nil, me, 0);
+	setmetastr(meta, "gid", nil, me, 0);
+	setmetastr(meta, "muid", nil, me, 0);
+}
+
+/*
+ * Create the LUN aoeid ("major.minor") of size 512-byte sectors, put it
+ * at the head of the superblock's LUN list and start serving it.
+ */
+static void
+lcreate(char *aoeid, uvlong size)
+{
+	Qid nqid;
+	uvlong meta, dirblk, now, nblk, pperb;
+	int sperb;
+	int aoemajor, aoeminor;
+
+	sperb = BlkSize / 512;		/* sectors per block */
+	pperb = BlkSize / 8;		/* block pointers per index block */
+	nblk = (size + sperb - 1) / sperb;
+	if(nblk + 3 >= super.nfree) {
+		fprint(pfd[1], "Not enough space\n");
+		return;
+	}
+	if(parseaoeid(aoeid, &aoemajor, &aoeminor) < 0)
+		return;
+	nqid.path = lunqpath(aoemajor, aoeminor);
+	nqid.vers = 0;
+	nqid.type = QTFILE;
+	meta = q2m(-1, nqid.path, 1);
+	if(meta == 0) {
+		fprint(pfd[1], "Creation failure\n");
+		return;
+	}
+	setmetaint(meta, "aoemajor", nil, aoemajor);
+	setmetaint(meta, "aoeminor", nil, aoeminor);
+	setmetaint(meta, "qpath", nil, nqid.path);
+	setmetaint(meta, "qvers", nil, nqid.vers);
+	setmetaint(meta, "qtype", nil, nqid.type);
+	now = nsec();
+	setmetaint(meta, "ctime", nil, now);
+	setmetaint(meta, "length", nil, size << 9);
+	/*
+	 * NOTE(review): if allocblock fails the index pointer below is
+	 * recorded as 0; confirm that locate treats that as "allocate later".
+	 */
+	dirblk = allocblock();
+	if(dirblk != 0) {
+		cbclean(dirblk);
+		cbwrite(dirblk);
+		brelease(dirblk);
+	}
+	/* the index depth is fixed here from the LUN's size */
+	if(nblk <= pperb)
+		setmetaint(meta, "index", nil, dirblk);
+	else if(nblk <= pperb * pperb)
+		setmetaint(meta, "indirect", nil, dirblk);
+	else
+		setmetaint(meta, "dblindir", nil, dirblk);
+	setmetaint(meta, "nextlun", nil, super.firstlun);
+	setqhash(nqid.path, meta);
+	super.firstlun = nqid.path;
+	savesuper();
+	starttarget(aoemajor, aoeminor, size);
+	resetmeta();
+	csync();
+	fprint(pfd[1], "Created %d.%d with qid %ulld\n", aoemajor, aoeminor, nqid.path);
+}
+
+static char llsbuf[1024];
+
+/*
+ * Format the LUN list ("major.minor length" per line) into a static
+ * buffer and return it.  The result is overwritten by the next call.
+ */
+static char *
+lls(void)
+{
+	char *p, *e;
+	uvlong x;
+	uvlong qpath, meta, length;
+	int aoemajor, aoeminor;
+
+	p = llsbuf;
+	e = llsbuf + nelem(llsbuf);
+	p = seprint(p, e, "Luns:\n");
+	for(qpath = super.firstlun; qpath; ) {
+		meta = q2m(-1, qpath, 0);
+		if(meta == 0) {
+			seprint(p, e, "no metadata for %ulld\n", qpath);
+			return llsbuf;
+		}
+		/* a missing field prints as 0 rather than as stack garbage */
+		x = 0;
+		getmetaint(-1, meta, "aoemajor", &x);
+		aoemajor = x;
+		x = 0;
+		getmetaint(-1, meta, "aoeminor", &x);
+		aoeminor = x;
+		x = 0;
+		getmetaint(-1, meta, "length", &x);
+		length = x;
+		p = seprint(p, e, "%d.%d %ulld\n", aoemajor, aoeminor, length);
+		/* without a link qpath would never change: stop instead of spinning */
+		if(getmetaint(-1, meta, "nextlun", &qpath) == MTnone)
+			break;
+	}
+	return llsbuf;
+}
+
+/* Print the metadata of the LUN aoeid ("major.minor") on the console. */
+static void
+lmeta(char *aoeid)
+{
+	uvlong qpath;
+	int aoemajor, aoeminor;
+
+	if(parseaoeid(aoeid, &aoemajor, &aoeminor) < 0)
+		return;
+	qpath = lunqpath(aoemajor, aoeminor);
+	fprint(pfd[1], "metadata for %d.%d:\n", aoemajor, aoeminor);
+	prmeta(pfd[1], qpath);
+}
+
+/*
+ * Remove the LUN aoeid: free its data, unlink it from the LUN list,
+ * drop its metadata and stop the AoE target.
+ */
+static void
+lrm(char *aoeid)
+{
+	uvlong qpath, meta, nextlun, qt, mt;
+	int aoemajor, aoeminor;
+
+	if(parseaoeid(aoeid, &aoemajor, &aoeminor) < 0)
+		return;
+	qpath = lunqpath(aoemajor, aoeminor);
+	meta = q2m(-1, qpath, 0);
+	if(meta == 0) {
+		fprint(pfd[1], "Not found\n");
+		return;
+	}
+	freedata(meta);
+	nextlun = 0;
+	getmetaint(-1, meta, "nextlun", &nextlun);
+	if(super.firstlun == qpath) {
+		super.firstlun = nextlun;
+		savesuper();
+	}
+	else {
+		/* find the predecessor: the entry whose nextlun is qpath */
+		qt = super.firstlun;
+		while(1) {
+			mt = q2m(-1, qt, 0);
+			if(mt == 0 || getmetaint(-1, mt, "nextlun", &qt) == MTnone) {
+				fprint(pfd[1], "Missing metadata in LUN set\n");
+				goto bail;
+			}
+			if(qt == qpath)
+				break;
+		}
+		setmetaint(mt, "nextlun", nil, nextlun);
+	}
+bail:
+	/* the LUN itself is removed even when the list could not be repaired */
+	rmq(qpath, meta);
+	freeblock(meta);
+	rmtarget(aoemajor, aoeminor);
+	resetmeta();
+	csync();
+}
+
+/* Create an additional, empty root directory stored under the path "/name". */
+static void
+newroot(char *name)
+{
+	Qid rootqid;
+	char *path;
+	uvlong meta;
+
+	path = smprint("/%s", name);
+	if(path == nil) {
+		fprint(pfd[1], "newroot: %r\n");
+		return;
+	}
+	rootqid.path = p2q(-1, path, 1);
+	rootqid.vers = 0;
+	rootqid.type = QTDIR;
+	meta = q2m(-1, rootqid.path, 1);
+	if(meta == 0) {
+		fprint(pfd[1], "Creation failure\n");
+		free(path);
+		return;
+	}
+	initdirmeta(meta, &rootqid, path, 0);
+	setmetaint(meta, "child", nil, 0);
+	setqhash(rootqid.path, meta);
+	savesuper();
+	free(path);
+}
+
+/*
+ * Take a snapshot of the underlying store and record it as
+ * /dump/YYYY/MMDD[n], creating /dump/YYYY on first use.  Up to ten
+ * snapshots per day are named: MMDD, then MMDD1 .. MMDD9.
+ * Returns nil on success, a static message on a logical failure, or
+ * (char*)~0 when the reason is in the error string.
+ */
+static char *
+dosnap(void)
+{
+	Qid qid;
+	Tm *today;
+	uvlong meta, dqid, dmeta, yqid, ymeta, x;
+	int fd, seq, n, year, mon, mday;
+	char path[128], sname[32];
+
+	dqid = p2q(-1, "/dump", 0);
+	if(dqid == 0)
+		return "no dump";
+	dmeta = q2m(-1, dqid, 0);
+	if(dmeta == 0)
+		return "no dump metadata";
+	snprint(path, sizeof path, "%s/ctl", ddir);
+	fd = open(path, ORDWR);
+	if(fd < 0)
+		return "no snap";
+	/* copy the fields out: localtime hands back shared storage */
+	today = localtime(time(0));
+	year = today->year + 1900;
+	mon = today->mon + 1;
+	mday = today->mday;
+	snprint(path, sizeof path, "/dump/%04d", year);
+	seq = 0;
+	yqid = p2q(-1, path, 0);
+	if(yqid == 0) {
+		/* first snapshot this year: make /dump/YYYY and link it first under /dump */
+		qid.path = p2q(-1, path, 1);
+		qid.vers = 0;
+		qid.type = QTDIR;
+		yqid = qid.path;
+		ymeta = q2m(-1, qid.path, 1);
+		if(ymeta == 0) {
+			close(fd);
+			return "can't create year directory";
+		}
+		snprint(path, sizeof path, "%04d", year);
+		initdirmeta(ymeta, &qid, path, dqid);
+		x = 0;
+		getmetaint(-1, dmeta, "child", &x);
+		setmetaint(ymeta, "sib", nil, x);
+		setmetaint(dmeta, "child", nil, yqid);
+		setmetaint(ymeta, "child", nil, 0);
+		setqhash(qid.path, ymeta);
+		savesuper();
+		snprint(path, sizeof path, "/dump/%04d/%02d%02d", year, mon, mday);
+	}
+	else {
+		snprint(path, sizeof path, "/dump/%04d/%02d%02d", year, mon, mday);
+		if(p2q(-1, path, 0) != 0) {
+			/* today's name is taken: try the suffixes 1..9 */
+			for(seq = 1; seq < 10; ++seq) {
+				snprint(path, sizeof path, "/dump/%04d/%02d%02d%d",
+					year, mon, mday, seq);
+				if(p2q(-1, path, 0) == 0)
+					break;
+			}
+			if(seq >= 10) {
+				close(fd);
+				return "too many snaps";
+			}
+		}
+		ymeta = q2m(-1, yqid, 0);
+		if(ymeta == 0) {
+			close(fd);
+			return "no metadata for year directory";
+		}
+	}
+	qid.path = p2q(-1, path, 1);
+	qid.vers = 0;
+	qid.type = QTDIR;
+	meta = q2m(-1, qid.path, 1);
+	if(meta == 0) {
+		close(fd);
+		return "can't create snapshot directory";
+	}
+	if(seq == 0) {
+		snprint(path, sizeof path, "%02d%02d", mon, mday);
+		snprint(sname, sizeof sname, "%s.%04d%02d%02d",
+			dname, year, mon, mday);
+	}
+	else {
+		snprint(path, sizeof path, "%02d%02d%d", mon, mday, seq);
+		snprint(sname, sizeof sname, "%s.%04d%02d%02d%d",
+			dname, year, mon, mday, seq);
+	}
+	/* flush before asking for the snapshot, so it sees current contents */
+	resetmeta();
+	csync();
+	n = fprint(fd, "snap %s %s", dname, sname);
+	close(fd);
+	if(n < 0)
+		return (char *)(~0);
+	initdirmeta(meta, &qid, path, yqid);
+	x = 0;
+	getmetaint(-1, ymeta, "child", &x);
+	setmetaint(meta, "sib", nil, x);
+	setmetaint(ymeta, "child", nil, qid.path);
+	setmetastr(meta, "snap", nil, sname, 0);
+	setqhash(qid.path, meta);
+	savesuper();
+	return nil;
+}
+
+/*
+ * Revert the store to the snapshot named snap, given as "YYYY/MMDD[n]"
+ * or already joined as "YYYYMMDD[n]".  Cached metadata and data are
+ * discarded whether or not the request succeeds.
+ * Returns nil on success, (char*)~0 with the error string set otherwise.
+ */
+static char *
+revert(char *snap)
+{
+	char *path, *p;
+	int fd, n;
+
+	path = smprint("%s/ctl", ddir);
+	fd = open(path, ORDWR);
+	free(path);
+	if(fd < 0)
+		return (char *)(~0);
+	p = strchr(snap, '/');
+	if(p)
+		path = smprint("%s.%.*s%s", dname, (int)(p - snap), snap, p + 1);
+	else
+		path = smprint("%s.%s", dname, snap);
+	n = fprint(fd, "revert %s %s", dname, path);
+	free(path);
+	close(fd);
+	resetmeta();
+	resetcache();
+	if(n < 0)
+		return (char *)(~0);
+	return nil;
+}
+
+/* Stop the snapshot timer and every service, then kill this thread group. */
+static void
+doshutdown(void)
+{
+	shutdown = 1;
+	threadkill(snapid);
+	haltaoe();
+	haltnfs();
+	halt9p();
+	haltfree();
+	haltcache();
+	threadkillgrp(threadgetgrp());
+}
+
+/* Print the outcome of dosnap/revert: nothing on success, else the reason. */
+static void
+conserror(char *s)
+{
+	if(s == (char *)(~0))
+		fprint(pfd[1], "%r\n");
+	else if(s)
+		fprint(pfd[1], "%s\n", s);
+}
+
+/*
+ * Interactive console: prompt on pfd[1], read a command line from it,
+ * run the command, repeat.  Returns on EOF, on a read error, or after
+ * "halt".
+ */
+void
+docons(void *x)
+{
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	char buf[256];
+	uvlong vl;
+	int n;
+
+	USED(x);
+	while(1) {
+		fprint(pfd[1], "> ");
+		n = ioread(consio, pfd[1], buf, sizeof buf - 1);
+		if(n <= 0)
+			return;
+		buf[n] = 0;
+		cb = parsecmd(buf, n);
+		if(cb == nil) {
+			fprint(pfd[1], "Unparsable command %s\n", buf);
+			continue;
+		}
+		if(cb->nf == 0) {
+			free(cb);
+			continue;
+		}
+		ct = lookupcmd(cb, ctab, nelem(ctab));
+		if(ct == nil) {
+			fprint(pfd[1], "%s: %r\n", buf);
+			free(cb);
+			continue;
+		}
+		switch(ct->index) {
+		case CMallow:
+			allow = 1;
+			break;
+		case CMblockuse:
+			blockuse(pfd[1], strtoull(cb->f[1], nil, 0));
+			break;
+		case CMcheckalloc:
+			checkalloc(pfd[1]);
+			break;
+		case CMcstat:
+			fprint(pfd[1], "%s", prcstat());
+			break;
+		case CMdisallow:
+			allow = 0;
+			break;
+		case CMfixfamilies:
+			fixfamilies(pfd[1]);
+			break;
+		case CMfixpaths:
+			fixpaths(pfd[1]);
+			break;
+		case CMhalt:
+			free(cb);
+			doshutdown();
+			return;
+		case CMhelp:
+			showhelp();
+			break;
+		case CMhstat:
+			fprint(pfd[1], "%s", prhstat());
+			break;
+		case CMlcreate:
+			lcreate(cb->f[1], strtoull(cb->f[2], nil, 10));
+			break;
+		case CMlls:
+			fprint(pfd[1], "%s", lls());
+			break;
+		case CMlmeta:
+			lmeta(cb->f[1]);
+			break;
+		case CMlrm:
+			lrm(cb->f[1]);
+			break;
+		case CMmpred:
+			mpred(pfd[1], strtoull(cb->f[1], nil, 0));
+			break;
+		case CMmprint:
+			mprint(pfd[1], strtoull(cb->f[1], nil, 0));
+			break;
+		case CMmstat:
+			fprint(pfd[1], "%s", prmstat());
+			break;
+		case CMnewroot:
+			newroot(cb->f[1]);
+			break;
+		case CMnfsdebug:
+			/* no argument: show the level; otherwise set it */
+			if(cb->nf < 2)
+				fprint(pfd[1], "%d\n", debugnfs);
+			else
+				debugnfs = atoi(cb->f[1]);
+			break;
+		case CMp2q:
+			vl = p2q(-1, cb->f[1], 0);
+			fprint(pfd[1], "%ulld\n", vl);
+			break;
+		case CMp9debug:
+			chatty9p = atoi(cb->f[1]);
+			break;
+		case CMphash:
+			showphash(pfd[1], cb->f[1]);
+			break;
+		case CMpmeta:
+			fprint(pfd[1], "metadata for %s\n", cb->f[1]);
+			prmeta(pfd[1], p2q(-1, cb->f[1], 0));
+			break;
+		case CMq2m:
+			vl = q2m(-1, strtoull(cb->f[1], nil, 10), 0);
+			fprint(pfd[1], "%ulld\n", vl);
+			break;
+		case CMqmeta:
+			fprint(pfd[1], "metadata for %s\n", cb->f[1]);
+			prmeta(pfd[1], strtoull(cb->f[1], nil, 10));
+			break;
+		case CMrecovermeta:
+			recovermeta(pfd[1]);
+			break;
+		case CMrevert:
+			conserror(revert(cb->f[1]));
+			break;
+		case CMrmp:
+			rmp(cb->f[1]);
+			break;
+		case CMrootallow:
+			rootallow = 1;
+			break;
+		case CMrootdisallow:
+			rootallow = 0;
+			break;
+		case CMsetmeta:
+			/* setmeta qpath s|i name value */
+			vl = q2m(-1, strtoull(cb->f[1], nil, 10), 0);
+			if(vl == 0)
+				fprint(pfd[1], "no metadata for %s\n", cb->f[1]);
+			else if(cb->f[2][0] == 's')
+				setmetastr(vl, cb->f[3], nil, cb->f[4], 0);
+			else
+				setmetaint(vl, cb->f[3], nil, strtoull(cb->f[4], nil, 0));
+			break;
+		case CMsetmstruct:
+			vl = strtoull(cb->f[1], nil, 0);
+			setmstruct(vl, strtoull(cb->f[2], nil, 0), cb->f[3], atoi(cb->f[4]), strtoull(cb->f[5], nil, 0));
+			break;
+		case CMsetqhash:
+			setqhash(strtoull(cb->f[1], nil, 0), strtoull(cb->f[2], nil, 0));
+			break;
+		case CMsnap:
+			conserror(dosnap());
+			break;
+		case CMsuper:
+			fprint(pfd[1], "%s", prsuper());
+			break;
+		case CMsync:
+			resetmeta();
+			csync();
+			break;
+		}
+		free(cb);
+	}
+}
+
+/* Reading the ctl file returns the superblock, the statistics and the LUN list. */
+static void
+θconsread(Req *r)
+{
+	char *s;
+
+	s = smprint("%s\n%s\n%s\n%s\n%s", prsuper(), prcstat(), prmstat(), prhstat(), lls());
+	if(s == nil) {
+		respond(r, "out of memory");
+		return;
+	}
+	readstr(r, s);
+	free(s);
+	respond(r, nil);
+}
+
+/*
+ * Writing a command line to the ctl file runs the subset of console
+ * commands that need no console output.  On success the whole write is
+ * acknowledged; on failure the reply carries the reason.
+ */
+static void
+θconswrite(Req *r)
+{
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	char *s;
+	char err[ERRMAX];
+
+	s = nil;
+	cb = parsecmd(r->ifcall.data, r->ifcall.count);
+	if(cb == nil) {
+		respond(r, "unparsable command");
+		return;
+	}
+	if(cb->nf == 0) {
+		free(cb);
+		r->ofcall.count = r->ifcall.count;
+		respond(r, nil);
+		return;
+	}
+	ct = lookupcmd(cb, ctab, nelem(ctab));
+	if(ct == nil) {
+		/* lookupcmd leaves its reason in the error string, not in r->error */
+		respondcmderror(r, cb, "%r");
+		free(cb);
+		return;
+	}
+	switch(ct->index) {
+	case CMallow:
+		allow = 1;
+		break;
+	case CMcheckalloc:
+		checkalloc(pfd[1]);
+		break;
+	case CMdisallow:
+		allow = 0;
+		break;
+	case CMlcreate:
+		lcreate(cb->f[1], strtoull(cb->f[2], nil, 10));
+		break;
+	case CMlrm:
+		lrm(cb->f[1]);
+		break;
+	case CMnewroot:
+		newroot(cb->f[1]);
+		break;
+	case CMp9debug:
+		chatty9p = atoi(cb->f[1]);
+		break;
+	case CMrevert:
+		s = revert(cb->f[1]);
+		break;
+	case CMrmp:
+		rmp(cb->f[1]);
+		break;
+	case CMrootallow:
+		rootallow = 1;
+		break;
+	case CMrootdisallow:
+		rootallow = 0;
+		break;
+	case CMsetmstruct:
+		setmstruct(strtoull(cb->f[1], nil, 0), strtoull(cb->f[2], nil, 0), cb->f[3], atoi(cb->f[4]), strtoull(cb->f[5], nil, 0));
+		break;
+	case CMsetqhash:
+		setqhash(strtoull(cb->f[1], nil, 0), strtoull(cb->f[2], nil, 0));
+		break;
+	case CMsnap:
+		s = dosnap();
+		break;
+	case CMsync:
+		resetmeta();
+		csync();
+		break;
+	default:
+		s = "unsupported ctl command";
+		break;
+	}
+	if(s == (char *)(~0)) {
+		/* the callee reported through the error string */
+		rerrstr(err, sizeof err);
+		s = err;
+	}
+	free(cb);
+	if(s == nil)
+		r->ofcall.count = r->ifcall.count;
+	respond(r, s);
+}
+
+/* Serve 9P on s until the connection ends, then close its descriptor. */
+static void
+mysrvproc(void *a)
+{
+	Srv *s;
+	int data;
+
+	s = a;
+	data = s->infd;
+	srv(s);
+	close(data);
+	threadexits(nil);
+}
+
+/* Take a snapshot each time snapproc sends on snaptrigger; exit at shutdown. */
+static void
+snapthread(void *)
+{
+	while(1) {
+		recvul(snaptrigger);
+		if(shutdown)
+			break;
+		dosnap();
+	}
+	threadexits(nil);
+}
+
+/*
+ * Timer: once a day, at super.snaptime seconds past midnight (UTC, see
+ * below), wake snapthread.
+ */
+static void
+snapproc(void *)
+{
+//	Tm *now;
+	ulong cursec, waitsec;
+
+	sleep(300*1000);	/* give the clock time to be set before looking at the time of day */
+	while(1) {
+		/*
+		 * We'd like to get the time zone correction here, but
+		 * it doesn't play nice with the threading.  I'll come
+		 * back to this later.
+		 */
+//		now = localtime(time(nil));
+//		cursec = (now->hour * 60 + now->min) * 60 + now->sec;
+		cursec = time(nil) % SecPerDay;
+		waitsec = (super.snaptime + SecPerDay - cursec) % SecPerDay;
+		/* closer than a minute: we have just fired, wait a whole day */
+		if(waitsec < 60)
+			waitsec = SecPerDay;
+		sleep(waitsec*1000);
+		sendul(snaptrigger, 1);
+		if(shutdown)
+			break;
+	}
+	threadexits(nil);
+}
+
+/*
+ * Start the snapshot timer and, if postcons is set, post the console
+ * and the θfsctl file service.
+ */
+void
+initcons(int postcons)
+{
+	char *me;
+	int cfd[2];
+
+	if(postcons) {
+		consio = ioproc();
+		me = getuser();
+		θconssrv.tree = alloctree(me, me, 0555, nil);
+		createfile(θconssrv.tree->root, "θfsctl", me, 0664, nil);
+		if(pipe(cfd) < 0)
+			sysfatal("pipe: %r");
+		θconssrv.infd = θconssrv.outfd = cfd[1];
+		conspost(cfd, pfd);
+		threadcreate(mysrvproc, &θconssrv, 32 * 1024);
+	}
+	snaptrigger = chancreate(sizeof(ulong), 2);
+	threadcreate(snapthread, nil, 8192);
+	snapid = proccreate(snapproc, nil, 1024);
+}
--- /sys/src/cmd/θfs/util.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/util.c Thu Feb 20 02:17:43 2014
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Coraid nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NOTE(review): the <...> header names were missing from this copy of the
+ * patch; these are the standard lib9p prologue -- confirm.
+ */
+#include <u.h>
+#include <libc.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include "dat.h"
+
+/*
+ * Version of pread that's careful to always work on sector boundaries.
+ * An unaligned request is widened to whole 512-byte sectors, read into
+ * a bounce buffer, and the wanted bytes are copied out.
+ * Returns the number of bytes placed in a (0 at end of data) or a
+ * negative value on error.
+ */
+long
+spread(int fd, void *a, long n, uvlong off)
+{
+	char *buf;
+	uvlong aoff, boff;
+	long an, rn;
+
+	boff = off % 512;
+	if(n % 512 == 0 && boff == 0)
+		return pread(fd, a, n, off);
+	aoff = off - boff;
+	an = (n + boff + 511) & ~511;
+	buf = θmalloc(an);
+	if(buf == nil) {
+		/* θmalloc refuses very large requests */
+		werrstr("spread: request too large");
+		return -1;
+	}
+	rn = pread(fd, buf, an, aoff);
+	if(rn <= 0) {
+		free(buf);
+		return rn;
+	}
+	if((uvlong)rn <= boff) {
+		/* short read that ended before the first wanted byte */
+		free(buf);
+		return 0;
+	}
+	rn -= boff;
+	if(rn > n)
+		rn = n;
+	memmove(a, buf + boff, rn);
+	free(buf);
+	return rn;
+}
+
+/*
+ * Read up to n bytes at offset off from the file with qid path qpath.
+ * With fd == -1 the block cache is used; otherwise the device open on
+ * fd is read directly.  Blocks that were never allocated read as zeros
+ * and the request is clipped to the recorded file length.
+ * Returns the number of bytes read, or -1 if the file has no metadata
+ * or the very first block could not be read.
+ */
+long
+θpread(int fd, uvlong qpath, void *a, long n, uvlong off)
+{
+	uvlong fblk, meta, now, len;
+	ulong m, tot, boff;
+	long r;
+
+	meta = q2m(fd, qpath, 0);
+	if(meta == 0)
+		return -1;
+	if(getmetaint(fd, meta, "length", &len) == MTnone)
+		len = 0;
+	if(off >= len)
+		n = 0;
+	else if(off + n > len)
+		n = len - off;
+	tot = 0;
+	while(n > 0) {
+		fblk = locate(fd, meta, off / BlkSize, 0);
+		boff = off % BlkSize;
+		if(boff + n > BlkSize)
+			m = BlkSize - boff;
+		else
+			m = n;
+		if(fblk != 0) {
+			/*
+			 * Keep the result signed: a failure must end the
+			 * loop, not be added in as a huge byte count.
+			 * NOTE(review): assumes cread, like spread, reports
+			 * failure with a value <= 0 -- confirm.
+			 */
+			if(fd == -1)
+				r = cread((char *)a + tot, m, fblk * BlkSize + boff);
+			else
+				r = spread(fd, (char *)a + tot, m, fblk * BlkSize + boff);
+			if(r <= 0) {
+				if(r < 0 && tot == 0)
+					return -1;
+				break;
+			}
+			m = r;
+		}
+		else
+			memset((char *)a + tot, 0, m);
+		n -= m;
+		off += m;
+		tot += m;
+	}
+	if(fd == -1 && doatimes) {
+		now = nsec();
+		setmetaint(meta, "atime", nil, now);
+	}
+	return tot;
+}
+
+/*
+ * Write n bytes from a to the file with qid path qpath, via the cache.
+ * grow == 0: overwrite only; the request is clipped to the current length.
+ * grow == 1: write at off and extend the length if needed.
+ * grow == 2: append; off is ignored and the write starts at the current
+ * length.
+ * Returns the number of bytes written, which is short when a block
+ * cannot be allocated or written; -1 if the file has no metadata.
+ */
+long
+θpwrite(uvlong qpath, void *a, long n, uvlong off, int grow)
+{
+	uvlong fblk, meta, woff, now, len, qvers;
+	ulong m, tot, boff;
+	long r;
+
+	meta = q2m(-1, qpath, 0);
+	if(meta == 0)
+		return -1;
+	if(getmetaint(-1, meta, "length", &len) == MTnone)
+		len = 0;
+	if(grow == 0) {
+		if(off >= len)
+			n = 0;
+		else if(off + n > len)
+			n = len - off;
+	}
+	else if (grow == 2)
+		off = len;
+	woff = off;
+	tot = 0;
+	while(n > 0) {
+		fblk = locate(-1, meta, woff / BlkSize, 1);
+		if(fblk == 0)
+			break;
+		boff = woff % BlkSize;
+		if(boff + n > BlkSize)
+			m = BlkSize - boff;
+		else
+			m = n;
+		/* NOTE(review): assumes cwrite reports failure with a value <= 0 -- confirm */
+		r = cwrite((char *)a + tot, m, fblk * BlkSize + boff);
+		if(r <= 0)
+			break;
+		m = r;
+		woff += m;
+		n -= m;
+		tot += m;
+	}
+	if(grow) {
+		if(off + tot > len)
+			setmetaint(meta, "length", nil, off + tot);
+	}
+	now = nsec();
+	setmetaint(meta, "mtime", nil, now);
+	setmetaint(meta, "atime", nil, now);
+	/* bump the version only if there is one; never store an unread value */
+	if(getmetaint(-1, meta, "qvers", &qvers) != MTnone)
+		setmetaint(meta, "qvers", nil, qvers + 1);
+	return tot;
+}
+
+/*
+ * Unlink the file myqid (metadata at meta) from its parent's child
+ * list and bump the parent's qid version.  The list is singly linked:
+ * the parent's "child" names the first entry and each entry's "sib"
+ * names the next.
+ */
+void
+rmdlist(uvlong meta, uvlong myqid)
+{
+	uvlong sibqid, pqid, predqid;
+	uvlong pmeta, qvers;
+
+	sibqid = 0;
+	pqid = 0;
+	predqid = 0;
+	getmetaint(-1, meta, "sib", &sibqid);
+	getmetaint(-1, meta, "parent", &pqid);
+	pmeta = q2m(-1, pqid, 0);
+	if(pmeta == 0) {
+		fprint(2, "warning: no parent?!?!\n");
+		return;
+	}
+	if(getmetaint(-1, pmeta, "qvers", &qvers) != MTnone)
+		setmetaint(pmeta, "qvers", nil, qvers + 1);
+	getmetaint(-1, pmeta, "child", &predqid);
+	if(predqid == myqid) {
+		setmetaint(pmeta, "child", nil, sibqid);
+		return;
+	}
+	/* walk the siblings until the one that points at myqid */
+	do {
+		pmeta = q2m(-1, predqid, 0);
+		if(pmeta == 0)
+			return;
+		/* a broken link would leave predqid unchanged and spin forever */
+		if(getmetaint(-1, pmeta, "sib", &predqid) == MTnone)
+			return;
+	} while(predqid != myqid);
+	setmetaint(pmeta, "sib", nil, sibqid);
+}
+
+/*
+ * Allocate x bytes with emalloc9p.  Requests above 6553600 bytes are
+ * refused: the caller's pc is printed on fd 2 and nil is returned, so
+ * callers must check the result.
+ */
+void *
+θmalloc(ulong x)
+{
+	if(x > 6553600) {
+		fprint(2, "$%p", getcallerpc(&x));
+		return nil;
+	}
+	else
+		return emalloc9p(x);
+}
+
--- /sys/src/cmd/θfs/uid.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/uid.c Thu Feb 20 02:17:43 2014
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Coraid nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NOTE(review): the <...> header names were missing from this copy of the
+ * patch; these are the standard lib9p prologue -- confirm.
+ */
+#include <u.h>
+#include <libc.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include "dat.h"
+
+#define MAXFIELDS 100
+
+typedef struct Filebuf Filebuf;
+typedef struct P9user P9user;
+typedef struct Unixgroup Unixgroup;
+typedef struct Unixsys Unixsys;
+typedef struct Unixuser Unixuser;
+
+/* A whole file held in memory and split into lines. */
+struct Filebuf
+{
+	char *buf;	/* file contents, NUL terminated; lines point into it */
+	char **lines;
+	int nlines;	/* number of valid entries in lines */
+};
+
+/* One line of /adm/users: id:name:leader:members */
+struct P9user
+{
+	char *id;
+	char *name;
+	char *leader;	/* nil when the field is empty */
+	char **members;
+	int nmembers;
+};
+
+struct Unixgroup
+{
+	char *name;
+	int id;
+	char **members;
+	int nmembers;
+};
+
+/* The user and group tables of one Unix system named in /adm/nfs. */
+struct Unixsys
+{
+	char *name;
+	int nusers, ngroups;
+	Unixuser *users;
+	Unixgroup *groups;
+	Unixsys *next;
+};
+
+struct Unixuser
+{
+	char *name;
+	int id;
+};
+
+static P9user *p9users;
+static int np9users;
+static Unixsys *unixhd;
+
+/*
+static void
+dumpusers(void)
+{
+	P9user *u;
+	int i, j, fd;
+
+	fd = create("/tmp/users.dump", OWRITE, 0666);
+	for(u = p9users, i = 0; i < np9users; ++u, ++i) {
+		fprint(fd, "id:%s name:%s leader:%s nmembers:%d\n",
+			u->id, u->name, u->leader, u->nmembers);
+		for(j = 0; j < u->nmembers; ++j)
+			fprint(fd, "%s ", u->members[j]);
+		fprint(fd, "\n");
+	}
+	close(fd);
+}
+*/
+
+/*
+ * Read the file called name out of the file system and split it into
+ * lines.  Returns nil if the file is missing, too large for θmalloc,
+ * or cannot be read in full.  Free the result with freefile.
+ */
+static Filebuf *
+loadfile(char *name)
+{
+	Filebuf *fb;
+	char *p;
+	uvlong len;
+	uvlong qpath, meta;
+	long n;
+	int nl;
+
+	qpath = p2q(-1, name, 0);
+	if(qpath == 0)
+		return nil;
+	meta = q2m(-1, qpath, 0);
+	if(meta == 0)
+		return nil;
+	if(getmetaint(-1, meta, "length", &len) == MTnone)
+		return nil;
+	fb = θmalloc(sizeof(Filebuf));
+	/* one extra byte: gettokens needs a NUL-terminated string */
+	fb->buf = θmalloc(len + 1);
+	if(fb->buf == nil) {
+		free(fb);
+		return nil;
+	}
+	n = θpread(-1, qpath, fb->buf, len, 0);
+	/* test the sign first: -1 compared as unsigned is never "less than len" */
+	if(n < 0 || (uvlong)n < len) {
+		free(fb->buf);
+		free(fb);
+		return nil;
+	}
+	fb->buf[n] = 0;
+	/* upper bound on the line count; a last line without '\n' counts too */
+	nl = 0;
+	for(p = fb->buf; p < fb->buf + n; ++p)
+		if(*p == '\n')
+			++nl;
+	if(n > 0 && fb->buf[n-1] != '\n')
+		++nl;
+	fb->lines = θmalloc((nl + 1) * sizeof(char *));
+	if(fb->lines == nil) {
+		free(fb->buf);
+		free(fb);
+		return nil;
+	}
+	/*
+	 * gettokens may store fewer entries than nl (it does not return
+	 * empty lines); keep the real count so callers never see nil.
+	 */
+	fb->nlines = gettokens(fb->buf, fb->lines, nl, "\n");
+	return fb;
+}
+
+/* Release a Filebuf returned by loadfile. */
+static void
+freefile(Filebuf *fb)
+{
+	free(fb->buf);
+	free(fb->lines);
+	free(fb);
+}
+
+/*
+ * Load /adm/users into p9users.  Lines starting with '#' and lines with
+ * fewer than three fields are skipped.
+ */
+static void
+loadusers(void)
+{
+	Filebuf *fb;
+	P9user *u;
+	char *flds[MAXFIELDS];
+	int i, j, n, nm;
+
+	np9users = 0;
+	fb = loadfile("//adm/users");
+	if(fb == nil) {
+		fprint(2, "unexpected no /adm/users\n");
+		return;
+	}
+	p9users = θmalloc((fb->nlines + 1) * sizeof(P9user));
+	if(p9users == nil) {
+		freefile(fb);
+		return;
+	}
+	for(i = 0; i < fb->nlines; ++i) {
+		if(fb->lines[i][0] == '#')
+			continue;
+		n = getfields(fb->lines[i], flds, MAXFIELDS, 0, ":,");
+		if(n < 3)
+			continue;
+		u = &p9users[np9users];
+		u->id = estrdup9p(flds[0]);
+		u->name = estrdup9p(flds[1]);
+		u->leader = nil;
+		if(flds[2][0] != '\0')
+			u->leader = estrdup9p(flds[2]);
+		/* flds[3] exists only when n > 3; an empty first member means none */
+		nm = 0;
+		if(n > 3 && flds[3][0] != '\0')
+			nm = n - 3;
+		u->nmembers = nm;
+		u->members = θmalloc((nm + 1) * sizeof(char *));
+		for(j = 0; j < nm; ++j)
+			u->members[j] = estrdup9p(flds[j + 3]);
+		++np9users;
+	}
+	freefile(fb);
+}
+
+/*
+ * Build the tables for one Unix system from an /adm/nfs line:
+ * toks[0] is the system name, toks[1] the file with its users and
+ * toks[2] the file with its groups.  Returns nil if either file
+ * cannot be loaded.
+ */
+static Unixsys *
+buildsys(char *toks[3])
+{
+	Filebuf *ufb, *gfb;
+	Unixsys *us;
+	Unixuser *uu;
+	Unixgroup *ug;
+	char *flds[MAXFIELDS];
+	int i, j, n;
+
+	ufb = loadfile(toks[1]);
+	if(ufb == nil)
+		return nil;
+	gfb = loadfile(toks[2]);
+	if(gfb == nil) {
+		freefile(ufb);
+		return nil;
+	}
+	us = θmalloc(sizeof(Unixsys));
+	us->users = θmalloc((ufb->nlines + 1) * sizeof(Unixuser));
+	us->groups = θmalloc((gfb->nlines + 1) * sizeof(Unixgroup));
+	if(us->users == nil || us->groups == nil) {
+		free(us->users);
+		free(us->groups);
+		free(us);
+		freefile(ufb);
+		freefile(gfb);
+		return nil;
+	}
+	us->name = estrdup9p(toks[0]);
+	/*
+	 * Pack the accepted entries: a skipped line must not leave a
+	 * zeroed slot behind, which would answer lookups for id 0.
+	 */
+	us->nusers = 0;
+	for(i = 0; i < ufb->nlines; ++i) {
+		n = getfields(ufb->lines[i], flds, MAXFIELDS, 0, ":");
+		if(n < 3)
+			continue;
+		uu = &us->users[us->nusers++];
+		uu->name = estrdup9p(flds[0]);
+		uu->id = atoi(flds[2]);
+	}
+	us->ngroups = 0;
+	for(i = 0; i < gfb->nlines; ++i) {
+		n = getfields(gfb->lines[i], flds, MAXFIELDS, 0, ":,");
+		if(n < 3)
+			continue;
+		ug = &us->groups[us->ngroups++];
+		ug->name = estrdup9p(flds[0]);
+		/*
+		 * NOTE(review): the id comes from field 1 while members start
+		 * at field 3.  In a Unix group file (name:passwd:gid:members)
+		 * the gid is field 2 -- confirm the intended file format.
+		 */
+		ug->id = atoi(flds[1]);
+		ug->nmembers = n - 3;
+		ug->members = θmalloc((ug->nmembers + 1) * sizeof(char *));
+		for(j = 0; j < ug->nmembers; ++j)
+			ug->members[j] = estrdup9p(flds[j+3]);
+	}
+	freefile(ufb);
+	freefile(gfb);
+	return us;
+}
+
+/*
+ * Load /adm/users and, if /adm/nfs exists, the tables of every Unix
+ * system it lists ("name userfile groupfile" per line).
+ */
+void
+inituid(void)
+{
+	Filebuf *fb;
+	Unixsys *us;
+	char *toks[3];
+	int i;
+
+	loadusers();
+
+//	dumpusers();
+
+	fb = loadfile("//adm/nfs");
+	if(fb != nil) {
+		for(i = 0; i < fb->nlines; ++i) {
+			/* a short line would leave toks[1] or toks[2] unset */
+			if(tokenize(fb->lines[i], toks, 3) != 3)
+				continue;
+			us = buildsys(toks);
+			if(us) {
+				us->next = unixhd;
+				unixhd = us;
+			}
+		}
+		freefile(fb);
+	}
+}
+
+/*
+ * Report whether user belongs to group: every user is in the group of
+ * the same name, otherwise user must be listed as a member.
+ */
+int
+ingroup(char *user, char *group)
+{
+	int i, j;
+
+	if(strcmp(user, group) == 0)
+		return 1;
+	for(i = 0; i < np9users && strcmp(group, p9users[i].name); ++i) ;
+	if(i >= np9users)
+		return 0;
+	for(j = 0; j < p9users[i].nmembers && strcmp(user, p9users[i].members[j]); ++j) ;
+	if(j >= p9users[i].nmembers)
+		return 0;
+	return 1;
+}
+
+/* Report whether user is the recorded leader of group. */
+int
+isleader(char *user, char *group)
+{
+	int i;
+
+	for(i = 0; i < np9users && strcmp(group, p9users[i].name); ++i) ;
+	if(i >= np9users)
+		return 0;
+	if(p9users[i].leader == nil)
+		return 0;
+	if(strcmp(user, p9users[i].leader) == 0)
+		return 1;
+	return 0;
+}
+
+/*
+ * Map a numeric user id on the Unix system sys to its name.
+ * Returns nil if the system or the id is unknown.  The string belongs
+ * to the table; do not free it.
+ */
+char *
+id2uname(char *sys, int id)
+{
+	Unixsys *s;
+	int i;
+
+	for(s = unixhd; s && strcmp(s->name, sys) != 0; s = s->next) ;
+	if(s == nil)
+		return nil;
+	for(i = 0; i < s->nusers && s->users[i].id != id; ++i) ;
+	if(i >= s->nusers)
+		return nil;
+	return s->users[i].name;
+}
+
+/*
+ * Map a numeric group id on the Unix system sys to its name.
+ * Returns nil if the system or the id is unknown.  The string belongs
+ * to the table; do not free it.
+ */
+char *
+id2gname(char *sys, int id)
+{
+	Unixsys *s;
+	int i;
+
+	for(s = unixhd; s && strcmp(s->name, sys) != 0; s = s->next) ;
+	if(s == nil)
+		return nil;
+	for(i = 0; i < s->ngroups && s->groups[i].id != id; ++i) ;
+	if(i >= s->ngroups)
+		return nil;
+	return s->groups[i].name;
+}
--- /sys/src/cmd/θfs/super.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/super.c Thu Feb 20 02:17:44 2014
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * NOTE(review): the <...> header names were missing from this copy of the
+ * patch; these are the standard lib9p prologue -- confirm.
+ */
+#include <u.h>
+#include <libc.h>
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include "dat.h"
+
+enum {
+	Nht = 67108859,		/* hash table size assumed when the superblock records none */
+};
+
+/* Candidate hash table sizes; ream picks the first that is >= the block count. */
+static ulong primes[] = {1048573, 2097143, 4194301, 8388593,
+	16777213, 33554393, 67108859, 134217689, 268435399};
+
+static Lock slock;
+
+Super super;
+
+/*
+ * Read the superblock (block 0) into super, fill in defaults for
+ * fields an older superblock leaves zero, and start the free-block
+ * code.  Dies if the magic number is wrong.
+ */
+void
+loadsuper(void)
+{
+	Super *sp;
+
+	lock(&slock);
+	sp = cbread(0);
+	if(sp->magic != Magicθ)
+		sysfatal("Bad super magic");
+	memmove(&super, sp, sizeof(Super));
+	brelease(0);
+	if(super.nht == 0) {
+		super.nht = Nht;
+		super.nhashblk = (super.nht + NPerBlk - 1) / NPerBlk;
+	}
+	if(super.snaptime == 0)
+		super.snaptime = (3 * 60 + 15) * 60;	/* 03:15, in seconds past midnight */
+	initfree();
+	unlock(&slock);
+}
+
+static char supbuf[1024];
+
+/*
+ * Format the in-memory superblock into a static buffer and return it.
+ * The result is overwritten by the next call.
+ */
+char *
+prsuper(void)
+{
+	char *p, *e;
+
+	p = supbuf;
+	e = p + nelem(supbuf);
+	p = seprint(p, e, "Superblock:\n");
+	p = seprint(p, e, "magic: %ulld(0x%ullx)\n", super.magic, super.magic);
+	p = seprint(p, e, "qgen: %ulld(0x%ullx)\n", super.qgen, super.qgen);
+	p = seprint(p, e, "nblk: %ulld(0x%ullx)\n", super.nblk, super.nblk);
+	p = seprint(p, e, "nfreemap: %ulld(0x%ullx)\n", super.nfreemap, super.nfreemap);
+	p = seprint(p, e, "freemap: %ulld(0x%ullx)\n", super.freemap, super.freemap);
+	p = seprint(p, e, "stat: %ulld(0x%ullx)\n", super.state, super.state);
+	p = seprint(p, e, "firstdat: %ulld(0x%ullx)\n", super.firstdat, super.firstdat);
+	p = seprint(p, e, "nfree: %ulld(0x%ullx)\n", super.nfree, super.nfree);
+	p = seprint(p, e, "firstlun: %ulld(0x%ullx)\n", super.firstlun, super.firstlun);
+	p = seprint(p, e, "nmeta: %ulld(0x%ullx)\n", super.nmeta, super.nmeta);
+	p = seprint(p, e, "firstmeta: %ulld(0x%ullx)\n", super.firstmeta, super.firstmeta);
+	p = seprint(p, e, "ffmeta: %ulld(0x%ullx)\n", super.ffmeta, super.ffmeta);
+	p = seprint(p, e, "nblob: %ulld(0x%ullx)\n", super.nblob, super.nblob);
+	p = seprint(p, e, "firstblob: %ulld(0x%ullx)\n", super.firstblob, super.firstblob);
+	p = seprint(p, e, "ffblob: %ulld(0x%ullx)\n", super.ffblob, super.ffblob);
+	p = seprint(p, e, "lfblob: %ulld(0x%ullx)\n", super.lfblob, super.lfblob);
+	p = seprint(p, e, "nht: %ulld(0x%ullx)\n", super.nht, super.nht);
+	seprint(p, e, "nhashblk: %ulld(0x%ullx)\n", super.nhashblk, super.nhashblk);
+	return supbuf;
+}
+
+/* Write the in-memory superblock to block 0 through the cache. */
+void
+savesuper(void)
+{
+	char *p;
+
+	lock(&slock);
+	p = cbread(0);
+	memset(p, 0, BlkSize);
+	memmove(p, &super, sizeof(Super));
+	cbwrite(0);
+	brelease(0);
+	unlock(&slock);
+}
+
+/* pwrite that treats a failed or short write as fatal: a half-written layout is useless. */
+static void
+reamwrite(int fd, void *buf, long n, uvlong off)
+{
+	if(pwrite(fd, buf, n, off) != n)
+		sysfatal("ream: write at offset %ulld: %r", off);
+}
+
+/*
+ * Initialize a new file system on dev, destroying what was there.
+ * The layout, in blocks, is: superblock (0), hash tables
+ * (1 .. freemap-1), free bitmap, metadata region, blob region, data.
+ * An empty root directory owned by the current user is created.
+ */
+void
+ream(char *dev)
+{
+	Qid rootqid;
+	char *me;
+	uchar *bigbuf;
+	uvlong meta, firstnon, lastnon, i;
+	vlong bperb;
+	vlong now;
+	int j, k, sfd;
+	int ndot = 0;
+
+	fprint(2, "reaming %s\n", dev);
+	sfd = open(dev, ORDWR);
+	if(sfd < 0)
+		sysfatal("Couldn't open device for write: %r");
+	/*
+	 * Init superblock
+	 */
+	super.magic = Magicθ;
+	super.version = 1;
+	super.qgen = 1 | ((uvlong)TFile << 60);
+	i = devsize(dev);
+	if(i == ~0ULL)
+		sysfatal("couldn't get device size:%r\n");
+	super.nblk = i / BlkSize;
+	for(i = 0; i < nelem(primes) - 1 && super.nblk > primes[i]; ++i) ;
+	super.nht = primes[i];
+	super.nhashblk = (super.nht + NPerBlk - 1) / NPerBlk;
+	bperb = 8 * BlkSize;	/* bitmap bits per block */
+	super.nfreemap = (super.nblk + bperb - 1) / bperb;
+	super.freemap = 2 * super.nhashblk + 1;
+	super.nmeta = super.nblk / 200;
+	super.firstmeta = super.freemap + super.nfreemap;
+	super.ffmeta = 1;
+	super.nblob = super.nblk / 200;
+	super.firstblob = super.firstmeta + super.nmeta;
+	super.ffblob = super.firstblob * BlkSize;
+	super.lfblob = super.ffblob + (super.nblob - 1) * BlkSize + BlkSize/2;
+	super.state = FSClean;
+	super.firstdat = super.firstblob + super.nblob;
+	super.nfree = super.nblk - super.firstdat;
+	fprint(2, "writing superblock: freemap=%ulld nfreemap=%ulld firstdat=%ulld nmeta=%ulld firstmeta=%ulld\n", super.freemap, super.nfreemap, super.firstdat, super.nmeta, super.firstmeta);
+	savesuper();
+	/*
+	 * Clear hash tables, a megabyte at a time.  The last write may run
+	 * into the bitmap, which is written afterwards.
+	 */
+	bigbuf = malloc(1024*1024);
+	if(bigbuf == nil)
+		sysfatal("ream: out of memory");
+	j = (1024 * 1024) / BlkSize;
+	memset(bigbuf, 0, 1024 * 1024);
+	for(i = 1; i < super.freemap; i += j) {
+		fprint(2, ".");
+		if(++ndot % 60 == 0)
+			fprint(2, "\n");
+		reamwrite(sfd, bigbuf, 1024 * 1024, i * BlkSize);
+	}
+	fprint(2, "\n");
+	/*
+	 * Init free bit map: bits firstdat .. nblk-1 are set, everything
+	 * else is clear.  firstnon and lastnon are the bitmap blocks
+	 * holding the first and the last of those bits.
+	 */
+	firstnon = super.firstdat / (BlkSize * 8);
+	lastnon = super.nblk / (BlkSize * 8);
+	memset(bigbuf, 0, BlkSize);
+	for(i = 0; i < firstnon; ++i)
+		reamwrite(sfd, bigbuf, BlkSize, (super.freemap + i) * BlkSize);
+	/* lastnon equals nfreemap when nblk fills the bitmap exactly: stay inside it */
+	for(i = firstnon; i <= lastnon && i < super.nfreemap; ++i) {
+		memset(bigbuf, 0xff, BlkSize);
+		if(i == firstnon) {
+			j = super.firstdat % (BlkSize * 8);
+			k = j % 8;
+			memset(bigbuf, 0, j/8);
+			bigbuf[j/8] = ~((1 << k) - 1);
+		}
+		if(i == lastnon) {
+			j = super.nblk % (BlkSize * 8);
+			k = j % 8;
+			/* and, not assign: firstdat may fall in this same byte */
+			bigbuf[j/8] &= (1 << k) - 1;
+			memset(bigbuf + j/8 + 1, 0, BlkSize - (j/8 + 1));
+		}
+		reamwrite(sfd, bigbuf, BlkSize, (super.freemap + i) * BlkSize);
+	}
+	memset(bigbuf, 0, BlkSize);
+	for(i = lastnon + 1; i < super.nfreemap; ++i)
+		reamwrite(sfd, bigbuf, BlkSize, (super.freemap + i) * BlkSize);
+	free(bigbuf);
+	loadsuper();
+	/*
+	 * Initialize the metadata regions
+	 */
+	reammeta(sfd);
+	close(sfd);
+	/*
+	 * Create root directory
+	 */
+	rootqid.path = p2q(-1, "/", 1);
+	meta = q2m(-1, rootqid.path, 1);
+	if(meta == 0)
+		sysfatal("ream: can't allocate root metadata");
+	setmetastr(meta, "name", nil, "/", 0);
+	rootqid.vers = 0;
+	rootqid.type = QTDIR;
+	setmetaint(meta, "qpath", nil, rootqid.path);
+	setmetaint(meta, "qvers", nil, rootqid.vers);
+	setmetaint(meta, "qtype", nil, rootqid.type);
+	setmetaint(meta, "mode", nil, DMDIR | 0775);
+	now = nsec();
+	setmetaint(meta, "atime", nil, now);
+	setmetaint(meta, "mtime", nil, now);
+	setmetaint(meta, "length", nil, 0);
+	me = getuser();
+	setmetastr(meta, "uid", nil, me, 0);
+	setmetastr(meta, "gid", nil, me, 0);
+	setmetastr(meta, "muid", nil, me, 0);
+	setmetaint(meta, "child", nil, 0);
+	setqhash(rootqid.path, meta);
+	savesuper();
+	fprint(2, "Done with ream\n");
+}
--- /sys/src/cmd/θfs/sunos.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/sunos.c Thu Feb 20 02:17:45 2014
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +/* + * Return the size of dev in bytes, or ~0ULL on failure. + * NOTE(review): whenever stat succeeds st_size is returned as is, + * so the vtoc path below only runs when stat fails; confirm that + * this is what is wanted for device nodes. + */ +uvlong +devsize(char *dev) +{ + struct stat stbuf; + struct extvtoc vtoc; + int fd, n; + + if(stat(dev, &stbuf) == 0) + return stbuf.st_size; + fd = open(dev, OREAD); + if(fd < 0) /* 0 is a valid descriptor; only negative means failure */ + return ~0ULL; + n = read_extvtoc(fd, &vtoc); + close(fd); + if(n < 0) + return ~0ULL; + return vtoc.v_sectorsz * vtoc.v_part[n].p_size; +} + +int +threadpid(int) +{ + return getpid(); +} + +static void +startcons(void *x) +{ + int *pfd; + char *ns, *path; + int acfd, lcfd; + char adir[40], ldir[40]; + + pfd = x; + ns = getns(); + path = smprint("unix!%s/thetafscons", ns); + acfd = announce(path, adir); + if(acfd < 0) + threadexits(nil); + while(1) { + lcfd = listen(adir, ldir); + if(lcfd < 0) + break; + pfd[0] = lcfd; + pfd[1] = lcfd; + docons(nil); + } + threadexits(nil); +} + +void +conspost(int cfd[], int pfd[]) +{ + threadcreate(startcons, pfd, 8192); + if(post9pservice(cfd[0], "thetafsctl", nil) < 0) + fprint(2, "post9pservice failed:%r\n"); +} --- /sys/src/cmd/θfs/plan9.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/plan9.c Thu Feb 20 02:17:45 2014 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include <9p.h> +#include "dat.h" + +uvlong +devsize(char *dev) +{ + Dir *d; + uvlong len; + + d = dirstat(dev); + if(d == nil) + return ~0ULL; + len = d->length; + free(d); + return len; +} + +static void +consthread(void *) +{ + docons(nil); + threadexits(nil); +} + +void +conspost(int cfd[], int pfd[]) +{ + if(pipe(pfd) < 0) + sysfatal("pipe: %r"); + postfd("θfscons", pfd[0]); + postfd("θfsctl", cfd[0]); + threadcreate(consthread, nil, 8192); +} --- /sys/src/cmd/θfs/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/mkfile Thu Feb 20 02:17:47 2014 @@ -0,0 +1,34 @@ + +#include +#include +#include +#include <9p.h> +#include "dat.h" + +enum { + GMperBlk = BlkSize / sizeof(GMeta), +}; + +static void freeblob(uvlong); + +static uvlong nget; +static uvlong nrm; +static uvlong nset; +static uvlong nalloc; +static uvlong nfree; +static uvlong nmiss; +static QLock mlock; +static QLock alock; +static QLock block; +static GMeta *mbuf; +static uvlong mblk; + +void +reammeta(int fd) +{ + GMeta *mp; + Blob *bp; + char *p, *e; + int i, j, n; + + /* First the GMeta structures */ + p = θmalloc(16 * BlkSize); + e = p + 16 * BlkSize; + mp = (GMeta *)p; + mp->next = 0; + 
mp->type = MTistring; + strcpy(mp->name, "invalid"); + strcpy(mp->m.str, "errnt"); + for(++mp, j = 2; (char *)mp < e; ++mp, ++j) + mp->next = j; + pwrite(fd, p, 16 * BlkSize, super.firstmeta * BlkSize); + memset(p, 0, sizeof(GMeta)); + for(i = 16; i < super.nmeta; i += 16) { + n = super.nmeta - i; + if(n > 16) + n = 16; + for(mp = (GMeta *)p; (char *)mp < e; ++mp, ++j) + mp->next = j; +if((i/16) % 10 == 0) fprint(2, ","); + pwrite(fd, p, n * BlkSize, (super.firstmeta + i) * BlkSize); + } + /* Now the string/blob pool */ + for(i = 0; i < super.nblob; i += 16) { + n = super.nblob - i; + if(n > 16) + n = 16; + memset(p, 0, n * BlkSize); + for(j = 0; j < n; ++j) { + bp = (Blob *)(p + j * BlkSize); + bp->len = 0x8000 | (BlkSize/2 - sizeof(short)); + bp->next = (super.firstblob + i + j) * BlkSize + BlkSize/2; + bp = (Blob *)(p + j * BlkSize + BlkSize/2); + bp->len = 0x8000 | (BlkSize/2 - sizeof(short)); + if(i + j + 1 < super.nblob) + bp->next = (super.firstblob + i + j + 1) * BlkSize; + } +if((i/16) % 10 == 0) fprint(2, ";"); + pwrite(fd, p, n * BlkSize, (super.firstblob + i) * BlkSize); + } + free(p); +} + +void +resetmeta(void) +{ + qlock(&mlock); + if(mbuf) { + brelease(mblk); + mbuf = nil; + mblk = 0; + } + qunlock(&mlock); +} + +/* + * Copy metadata structure idx into buf: straight from fd when fd is + * not -1, otherwise through the one-block cache (mbuf/mblk) under mlock. + * Returns -1 when idx is out of range, else the spread() result or + * sizeof(GMeta). Valid indices are 0 .. nmeta*GMperBlk-1. The test + * must be >=: reammeta links every structure to the next index, so the + * free list ends with a link to nmeta*GMperBlk, which a > test would + * accept and resolve to the first block of the blob pool. + */ +static int +getmstruct(int fd, GMeta *buf, uvlong idx) +{ + uvlong off, blk; + + if(idx >= super.nmeta * (BlkSize / sizeof(GMeta))) { + fprint(2, "Invalid metadata index: %ulld\n", idx); + return -1; + } + if(fd != -1) + return spread(fd, buf, sizeof(GMeta), idx * sizeof(GMeta) + super.firstmeta * BlkSize); + blk = idx / GMperBlk + super.firstmeta; + off = idx % GMperBlk; + qlock(&mlock); + if(blk != mblk) { + ++nmiss; + if(mbuf) + brelease(mblk); + mbuf = cbread(blk); + mblk = blk; + } + memmove(buf, mbuf + off, sizeof(GMeta)); + qunlock(&mlock); + return sizeof(GMeta); +} + +/* + * Store buf into metadata slot idx through the one-block cache and + * write the block back. Returns -1 when idx is out of range (same + * >= bound as getmstruct, so a stray index cannot overwrite the blob + * pool), otherwise sizeof(GMeta). + */ +static int +savemstruct(GMeta *buf, uvlong idx) +{ + uvlong off, blk; + + if(idx >= super.nmeta * (BlkSize / sizeof(GMeta))) { + fprint(2, "Invalid metadata index: 
%ulld\n", idx); + return -1; + } + blk = idx / GMperBlk + super.firstmeta; + off = idx % GMperBlk; + qlock(&mlock); + if(blk != mblk) { + ++nmiss; + if(mbuf) + brelease(mblk); + mbuf = cbread(blk); + mblk = blk; + } + memmove(mbuf + off, buf, sizeof(GMeta)); + cbwrite(blk); + qunlock(&mlock); + return sizeof(GMeta); +} + +static uvlong +allocmeta(GMeta *buf) +{ + uvlong nmeta; + + qlock(&alock); + if(super.ffmeta == 0) { + fprint(2, "Out of metadata space!\n"); + qunlock(&alock); + return 0; + } + if(getmstruct(-1, buf, super.ffmeta) < 0) { + qunlock(&alock); + return 0; + } + ++nalloc; + nmeta = super.ffmeta; + super.ffmeta = buf->next; + savesuper(); + qunlock(&alock); + return nmeta; +} + +/* + * Return metadata structure idx to the head of the free list. + * Any blob it refers to (MTstring/MTblob) is released first; the + * slot is then zeroed, linked in front of super.ffmeta and written + * back. alock is held for the whole operation and must be dropped + * on every exit path. + */ +static void +freemeta(uvlong idx) +{ + GMeta buf; + + qlock(&alock); + if(getmstruct(-1, &buf, idx) < 0) { + qunlock(&alock); /* was qlock: took alock twice and never released it */ + return; + } + if(buf.type == MTstring || buf.type == MTblob) + freeblob(buf.m.val); + ++nfree; + memset(&buf, 0, sizeof(GMeta)); + buf.next = super.ffmeta; + super.ffmeta = idx; + savesuper(); + savemstruct(&buf, idx); + qunlock(&alock); +} + +/* +static void +insmeta(GMeta *buf, uvlong idx, uvlong after) +{ + GMeta abuf; + + if(getmstruct(-1, &abuf, after) < 0) + return; + buf->next = abuf.next; + abuf.next = idx; + savemstruct(&abuf, after); + savemstruct(buf, idx); +} +*/ + +static Blob * +getbstruct(int fd, uvlong bp) +{ + Blob *b; + void *a; + uvlong blk; + ulong off, m; + + blk = bp / BlkSize; + if(blk < super.firstblob || blk >= super.firstblob + super.nblob) + return nil; + off = bp % BlkSize; + m = BlkSize - off; + if(m > 32768) + m = 32768; + if(fd == -1) + b = (Blob *)((char *)cbread(blk) + off); + else { + b = θmalloc(m); + spread(fd, b, m, bp); + } + m = b->len & 0x7fff; + a = θmalloc(m + sizeof(short)); + memmove(a, b, m + sizeof(short)); + if(fd == -1) + brelease(blk); + else + free(b); + return a; +} + +static int +savebstruct(Blob *b, uvlong bp) +{ + return cwrite(b, b->len & 0x7fff, bp); +} + +static Blob * +allocblob(int n, uvlong 
*bp) +{ + Blob *cb, *pb, *nb; + uvlong cur, prev; + int an; + + an = (n + BlobQuan - 1) & ~(BlobQuan - 1); + prev = 0; + cb = nil; + pb = nil; + qlock(&block); + /* + * First-fit walk of the free list. cb and pb are private copies + * handed out by getbstruct; each one must be freed once it is no + * longer the current or previous entry. + */ + for(cur = super.ffblob; cur; ) { + cb = getbstruct(-1, cur); + if(cb == nil) { + qunlock(&block); + free(pb); /* do not leak the previous entry */ + return nil; + } + if((cb->len & 0x7fff) >= an) + break; + free(pb); /* entry before the old previous one is done with */ + pb = cb; + prev = cur; + cur = cb->next; + } + if(cur == 0) { + qunlock(&block); + free(cb); /* cb == pb here (or nil for an empty list) */ + fprint(2, "No free blobs\n"); + return nil; + } + if((cb->len & 0x7fff) >= an + BlobQuan) { + if(prev != 0) { + pb->next = cur + an; + savebstruct(pb, prev); + } + else { + super.ffblob = cur + an; + savesuper(); + } + nb = (Blob *)((char *)cb + an); + nb->len = cb->len - an; + nb->next = cb->next; + savebstruct(nb, cur + an); + cb->len = an - sizeof(short); + } + else { + if(prev != 0) { + pb->next = cb->next; + savebstruct(pb, prev); + } + else { + super.ffblob = cb->next; + savesuper(); + } + cb->len &= 0x7fff; + } + qunlock(&block); + free(pb); + if(bp) + *bp = cur; + return cb; +} + +/* + * This is a pretty gross hack, and probably oversimplified. + * However, I'm not real happy with this part and may redo + * it anyway. When freeing a blob (or long string) in the pool, + * we don't attempt to coalesce them, we just add it on to + * the end of the list. Until we've done enough allocations + * to use up all the pool space once, we don't really care. + * Because the most common use for the pool space is strings + * and the quantum is set to 64, I suspect nearly all requests + * will be satisfied with a single quantum and coalescing + * wouldn't have significant benefit anyway. So there's + * the excuse for taking what is probably too simple an + * approach. 
+ */ +static void +freeblob(uvlong bp) +{ + Blob *b, *b2; + + qlock(&block); + b = getbstruct(-1, bp); + if(b == nil) { + /* bp is outside the blob pool (e.g. 0); nothing to free */ + qunlock(&block); + return; + } + b->len |= 0x8000; + b->next = 0; + savebstruct(b, bp); + b2 = getbstruct(-1, super.lfblob); + if(b2) { + b2->next = bp; + savebstruct(b2, super.lfblob); + super.lfblob = bp; + savesuper(); + } + else + fprint(2, "Unexpected failure to read super.lfblob\n"); + qunlock(&block); + /* getbstruct returns private copies; release them */ + free(b2); + free(b); +} + +void * +getblob(int fd, uvlong bp, int *n) +{ + Blob *b; + void *a; + + b = getbstruct(fd, bp); + if(b == nil) + return nil; + if(b->len & 0x8000) { + free(b); + return nil; + } + if(n) + *n = b->len; + a = θmalloc(b->len); + memmove(a, b->data, b->len); + free(b); + return a; +} + +/* + * Store n bytes of blob in the pool. If bp names an existing blob + * of exactly length n it is overwritten in place and bp is returned; + * otherwise the old blob (if any) is flagged free, a new one is + * allocated and its address returned. Returns 0 if no blob could + * be allocated. + */ +uvlong +setblob(void *blob, int n, uvlong bp) +{ + Blob *b; + uvlong nbp; + + b = getbstruct(-1, bp); + if(b) { + if(b->len == n) { + memmove(b->data, blob, n); + savebstruct(b, bp); + free(b); /* copy from getbstruct was leaked */ + return bp; + } + else { + b->len |= 0x8000; + savebstruct(b, bp); + free(b); /* about to be overwritten by allocblob's result */ + } + } + b = allocblob(n, &nbp); + if(b == nil) + return 0; + memmove(b->data, blob, n); + savebstruct(b, nbp); + free(b); + return nbp; +} + +int +getmeta(int fd, uvlong stidx, char *name, MVal *val) +{ + GMeta buf; + uvlong next; + + ++nget; + for(next = stidx; next; ) { + if(getmstruct(fd, &buf, next) < 0) + return MTnone; + if(strcmp(name, buf.name) == 0) + break; + next = buf.next; + } + if(next == 0) + return MTnone; + *val = buf.m; + return buf.type; +} + +int +getmetaint(int fd, uvlong stidx, char *name, uvlong *val) +{ + MVal x; + int typ; + + typ = getmeta(fd, stidx, name, &x); + switch(typ) { + case MTint: + *val = x.val; + return typ; + default: + return MTnone; + } +} + +char * +getmetastr(int fd, uvlong stidx, char *name) +{ + MVal x; + char *p; + int typ; + + typ = getmeta(fd, stidx, name, &x); + switch(typ) { + case MTistring: + p = estrdup9p(x.str); + return p; + case MTstring: + return getblob(fd, x.val, nil); + default: + return nil; + } +} + +uvlong +setmeta(uvlong stidx, char *name, char *newname, uvlong val, int type) +{ + GMeta buf, 
nbuf; + uvlong next, last, nmeta; + + ++nset; + last = 0; + for(next = stidx; next; ) { + if(getmstruct(-1, &buf, next) < 0) + return 0; + last = next; + if(strcmp(name, buf.name) == 0) { + if(type == MTistring) { + if(buf.type == MTstring) + freeblob(buf.m.val); + strcpy(buf.m.str, (char *)val); + } + else + buf.m.val = val; + buf.type = type; /* in case we're changing the string length */ + if(newname) + strcpy(buf.name, newname); + savemstruct(&buf, last); + return last; + } + next = buf.next; + } + nmeta = allocmeta(&nbuf); + if(nmeta == 0) + return 0; + if(last == 0) + nbuf.next = 0; + else { + nbuf.next = buf.next; + buf.next = nmeta; + savemstruct(&buf, last); + } + nbuf.type = type; + if(newname) + strcpy(nbuf.name, newname); + else + strcpy(nbuf.name, name); + if(type == MTistring) + strcpy(nbuf.m.str, (char *)val); + else + nbuf.m.val = val; + savemstruct(&nbuf, nmeta); + return nmeta; +} + +uvlong +setmetaint(uvlong stidx, char *name, char *newname, uvlong val) +{ + return setmeta(stidx, name, newname, val, MTint); +} + +uvlong +setmetastr(uvlong stidx, char *name, char *newname, char *s, uvlong bp) +{ + uvlong nbp; + int n; + + n = strlen(s); + if(n <= 7) { + return setmeta(stidx, name, newname, (uvlong)s, MTistring); + } + nbp = setblob(s, n + 1, bp); + return setmeta(stidx, name, newname, nbp, MTstring); +} + +void +setmstruct(uvlong idx, uvlong next, char *name, int type, uvlong val) +{ + GMeta mb; + + if(getmstruct(-1, &mb, idx) < 0) + return; + mb.next = next; + strcpy(mb.name, name); + mb.type = type; + mb.m.val = val; + savemstruct(&mb, idx); +} + +uvlong +setmetablob(uvlong stidx, char *name, char *newname, uchar *blob, int n, uvlong bp) +{ + uvlong nbp; + + nbp = setblob(blob, n, bp); + return setmeta(stidx, name, newname, nbp, MTblob); +} + +uvlong +rmmeta(uvlong midx, uvlong victim) +{ + GMeta buf, vbuf; + uvlong next; + + ++nrm; + if(getmstruct(-1, &vbuf, victim) < 0) + return midx; + if(midx == victim) { + next = vbuf.next; + 
freemeta(victim); + return next; + } + /* find the predecessor of victim and unlink it */ + for(next = midx; next; ) { + if(getmstruct(-1, &buf, next) < 0) + return midx; + if(buf.next == victim) { + buf.next = vbuf.next; + freemeta(victim); + savemstruct(&buf, next); + return midx; + } + next = buf.next; /* was vbuf.next: skipped the list and could loop forever */ + } + return midx; +} + +void +rmmlist(uvlong midx) +{ + GMeta buf; + uvlong next; + + ++nrm; + next = midx; + while(next) { + if(getmstruct(-1, &buf, next) < 0) + return; + freemeta(next); + next = buf.next; + } +} + +static uvlong +promote1(uvlong midx, uvlong dblk, int) +{ + uvlong *p; + uvlong nblk; + + nblk = allocblock(); + if(nblk == 0) + return 0; + p = cbclean(nblk); + p[0] = dblk; + cbwrite(nblk); + setmetaint(midx, "dblock", "index", nblk); + brelease(nblk); + return nblk; +} + +static uvlong +promote2(uvlong midx, uvlong iblk, int) +{ + uvlong *p; + uvlong nblk; + + nblk = allocblock(); + if(nblk == 0) + return 0; + p = cbclean(nblk); + p[0] = iblk; + cbwrite(nblk); + setmetaint(midx, "index", "indirect", nblk); + brelease(nblk); + return nblk; +} + +static uvlong +promote3(uvlong midx, uvlong iblk, int levels) +{ + uvlong *p; + char *name; + uvlong nblk; + + if(levels == 1) { + nblk = allocblock(); + if(nblk == 0) + return 0; + p = cbclean(nblk); + p[0] = iblk; + cbwrite(nblk); + brelease(nblk); + iblk = nblk; + } + nblk = allocblock(); + if(nblk == 0) + return 0; + p = cbclean(nblk); + p[0] = iblk; + cbwrite(nblk); + if(levels == 1) + name = "index"; + else + name = "indirect"; + setmetaint(midx, name, "dblindir", nblk); + brelease(nblk); + return nblk; +} + +/* + * Follow entry off of index block iblk and return the block it + * names, or 0 if iblk is out of range or the entry is empty. + * With fd == -1 and allocate set, an empty entry is filled with a + * freshly allocated, zeroed block. The cbread of iblk is paired + * with a brelease on every path, including allocation failure. + */ +static uvlong +doindir(int fd, uvlong iblk, int off, int allocate) +{ + uvlong *p; + uvlong pblk; + + if(iblk < super.firstdat || iblk >= super.nblk) + return 0; + if(fd != -1) + spread(fd, &pblk, sizeof(uvlong), iblk * BlkSize + off * sizeof(uvlong)); + else { + p = cbread(iblk); + pblk = p[off]; + if(pblk == 0) { + if(allocate) { + pblk = allocblock(); + if(pblk == 0) { + brelease(iblk); + return 0; + } + p[off] = pblk; + cbwrite(iblk); + cbclean(pblk); + 
cbwrite(pblk); + brelease(pblk); + } + } + brelease(iblk); + } + return pblk; +} + +/* + * Map virtual block vblk of the file whose metadata list starts at + * midx to a physical block number. The tree depth is taken from + * which of "dblindir", "indirect", "index" or "dblock" is present, + * and is promoted when vblk does not fit. With allocate set, a + * missing data block is allocated and zeroed. Returns 0 when there + * is no such block, on allocation failure, or when the result lies + * outside the data area. + */ +uvlong +locate(int fd, uvlong midx, uvlong vblk, int allocate) +{ + uvlong *p; + uvlong iblk, pblk; + ulong pperb; + int levels, l1off, l2off, l3off; + + if(getmetaint(fd, midx, "dblindir", &iblk) == MTint) + levels = 3; + else if(getmetaint(fd, midx, "indirect", &iblk) == MTint) + levels = 2; + else if(getmetaint(fd, midx, "index", &iblk) == MTint) + levels = 1; + else if(getmetaint(fd, midx, "dblock", &iblk) == MTint) + levels = 0; + else + return 0; + pperb = BlkSize / sizeof(uvlong); + l1off = vblk % pperb; + l2off = (vblk / pperb) % pperb; + l3off = vblk / (pperb * pperb); + if(levels < 3 && l3off != 0) { + iblk = promote3(midx, iblk, levels); + levels = 3; + } + else if(levels < 2 && l2off != 0) { + iblk = promote2(midx, iblk, levels); + levels = 2; + } + else if(levels < 1 && l1off > 0) { + iblk = promote1(midx, iblk, levels); + levels = 1; + } + pblk = 0; + switch(levels) { + case 3: + iblk = doindir(fd, iblk, l3off, allocate); /* fall through */ + case 2: + iblk = doindir(fd, iblk, l2off, allocate); /* fall through */ + case 1: + if(iblk == 0) + return 0; + p = cbread(iblk); + pblk = p[l1off]; + if(pblk == 0 && allocate) { + pblk = allocblock(); + if(pblk == 0) { + /* out of space: never hand block 0 (the superblock) to cbclean */ + brelease(iblk); + return 0; + } + cbclean(pblk); + p[l1off] = pblk; + cbwrite(pblk); + brelease(pblk); + cbwrite(iblk); + } + brelease(iblk); + break; + case 0: + pblk = iblk; + if(pblk == 0 && allocate) { + pblk = allocblock(); + if(pblk == 0) + return 0; /* same guard as above */ + cbclean(pblk); + setmetaint(midx, "dblock", nil, pblk); + cbwrite(pblk); + brelease(pblk); + } + break; + } + if(pblk < super.firstdat || pblk >= super.nblk) { + fprint(2, "Bogus block number found in locate: index:%ulld\n", iblk); + return 0; + } + return pblk; +} + +void +freedata(uvlong midx) +{ + uvlong *index1, *index2, *index3; + uvlong iblk; + int i, j, k; + + if(getmetaint(-1, midx, "dblindir", &iblk) == MTint) { + if(iblk == 0) + return; + if(iblk < super.firstdat) { + fprint(2,"Bogus dblindir block in freedat: %ulld\n", iblk); + return; + } + index3 = cbread(iblk); 
+ for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(index3[i] >= super.firstdat && index3[i] < super.nblk) { + index2 = cbread(index3[i]); + for(j = 0; j < BlkSize / sizeof(uvlong); ++j) { + if(index2[j] >= super.firstdat && index2[j] < super.nblk) { + index1 = cbread(index2[j]); + for(k = 0; k < BlkSize / sizeof(uvlong); ++k) + if(index1[k] != 0) + freeblock(index1[k]); + brelease(index2[j]); + freeblock(index2[j]); + } + } + brelease(index3[i]); + freeblock(index3[i]); + } + } + brelease(iblk); + freeblock(iblk); + } + else if(getmetaint(-1, midx, "indirect", &iblk) == MTint) { + if(iblk == 0) + return; + if(iblk < super.firstdat) { + fprint(2, "Bogus indirect block in freedat: %ulld\n", iblk); + return; + } + index2 = cbread(iblk); + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(index2[i] >= super.firstdat && index2[i] < super.nblk) { + index1 = cbread(index2[i]); + for(j = 0; j < BlkSize / sizeof(uvlong); ++j) + if(index1[j] != 0) + freeblock(index1[j]); + brelease(index2[i]); + freeblock(index2[i]); + } + } + brelease(iblk); + freeblock(iblk); + } + else if(getmetaint(-1, midx, "index", &iblk) == MTint) { + if(iblk == 0) + return; + if(iblk < super.firstdat) { + fprint(2, "Bogus index block in freedat: %ulld\n", iblk); + return; + } + index1 = cbread(iblk); + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) + if(index1[i] != 0) + freeblock(index1[i]); + brelease(iblk); + freeblock(iblk); + } +} + +void +prmeta(int fd, uvlong qpath) +{ + GMeta buf; + char *p; + uvlong meta, next; + int i, n; + + meta = q2m(-1, qpath, 0); + if(meta == 0) { + fprint(fd, "no metadata\n"); + return; + } + for(next = meta; next; ) { + if(getmstruct(-1, &buf, next) < 0) + break; + switch(buf.type) { + case MTnone: + break; + case MTint: + fprint(fd, "%s: %ulld(%016ullx)\n", buf.name, buf.m.val, buf.m.val); + break; + case MTistring: + fprint(fd, "%s: %s\n", buf.name, buf.m.str); + break; + case MTstring: + p = getblob(-1, buf.m.val, nil); + fprint(fd, "%s: %s\n", buf.name, p); 
+ free(p); + break; + case MTblob: + fprint(fd, "%s:", buf.name); + p = getblob(-1, buf.m.val, &n); + if(p == nil) + n = 0; /* getblob leaves n untouched when it fails */ + for(i = 0; i < n; ++i) + fprint(fd, " %02x", (uchar)p[i]); /* avoid sign extension of bytes >= 0x80 */ + fprint(fd, "\n"); + free(p); + break; + } + next = buf.next; + } +} + +static char mstatbuf[1024]; + +char * +prmstat(void) +{ + char *p, *e; + + p = mstatbuf; + e = p + nelem(mstatbuf); + p = seprint(p, e, "Metadata stats:\n"); + p = seprint(p, e, "getmeta calls: %ulld\n", nget); + p = seprint(p, e, "setmeta calls: %ulld\n", nset); + p = seprint(p, e, "rmmeta calls: %ulld\n", nrm); + p = seprint(p, e, "alloc calls: %ulld\n", nalloc); + p = seprint(p, e, "free calls: %ulld\n", nfree); + seprint(p, e, "misses: %ulld\n", nmiss); + return mstatbuf; +} + +static uvlong +qoffset(ulong bucket) +{ + return BlkSize * (super.nhashblk + 1) + bucket * sizeof(uvlong); +} + +void +recovermeta(int fd) +{ + GMeta mb; + uvlong midx1, midx2, qhnext, qb, n; + int saidit; + + /* First set all the marker flags */ + fprint(fd, "Setting flags\n"); + for(midx1 = 1; midx1 < super.nmeta * GMperBlk; ++midx1) { + getmstruct(-1, &mb, midx1); + mb.type |= 0x80; + savemstruct(&mb, midx1); + } + + /* Go through all the q2m hash table and mark referenced ones in use */ + fprint(fd, "Marking ones referenced from QID hash table\n"); + for(qb = 0; qb < super.nht; ++qb) { + cread(&midx1, sizeof(uvlong), qoffset(qb)); + qhnext = 0; + n = 0; + saidit = 0; + while(midx1 != 0) { + ++n; + getmstruct(-1, &mb, midx1); + mb.type &= 0x7f; + if(mb.type == MTnone && !saidit) { + fprint(fd, "Unexpected null metadatum at %ulld in bucket %ulld\n", midx1, qb); + saidit = 1; + } + midx2 = midx1; + midx1 = mb.next; + if(strcmp(mb.name, "qhnext") == 0 && mb.m.val != 0) { + qhnext = mb.m.val; + fprint(fd, "Warning, QID collision qb:%ulld midx:%ulld\n", qb, midx2); + } + savemstruct(&mb, midx2); + if(midx1 == 0) { + midx1 = qhnext; + qhnext = 0; + } + } + if(n > 128) + fprint(fd, "Unexpected large list at qb %ulld, size %ulld\n", qb, n); + } + + fprint(fd, "Sizing 
old free list\n"); + n = 0; + for(midx1 = super.ffmeta; midx1 != 0; ) { + ++n; + if(n >= super.nmeta * GMperBlk) { + fprint(fd, "Cycle in old free list?\n"); + break; + } + getmstruct(-1, &mb, midx1); + midx1 = mb.next; + } + fprint(fd, "Old free list has %ulld structures\n", n); + + /* Reclaim the free ones */ + fprint(fd, "Rebuilding free list\n"); + n = 0; + super.ffmeta = 0; + for(midx1 = super.nmeta * GMperBlk - 1; midx1 != 0; --midx1) { + getmstruct(-1, &mb, midx1); + if(mb.type & 0x80) { + memset(&mb, 0, sizeof(GMeta)); + mb.next = super.ffmeta; + super.ffmeta = midx1; + savemstruct(&mb, midx1); + ++n; + } + } + savesuper(); + fprint(fd, "Recovered %ulld free metadata structures\n", n); +} + +static int +markinuse(char *shadow, uvlong blk) +{ + long byt; + int bit, old; + + if(blk < super.firstdat || blk >= super.nblk) + return -1; + byt = blk / 8; + bit = blk % 8; + old = shadow[byt] & (1 << bit); + shadow[byt] &= ~(1 << bit); + return old; +} + +/* Dealing with humans always makes the code ugly. 
*/ +static char *idxnames[] = {"data", "index", "indirect", "dblindir"}; + +static void +chkidxalloc(int fd, char *shadow, uvlong blk, uvlong midx, int lev) +{ + uvlong *iblk; + int i; + + if(blk == 0) + return; + switch(markinuse(shadow, blk)) { + case -1: + fprint(fd, "Bogus %s block: %ulld metadataum %ulld\n", idxnames[lev+1], blk, midx); + return; + case 0: + fprint(fd, "Doubly allocated %s block: %ulld meta %ulld\n", idxnames[lev+1], blk, midx); + break; + } + iblk = cbread(blk); + if(iblk == nil) { + fprint(fd, "unexpected error reading block %ulld\n", blk); + return; + } + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(iblk[i] == 0) + continue; + if(lev > 0) + chkidxalloc(fd, shadow, iblk[i], midx, lev - 1); + else { + switch(markinuse(shadow, iblk[i])) { + case -1: + fprint(fd, "Bogus %s block: %ulld in %s block %ulld meta %ulld\n", + idxnames[lev], iblk[i], idxnames[lev+1], blk, midx); + i = BlkSize / sizeof(uvlong); + break; + case 0: + fprint(fd, "Doubly allocated %s block: %ulld in %s block %ulld meta %ulld\n", + idxnames[lev], iblk[i], idxnames[lev+1], blk, midx); + break; + } + } + } + brelease(blk); +} + +void +checkalloc(int fd) +{ + GMeta mb; + uvlong *hblk; + char *shadow, *fb; + uvlong idx; + long i; + int j, k, l; + + fprint(fd, "Initializing shadow free map\n"); + shadow = θmalloc(super.nfreemap * BlkSize); + for(i = super.firstdat; i < super.nblk; ++i) + shadow[i/8] |= 1 << (i % 8); + for(i = 0, j = 0; j < super.nhashblk; ++j) { + hblk = cbread(j + 1); + for(k = 0; k < BlkSize / sizeof(uvlong) && i < super.nht; ++k, ++i) { + if(i % 100000 == 0) + fprint(fd, "."); + idx = hblk[k]; + while(idx != 0) { + switch(markinuse(shadow, idx)) { + case -1: + fprint(fd, "Bogus block number in hash list for bucket %ld\n", i); + idx = 0; + break; + case 0: + fprint(fd, "Doubly allocated block in hash table: %ulld bucket %ld\n", idx, i); + default: + if(cread(&idx, sizeof(uvlong), idx * BlkSize + (BlkSize - sizeof(uvlong))) < 0) { + fprint(fd, "Error 
reading bucket next link: %ld %ulld\n", i, idx); + idx = 0; + } + if(idx != 0) + fprint(fd, ","); + break; + } + } + } + brelease(j + 1); + } + fprint(fd, "Scanning metadata\n"); + for(idx = 1; idx < super.nmeta * GMperBlk; ++idx) { + if(idx % 100000 == 0) + fprint(fd, "."); + getmstruct(-1, &mb, idx); + if(mb.type != MTint) + continue; + if(strcmp(mb.name, "index") == 0) + chkidxalloc(fd, shadow, mb.m.val, idx, 0); + else if(strcmp(mb.name, "indirect") == 0) + chkidxalloc(fd, shadow, mb.m.val, idx, 1); + else if(strcmp(mb.name, "dblindir") == 0) + chkidxalloc(fd, shadow, mb.m.val, idx, 2); + } + fprint(fd, "Comparing to on-disk free map\n"); + l = 0; + for(j = 0; j < super.nfreemap; ++j) { + fb = cbread(super.freemap + j); + for(k = 0; k < BlkSize; ++k) { + if(fb[k] != shadow[j*BlkSize+k]) { + if(++l < 10) /* don't flood the output with too many */ + fprint(fd, "%d:%02ux-%02ux\n", j*BlkSize+k, fb[k], (uchar)shadow[j*BlkSize+k]); + if((fb[k] & shadow[j*BlkSize+k]) != fb[k]) { + fprint(fd, "Marking in use\n"); + fb[k] &= shadow[j*BlkSize + k]; + cbwrite(super.freemap + j); + } + } + } + brelease(super.freemap + j); + } + free(shadow); +} + +void +mprint(int fd, uvlong idx) +{ + GMeta mb; + char *p; + int i, n; + + getmstruct(-1, &mb, idx); + switch(mb.type) { + case MTnone: + fprint(fd, "Meta:%ulld name:%s type:none next:%ulld val:%ulld\n", idx, mb.name, mb.next, mb.m.val); + break; + case MTint: + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:%ulld(%016ullx)\n", idx, mb.name, mb.type, mb.next, mb.m.val, mb.m.val); + break; + case MTistring: + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:%s\n", idx, mb.name, mb.type, mb.next, mb.m.str); + break; + case MTstring: + p = getblob(-1, mb.m.val, nil); + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:(%ulld)%s\n", idx, mb.name, mb.type, mb.next, mb.m.val, p); + free(p); + break; + case MTblob: + fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld ", idx, mb.name, mb.type, mb.next); + p = getblob(-1, 
mb.m.val, &n); + for(i = 0; i < n; ++i) + fprint(fd, " %02x", p[i]); + fprint(fd, "\n"); + free(p); + break; + default: + fprint(fd, "unknown Meta:%ulld type%d next%ulld\n", idx, mb.type, mb.next); + break; + } +} + +void +mpred(int fd, uvlong idx) +{ + GMeta mb; + uvlong i; + + for(i = 1; i < super.nmeta * GMperBlk; ++i) { + getmstruct(-1, &mb, i); + if(mb.type != MTnone && mb.next == idx) { + fprint(fd, "Meta:%ulld predecessor:%ulld\n", idx, i); + mprint(fd, i); + return; + } + } +} + +static void +idxuse(int fd, uvlong iblk, uvlong blk, uvlong midx, int lev) +{ + uvlong *bp; + int i; + + if(iblk == 0) + return; + if(iblk == blk) + fprint(fd, "%s entry meta: %ulld\n", idxnames[lev+1], midx); + bp = cbread(iblk); + if(bp == nil) { + fprint(fd, "error reading block %ulld\n", iblk); + return; + } + for(i = 0; i < BlkSize / sizeof(uvlong); ++i) { + if(bp[i] == blk) + fprint(fd, "%s block in %s block %ulld in meta %ulld\n", idxnames[lev], idxnames[lev+1], iblk, midx); + if(lev > 0) + idxuse(fd, bp[i], blk, midx, lev - 1); + } + brelease(iblk); +} + +void +blockuse(int fd, uvlong blk) +{ + GMeta mb; +// PQMap *pq, *pend; +// char *p; + uvlong /* hlist, */ midx; +// long i; + + if(blk == 0) { + fprint(fd, "superblock\n"); + return; + } + if(blk < super.nhashblk + 1) { + fprint(fd, "P2Q hash table\n"); + return; + } + if(blk < 2 * super.nhashblk + 1) { + fprint(fd, "Q2M hash table\n"); + return; + } + if(blk >= super.freemap && blk < super.freemap + super.nfreemap) { + fprint(fd, "free bitmap\n"); + return; + } + if(blk >= super.firstmeta && blk < super.firstmeta + super.nmeta) { + fprint(fd, "metadata structure pool\n"); + return; + } + if(blk >= super.firstblob && blk < super.firstblob + super.nblob) { + fprint(fd, "string/blob pool\n"); + return; + } +#ifdef NOTDEF + for(i = 0; i < super.nht; ++i) { + if(i % 100000 == 0) + fprint(fd, "."); + if(cread(&hlist, sizeof(uvlong), BlkSize + i * sizeof(uvlong)) < 0) { + fprint(fd, "Error reading bucket %ld\n", i); + continue; 
+ } + if(hlist == blk) + fprint(fd, "P2Q hash bucket %ld\n", i); + while(hlist != 0) { + p = cbread(hlist); + if(p == nil) { + fprint(fd, "Error reading hash list block %ulld\n", hlist); + break; + } + pend = (PQMap *)(p + BlkSize); + --pend; +// for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) { +// } + } + } +#endif + for(midx = 1; midx < super.nmeta * GMperBlk; ++midx) { + getmstruct(-1, &mb, midx); + if(mb.type != MTint) + continue; + if(mb.m.val == 0) + continue; + if(strcmp(mb.name, "index") == 0) + idxuse(fd, mb.m.val, blk, midx, 0); + else if(strcmp(mb.name, "indirect") == 0) + idxuse(fd, mb.m.val, blk, midx, 1); + else if(strcmp(mb.name, "dblindir") == 0) + idxuse(fd, mb.m.val, blk, midx, 2); + } +} + +void +fixfamilies(int fd) +{ + GMeta mb; + uvlong midx; + + for(midx = 1; midx < super.nmeta * GMperBlk; ++midx) { + if(getmstruct(-1, &mb, midx) < 0) + continue; + if(mb.type != MTint) + continue; + if(strcmp(mb.name, "child") != 0 && strcmp(mb.name, "sib") != 0) + continue; + if(mb.m.val == 0) + continue; + if(q2m(-1, mb.m.val, 0) == 0) { + fprint(fd, "clearing dangling %s:%ulld in meta struct %ulld\n", mb.name, mb.m.val, midx); + mb.m.val = 0; + savemstruct(&mb, midx); + } + } +} --- /sys/src/cmd/θfs/macosx.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/macosx.c Thu Feb 20 02:17:49 2014 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Coraid nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+/*
+ * Return the length reported by dirstat() for dev,
+ * or ~0ULL when dev cannot be statted.
+ */
+uvlong
+devsize(char *dev)
+{
+	Dir *d;
+	uvlong len;
+
+	d = dirstat(dev);
+	if(d == nil)
+		return ~0ULL;
+	len = d->length;
+	free(d);
+	return len;
+}
+
+/*
+ * Every thread id maps to the pid of the calling process here;
+ * the argument is ignored.
+ */
+int
+threadpid(int dummy)
+{
+	return getpid();
+}
+
+/*
+ * Console listener thread.  Announces unix!$ns/thetafscons and, for
+ * each accepted connection, stores the connection fd in both slots
+ * of the int pair passed as x and runs docons().
+ */
+static void
+startcons(void *x)
+{
+	int *pfd;
+	char *ns, *path;
+	int acfd, lcfd;
+	char adir[40], ldir[40];
+
+	pfd = x;
+	ns = getns();
+	if(ns == nil)
+		threadexits(nil);
+	path = smprint("unix!%s/thetafscons", ns);
+	free(ns);		/* getns hands back an allocated string */
+	if(path == nil)
+		threadexits(nil);
+	acfd = announce(path, adir);
+	free(path);		/* only needed for the announce call */
+	if(acfd < 0)
+		threadexits(nil);
+	while(1) {
+		lcfd = listen(adir, ldir);
+		if(lcfd < 0)
+			break;
+		pfd[0] = lcfd;
+		pfd[1] = lcfd;
+		/* NOTE(review): lcfd is not closed here; presumably docons owns it - confirm */
+		docons(nil);
+	}
+	close(acfd);
+	threadexits(nil);
+}
+
+/*
+ * Start the console listener thread on pfd and post cfd[0]
+ * as the 9P service "thetafsctl".
+ */
+void
+conspost(int cfd[], int pfd[])
+{
+	threadcreate(startcons, pfd, 8192);
+	if(post9pservice(cfd[0], "thetafsctl", nil) < 0)
+		fprint(2, "post9pservice failed:%r\n");
+}
--- /sys/src/cmd/θfs/free.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/free.c Thu Feb 20 02:17:49 2014
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+/* request codes carried in Freereq.req */
+enum {
+	Falloc = 1,
+	Ffree,
+};
+
+typedef struct Freereq Freereq;
+
+/* one request to the free-map handler thread */
+struct Freereq {
+	int req;	/* Falloc or Ffree */
+	uvlong blk;	/* block to free (Ffree only) */
+	Channel *resp;	/* where the allocated block number is sent (Falloc only) */
+};
+
+static uvlong firstfree;	/* bit index of the lowest set bit in the free bitmap */
+static Channel *freechan;
+
+/*
+ * Hand out block firstfree: clear its bit in the bitmap, advance
+ * firstfree to the next set bit and send the old value on r->resp.
+ * Calls sysfatal when no set bit remains or a bitmap block cannot
+ * be read.
+ */
+static void
+_allocblock(Freereq *r)
+{
+	uchar *p, *tfree;
+	vlong ff, fb, i;
+	int blk, bit;
+	int j, x;
+
+	ff = firstfree;
+	fb = firstfree / (8 * BlkSize);
+	if(fb >= super.nfreemap)
+		sysfatal("No free space");
+	tfree = cbread(fb + super.freemap);
+	if(tfree == nil)
+		sysfatal("can't read free bitmap block %lld", fb);
+	blk = (firstfree / 8) % BlkSize;
+	bit = firstfree & 7;
+	tfree[blk] &= ~(1 << bit);
+	cbwrite(fb + super.freemap);
+	for(i = fb; ; ) {
+		for(p = tfree; p < tfree + BlkSize && *p == 0; ++p) ;
+		if(p < tfree + BlkSize)
+			break;
+		brelease(i + super.freemap);
+		/* stop before asking the cache for a block beyond the bitmap */
+		if(++i >= super.nfreemap)
+			sysfatal("No free space");
+		tfree = cbread(i + super.freemap);
+		if(tfree == nil)
+			sysfatal("can't read free bitmap block %lld", i);
+	}
+	for(j = 0, x = *p; j < 8 && (x&1) == 0; ++j, x >>= 1) ;
+	firstfree = 8 * (i * BlkSize + (p - tfree)) + j;
+	/* release only after the last look at the buffer */
+	brelease(i + super.freemap);
+	--super.nfree;
+	send(r->resp, &ff);
+}
+
+/*
+ * Mark r->blk free in the bitmap and lower firstfree if needed.
+ * Nothing is changed when the cache refuses to give the block up
+ * or the bitmap block cannot be read.
+ */
+static void
+_freeblock(Freereq *r)
+{
+	uchar *tfree;
+	vlong fb;
+	int blk, bit;
+
+	if(!ccanfree(r->blk)) {
+		fprint(2, "wanting to free active block\n");
+		return;
+	}
+	blk = (r->blk / 8) % BlkSize;
+	bit = r->blk & 7;
+	fb = r->blk / (8 * BlkSize);
+	tfree = cbread(fb + super.freemap);
+	if(tfree == 0) {
+		fprint(2, "invalid free block: fb:%lld freemap:%ulld\n", fb, super.freemap);
+		return;
+	}
+	tfree[blk] |= 1 << bit;
+	cbwrite(fb + super.freemap);
+	brelease(fb + super.freemap);
+	++super.nfree;
+	if(r->blk < firstfree)
+		firstfree = r->blk;
+}
+
+/*
+ * Free-map thread: takes requests from freechan one at a time, so
+ * the bitmap and firstfree are only touched from here after
+ * initfree().
+ */
+static void
+handler(void *)
+{
+	Freereq r;
+
+	while(1) {
+		if(recv(freechan, &r) == 0) {
+			if(shutdown)
+				threadexits(nil);
+			continue;
+		}
+		switch(r.req) {
+		case Falloc:
+			_allocblock(&r);
+			break;
+		case Ffree:
+			_freeblock(&r);
+			break;
+		}
+	}
+}
+
+/*
+ * Locate the first set bit in the free bitmap, then create the
+ * request channel and start the handler thread.  Calls sysfatal
+ * when the bitmap has no set bit or cannot be read.
+ */
+void
+initfree(void)
+{
+	uchar *p, *tfree;
+	vlong i;
+	int j, x;
+
+	p = nil;	/* make the compiler happy */
+	tfree = nil;
+	for(i = 0; i < super.nfreemap; ++i) {
+		tfree = cbread(i + super.freemap);
+		if(tfree == nil)
+			sysfatal("can't read free bitmap block %lld", i);
+		for(p = tfree; p < tfree + BlkSize && *p == 0; ++p) ;
+		if(p < tfree + BlkSize)
+			break;
+		brelease(i + super.freemap);
+	}
+	if(i >= super.nfreemap)
+		sysfatal("No free space");
+	for(j = 0, x = *p; j < 8 && (x&1) == 0; ++j, x >>= 1) ;
+	firstfree = 8 * (i * BlkSize + (p - tfree)) + j;
+	brelease(i + super.freemap);
+	freechan = chancreate(sizeof(Freereq), 2);
+	threadcreate(handler, nil, 8192);
+}
+
+/* Currently does nothing; closing freechan is disabled. */
+void
+haltfree(void)
+{
+/*	chanclose(freechan); */
+}
+
+/*
+ * Ask the handler thread for a free block and wait for its number.
+ */
+uvlong
+allocblock(void)
+{
+	Freereq r;
+	uvlong blk;
+
+	r.req = Falloc;
+	r.resp = chancreate(sizeof(uvlong), 0);
+	send(freechan, &r);
+	recv(r.resp, &blk);
+	chanfree(r.resp);
+	return blk;
+}
+
+/*
+ * Queue block to be returned to the free bitmap; no reply is awaited.
+ * Blocks below super.firstdat or above super.nblk are rejected.
+ * NOTE(review): block == super.nblk is accepted; confirm whether nblk
+ * is a count (then this should be >=) or the last valid block.
+ */
+void
+freeblock(uvlong block)
+{
+	Freereq r;
+
+	if(block < super.firstdat || block > super.nblk) {
+		fprint(2, "Bogus block in free from %p: %ulld\n", getcallerpc(&block), block);
+		return;
+	}
+	r.req = Ffree;
+	r.blk = block;
+	send(freechan, &r);
+}
--- /sys/src/cmd/θfs/fis.h Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/fis.h Thu Feb 20 02:17:50 2014
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Coraid nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma lib "libfis.a" +#pragma src "/sys/src/libfis" + +/* ata errors */ +enum { + Emed = 1<<0, /* media error */ + Enm = 1<<1, /* no media */ + Eabrt = 1<<2, /* abort */ + Emcr = 1<<3, /* media change request */ + Eidnf = 1<<4, /* no user-accessible address */ + Emc = 1<<5, /* media change */ + Eunc = 1<<6, /* data error */ + Ewp = 1<<6, /* write protect (same bit as Eunc) */ + Eicrc = 1<<7, /* interface crc error */ + + Efatal = Eidnf|Eicrc, /* must sw reset */ +}; + +/* ata status */ +enum { + ASerr = 1<<0, /* error */ + ASdrq = 1<<3, /* request */ + ASdf = 1<<5, /* fault */ + ASdrdy = 1<<6, /* ready */ + ASbsy = 1<<7, /* busy */ + + ASobs = 1<<1|1<<2|1<<4, +}; + +enum { + /* fis types */ + H2dev = 0x27, + D2host = 0x34, + + /* fis flags bits */ + Fiscmd = 0x80, + + /* ata bits */ + Ataobs = 0xa0, + Atalba = 0x40, + + /* nominal fis size (fits any fis) */ + Fissize = 0x20, +}; + +/* sata host-to-device (0x27, H2dev) fis layout: byte offsets into the fis */ +enum { + Ftype, + Fflags, + Fcmd, + Ffeat, + Flba0, + Flba8, + Flba16, + Fdev, + Flba24, + Flba32, + Flba40, + Ffeat8, + Fsc, + Fsc8, + Ficc, /* isochronous cmd completion */ + Fcontrol, +}; + +/* sata device-to-host fis (0x34, D2host) differences */ +enum{ + Fioport = 1, + Fstatus, + Frerror, +}; + +/*
ata protocol type */ +enum{ + Pnd = 0<<0, /* data direction */ + Pin = 1<<0, + Pout = 2<<0, + Pdatam = 3<<0, + + Ppio = 1<<2, /* ata protocol */ + Pdma = 2<<2, + Pdmq = 3<<2, + Preset = 4<<2, + Pdiag = 5<<2, + Ppkt = 6<<2, + Pprotom = 7<<2, + + P48 = 0<<5, /* command “size” */ + P28 = 1<<5, + Pcmdszm = 1<<5, + + Pssn = 0<<6, /* sector size */ + P512 = 1<<6, + Pssm = 1<<6, +}; + +typedef struct Sfis Sfis; +struct Sfis { + ushort feat; + uchar udma; + uchar speeds; + uint sig; + uint lsectsz; + uint physshift; /* log2(log/phys) */ + uint c; /* disgusting, no? */ + uint h; + uint s; +}; + +enum { + Dlba = 1<<0, /* required for sata */ + Dllba = 1<<1, + Dsmart = 1<<2, + Dpower = 1<<3, + Dnop = 1<<4, + Datapi = 1<<5, + Datapi16= 1<<6, + Data8 = 1<<7, + Dsct = 1<<8, + Dnflag = 9, /* count of the flag bits above (bits 0-8) */ +}; + +enum { + Pspinup = 1<<0, + Pidready = 1<<1, +}; + +void setfissig(Sfis*, uint); +int txmodefis(Sfis*, uchar*, uchar); +int atapirwfis(Sfis*, uchar*, uchar*, int, int); +int featfis(Sfis*, uchar*, uchar); +int flushcachefis(Sfis*, uchar*); +int identifyfis(Sfis*, uchar*); +int nopfis(Sfis*, uchar*, int); +int rwfis(Sfis*, uchar*, int, int, uvlong); +void skelfis(uchar*); +void sigtofis(Sfis*, uchar*); +uvlong fisrw(Sfis*, uchar*, int*); + +void idmove(char*, ushort*, int); +vlong idfeat(Sfis*, ushort*); +uvlong idwwn(Sfis*, ushort*); +int idss(Sfis*, ushort*); +int idpuis(ushort*); +ushort id16(ushort*, int); +uint id32(ushort*, int); +uvlong id64(ushort*, int); +char *pflag(char*, char*, Sfis*); +uint fistosig(uchar*); + +/* scsi */ +typedef struct Cfis Cfis; +struct Cfis { + uchar phyid; + uchar encid[8]; + uchar tsasaddr[8]; + uchar ssasaddr[8]; + uchar ict[2]; +}; + +void smpskelframe(Cfis*, uchar*, int); +uint sashash(uvlong); +uchar *sasbhash(uchar*, uchar*); --- /sys/src/cmd/θfs/cache.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/θfs/cache.c Thu Feb 20 02:17:51 2014 @@ -0,0 +1,862 @@ +/* + * Copyright (c) 2013, Coraid, Inc. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Coraid nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include <9p.h>
+#include "dat.h"
+
+enum {
+	/* CBlock.flags bits */
+	CDirty = 1,
+	CFlushing = 2,
+	CFree = 4,
+
+	/* Cachereq.req codes */
+	CBrelease = 1,
+	CBclean,
+	CBread,
+	CBwrite,
+	CCanfree,
+	CReset,
+
+	Ncht = 4001,	/* hash table buckets */
+
+	Nreaders = 4,	/* reader threads */
+
+	/* Reader.state */
+	Ridle = 0,
+	Rloading = 1,
+};
+
+typedef struct CBlock CBlock;
+typedef struct Cachereq Cachereq;
+typedef struct Cacheresp Cacheresp;
+typedef struct Reader Reader;
+
+/* one cached block, linked on the LRU, hash and write lists */
+struct CBlock {
+	Ref ref;
+	uvlong blkno;
+	uchar buf[BlkSize];
+	int flags;
+	CBlock *next, *prev;	/* LRU list; next also links the free list */
+	CBlock *htnext, *htprev;	/* hash chain */
+	CBlock *wnext, *wprev;	/* write (dirty) list */
+};
+
+struct Cachereq {
+	int req;
+	uvlong blk;
+	Channel *resp;
+};
+
+struct Cacheresp {
+	int res;
+	void *p;
+};
+
+struct Reader {
+	char *dev;
+	Channel *rdchan;
+	int state;
+	uvlong loading;	/* block being read while state is Rloading */
+};
+
+static int mypid;
+static Channel *wbtrigger;
+static CBlock *ht[Ncht];
+static CBlock *chd, *ctl;
+static CBlock *whd, *wtl;
+static CBlock *freehd;
+static int maxcache;
+static Ref ncache;
+static Ref ndirty;
+static Ref nwlist;
+static int timertid;
+static uvlong nmiss;
+static uvlong nread;
+static uvlong nwrite;
+static ulong hrate;
+static int syncing;
+static Reader rds[Nreaders];
+static Lock calock;
+
+Channel *cachechan;
+
+/*
+ * Take a block from the free list, or allocate a new one when the
+ * list is empty.  The free list is protected by calock, and the
+ * emptiness test is made under the lock as well.
+ */
+static CBlock *
+cballoc(void)
+{
+	CBlock *p;
+
+	lock(&calock);
+	p = freehd;
+	if(p == nil) {
+		unlock(&calock);
+		return θmalloc(sizeof(CBlock));
+	}
+	freehd = p->next;
+	if(!(p->flags & CFree))
+		fprint(2, "Internal error: non-free block on free list\n");
+	p->flags &= ~CFree;
+	p->next = nil;
+	unlock(&calock);
+	return p;
+}
+
+/*
+ * Put p on the free list.  Refuses, with a message, when p is
+ * already free, still referenced, or still linked on any list.
+ */
+static void
+cbfree(CBlock *p)
+{
+	if(p->flags & CFree)
+		fprint(2, "Freeing already free block?!?!?\n");
+	else if(p->ref.ref != 0)
+		fprint(2, "Freeing in use block\n");
+	else if(p->next || p->prev || p->htnext || p->htprev || p->wnext || p->wprev)
+		fprint(2, "Freeing block in data structures\n");
+	else {
+		lock(&calock);
+		p->flags |= CFree;
+		p->next = freehd;
+		freehd = p;
+		unlock(&calock);
+	}
+}
+
+/* Return the cached block for blk, or nil. */
+static CBlock *
+lookup(uvlong blk)
+{
+	CBlock *p;
+	int idx;
+
+	idx = blk % Ncht;
+	for(p = ht[idx]; p && p->blkno != blk; p = p->htnext) ;
+	return p;
+}
+
+/* Fold one hit or miss into the decaying hit rate; count misses. */
+static void
+updatestats(int hit)
+{
+	if(!hit) {
+		++nmiss;
+		hrate = (999 * hrate + 500) / 1000;
+	}
+	else
+		hrate = (999 * hrate + 500) / 1000 + 1000;
+}
+
+/* Insert p at the head of its hash chain. */
+static void
+insht(CBlock *p)
+{
+	int idx;
+
+	idx = p->blkno % Ncht;
+	if(ht[idx])
+		ht[idx]->htprev = p;
+	p->htnext = ht[idx];
+	ht[idx] = p;
+}
+
+/* Unlink p from its hash chain. */
+static void
+rmht(CBlock *p)
+{
+	CBlock *nxt, *prv;
+	int idx;
+
+	idx = p->blkno % Ncht;
+	nxt = p->htnext;
+	prv = p->htprev;
+	if(nxt)
+		nxt->htprev = prv;
+	if(prv)
+		prv->htnext = nxt;
+	if(ht[idx] == p)
+		ht[idx] = nxt;
+	p->htnext = nil;
+	p->htprev = nil;
+}
+
+/* Append p to the tail (most recent end) of the LRU list. */
+static void
+inslru(CBlock *p)
+{
+	if(chd == nil)
+		chd = p;
+	else
+		ctl->next = p;
+	p->prev = ctl;
+	p->next = nil;
+	ctl = p;
+	incref(&ncache);
+}
+
+/* Unlink p from the LRU list. */
+static void
+rmlru(CBlock *p)
+{
+	if(p->next)
+		p->next->prev = p->prev;
+	else
+		ctl = p->prev;
+	if(p->prev)
+		p->prev->next = p->next;
+	else
+		chd = p->next;
+	p->next = nil;
+	p->prev = nil;
+	decref(&ncache);
+}
+
+/* Append p to the tail of the write list. */
+static void
+insw(CBlock *p)
+{
+	if(whd == nil)
+		whd = p;
+	else
+		wtl->wnext = p;
+	p->wprev = wtl;
+	p->wnext = nil;
+	wtl = p;
+	incref(&nwlist);
+}
+
+/* Unlink p from the write list. */
+static void
+rmw(CBlock *p)
+{
+	if(p->wnext)
+		p->wnext->wprev = p->wprev;
+	else
+		wtl = p->wprev;
+	if(p->wprev)
+		p->wprev->wnext = p->wnext;
+	else
+		whd = p->wnext;
+	p->wnext = nil;
+	p->wprev = nil;
+	decref(&nwlist);
+}
+
+/* Move p to the most recent end of the LRU list. */
+static void
+mvlru(CBlock *p)
+{
+	if(p != ctl) {
+		rmlru(p);
+		inslru(p);
+	}
+}
+
+/*
+ * When the cache holds maxcache blocks or more, evict the least
+ * recently used block that is unreferenced and neither dirty nor
+ * being flushed.
+ */
+static void
+dolru(void)
+{
+	CBlock *p;
+	int i;
+
+	if(ncache.ref < maxcache)
+		return;
+	for(p = chd, i = 0; p && (p->ref.ref > 0 || (p->flags & (CDirty | CFlushing))); p = p->next, ++i)
+		if(i > ncache.ref) {
+			fprint(2, "cycle in LRU list? n:%ld d:%ld\n", ncache.ref, ndirty.ref);
+			chd->prev = nil;
+			ctl->next = nil;
+			return;
+		}
+	if(p) {
+		rmht(p);
+		rmlru(p);
+		if(p->flags & CDirty) {
+			decref(&ndirty);
+			p->flags &= ~CDirty;
+			rmw(p);
+		}
+		cbfree(p);
+	}
+}
+
+/* iocall helper: pread(fd, a, n, off) */
+static long
+_iopread(va_list *arg)
+{
+	void *a;
+	vlong off;
+	long n;
+	int fd;
+
+	fd = va_arg(*arg, int);
+	a = va_arg(*arg, void*);
+	n = va_arg(*arg, long);
+	off = va_arg(*arg, vlong);
+	return pread(fd, a, n, off);
+}
+
+/* iocall helper: pwrite(fd, a, n, off) */
+static long
+_iopwrite(va_list *arg)
+{
+	void *a;
+	vlong off;
+	long n;
+	int fd;
+
+	fd = va_arg(*arg, int);
+	a = va_arg(*arg, void*);
+	n = va_arg(*arg, long);
+	off = va_arg(*arg, vlong);
+	return pwrite(fd, a, n, off);
+}
+
+/*
+ * Timer proc: trigger a write-back every 15 seconds unless one is
+ * already running, and once more when shutdown is seen.
+ */
+static void
+wbtimer(void *)
+{
+	while(!shutdown) {
+		sleep(15000);
+		if(syncing != 1)
+			sendul(wbtrigger, 1);
+	}
+	sendul(wbtrigger, 1);
+}
+
+/*
+ * Write-back thread.  On each trigger, repeatedly takes the first
+ * write-list block that is unreferenced or lies below
+ * super.firstdat and writes it to the device d, until no such
+ * block is left.
+ */
+static void
+wbthread(void *d)
+{
+	Ioproc *wbio;
+	CBlock *p;
+	char *dev;
+	int fd;
+
+	dev = d;
+	wbio = ioproc();
+	fd = ioopen(wbio, dev, ORDWR);
+	if(fd < 0)
+		sysfatal("wb open: %r");
+	while(!shutdown) {
+		recvul(wbtrigger);
+		syncing = 1;
+		do {
+			for(p = whd; p && p->ref.ref > 0 && p->blkno >= super.firstdat; p = p->wnext) ;
+			if(p) {
+				p->flags |= CFlushing;
+				p->flags &= ~CDirty;
+				decref(&ndirty);
+				rmw(p);
+				++nwrite;
+				iocall(wbio, _iopwrite, fd, p->buf, BlkSize, p->blkno * BlkSize);
+				p->flags &= ~CFlushing;
+			}
+		} while(p);
+		syncing = 0;
+	}
+	ioclose(wbio, fd);
+	closeioproc(wbio);
+}
+
+/*
+ * Drop one reference on blk.  Returns -1 when blk is not cached or
+ * has no reference to drop, 0 otherwise.
+ */
+static int
+_brelease(uvlong blk)
+{
+	CBlock *p;
+	int rv;
+
+	rv = 0;
+	p = lookup(blk);
+	if(p) {
+		if(p->ref.ref == 0) {
+			fprint(2, "trying to decrement below 0: blk %ulld\n", blk);
+			rv = -1;
+		}
+		else
+			decref(&p->ref);
+	}
+	else
+		rv = -1;
+	dolru();
+	return rv;
+}
+
+/*
+ * Return a referenced, zero-filled buffer for blk without reading
+ * the device.
+ */
+static void *
+_cbclean(uvlong blk)
+{
+	CBlock *p;
+
+	p = lookup(blk);
+	if(p) {
+		memset(p->buf, 0, BlkSize);
+		mvlru(p);
+		incref(&p->ref);
+		updatestats(1);
+		return p->buf;
+	}
+	updatestats(0);
+	dolru();
+	p = cballoc();
+	memset(p->buf, 0, BlkSize);
+	p->blkno = blk;
+	incref(&p->ref);
+	insht(p);
+	inslru(p);
+	return p->buf;
+}
+
+/*
+ * Reader thread: loads blocks requested on its channel from the
+ * device and answers with the buffer, or nil when the read fails.
+ */
+static void
+reader(void *a)
+{
+	Cachereq r;
+	Cacheresp rsp;
+	Ioproc *cio;
+	Reader *rp;
+	CBlock *p;
+	int cfd, i;
+
+	rp = a;
+	cio = ioproc();
+	cfd = ioopen(cio, rp->dev, ORDWR);
+	if(cfd < 0)
+		sysfatal("Couldn't open device: %r");
+	while(1) {
+		if(recv(rp->rdchan, &r) == 0) {
+			if(shutdown) {
+				ioclose(cio, cfd);
+				closeioproc(cio);
+				threadexits(nil);
+			}
+			continue;
+		}
+		/*
+		 * See if it got loaded while it was in the channel queue
+		 */
+		p = lookup(r.blk);
+		if(p) {
+			mvlru(p);
+			incref(&p->ref);
+			updatestats(1);
+			rsp.p = p->buf;
+			send(r.resp, &rsp);
+			continue;
+		}
+		/*
+		 * If another reader is already loading this block, pass off the request
+		 * to that reader.  That way, by the time this request gets looked at
+		 * again, the block will already be loaded.
+		 */
+		for(i = 0; i < Nreaders && (rds[i].state != Rloading || rds[i].loading != r.blk); ++i) ;
+		if(i < Nreaders) {
+			send(rds[i].rdchan, &r);
+			continue;
+		}
+		rp->state = Rloading;
+		rp->loading = r.blk;
+		dolru();
+		p = cballoc();
+		p->blkno = r.blk;
+		incref(&p->ref);
+		++nread;
+		if(iocall(cio, _iopread, cfd, p->buf, BlkSize, r.blk * BlkSize) != BlkSize) {
+			rp->state = Ridle;
+			/* cbfree refuses a referenced block; drop ours so it is recycled */
+			decref(&p->ref);
+			cbfree(p);
+			rsp.p = nil;
+			send(r.resp, &rsp);
+			continue;
+		}
+		insht(p);
+		inslru(p);
+		rp->state = Ridle;
+		rsp.p = p->buf;
+		send(r.resp, &rsp);
+	}
+}
+
+/*
+ * Answer a read from the cache when possible, otherwise hand the
+ * request to the reader threads in round-robin order.
+ */
+static void
+_cbread(Cachereq *r)
+{
+	Cacheresp rsp;
+	CBlock *p;
+	static int rr;
+
+	p = lookup(r->blk);
+	if(p) {
+		mvlru(p);
+		incref(&p->ref);
+		updatestats(1);
+		rsp.p = p->buf;
+		send(r->resp, &rsp);
+		return;
+	}
+	updatestats(0);
+	send(rds[rr].rdchan, r);
+	++rr;
+	if(rr >= Nreaders)
+		rr = 0;
+}
+
+/*
+ * Mark blk dirty and queue it for write-back; wake the write-back
+ * thread once more than a tenth of the cache is dirty.
+ */
+static void
+_cbwrite(uvlong blk)
+{
+	CBlock *p;
+
+	p = lookup(blk);
+	if(p) {
+		mvlru(p);
+		if(!(p->flags & CDirty)) {
+			p->flags |= CDirty;
+			incref(&ndirty);
+			insw(p);
+		}
+	}
+	if(ndirty.ref > ncache.ref / 10 && !syncing)
+		nbsendul(wbtrigger, 1);
+}
+
+/*
+ * Drop blk from the cache so it can be freed on disk.  Returns 0
+ * when the block is still referenced, 1 otherwise.
+ */
+static int
+_ccanfree(uvlong blk)
+{
+	CBlock *p;
+
+	p = lookup(blk);
+	if(p) {
+		if(p->ref.ref > 0 /* || (p->flags & (CDirty | CFlushing)) */ ) {
+			fprint(2, "Wanting to free block %ulld with ref %ld and flags %x\n", blk, p->ref.ref, p->flags);
+			return 0;
+		}
+		if(p->flags & CDirty) {
+			decref(&ndirty);
+			rmw(p);
+			p->flags &= ~CDirty;
+		}
+		rmht(p);
+		rmlru(p);
+		cbfree(p);
+	}
+	return 1;
+}
+
+/*
+ * Discard every cached block that is neither referenced nor being
+ * flushed.  Dirty blocks are taken off the write list first, as in
+ * _ccanfree and dolru, so cbfree accepts them and the write-back
+ * thread never sees a recycled block.
+ */
+static void
+_resetcache(void)
+{
+	CBlock *p;
+
+	while(1) {
+		for(p = chd; p && (p->ref.ref > 0 || (p->flags & CFlushing)); p = p->next) ;
+		if(p == nil)
+			break;
+		if(p->flags & CDirty) {
+			decref(&ndirty);
+			rmw(p);
+			p->flags &= ~CDirty;
+		}
+		rmht(p);
+		rmlru(p);
+		cbfree(p);
+	}
+	if(chd)
+		fprint(2, "warning: active blocks during reset\n");
+}
+
+/*
+ * Cache thread: serves requests from cachechan.  mypid records the
+ * value callers compare against to take the direct-call path.
+ */
+static void
+handler(void *)
+{
+	Cacheresp rsp;
+	Cachereq r;
+
+	mypid = threadpid(threadid());
+	while(1) {
+		if(recv(cachechan, &r) == 0) {
+			if(shutdown)
+				threadexits(nil);
+			continue;
+		}
+		switch(r.req) {
+		case CBrelease:
+			rsp.res = _brelease(r.blk);
+			if(r.resp)
+				send(r.resp, &rsp);
+			break;
+		case CBclean:
+			rsp.p = _cbclean(r.blk);
+			send(r.resp, &rsp);
+			break;
+		case CBread:
+			_cbread(&r);
+			break;
+		case CBwrite:
+			_cbwrite(r.blk);
+			send(r.resp, &rsp);
+			break;
+		case CCanfree:
+			rsp.res = _ccanfree(r.blk);
+			send(r.resp, &rsp);
+			break;
+		case CReset:
+			_resetcache();
+			send(r.resp, &rsp);
+			break;
+		}
+	}
+}
+
+/*
+ * Start the cache on device dev with room for m blocks: reader
+ * threads, the request handler, the write-back thread and its timer.
+ */
+void
+initcache(char *dev, int m)
+{
+	int i;
+
+	maxcache = m;
+	for(i = 0; i < Nreaders; ++i) {
+		rds[i].dev = dev;
+		rds[i].rdchan = chancreate(sizeof(Cachereq), 10);
+		threadcreate(reader, &rds[i], 8192);
+	}
+	cachechan = chancreate(sizeof(Cachereq), 2);
+	threadcreate(handler, nil, 8192);
+	wbtrigger = chancreate(sizeof(ulong), 2);
+	threadcreate(wbthread, dev, 8192);
+	timertid = proccreate(wbtimer, nil, 1024);
+}
+
+/*
+ * Close the request channels, stop the timer, trigger a final
+ * write-back and wait up to 30 seconds for the write list to drain.
+ */
+void
+haltcache(void)
+{
+	int i;
+
+	for(i = 0; i < Nreaders; ++i)
+		chanclose(rds[i].rdchan);
+	chanclose(cachechan);
+	threadkill(timertid);
+	sendul(wbtrigger, 1);
+	for(i = 0; i < 30 && whd; ++i) {
+		fprint(2, ".");
+		sleep(1000);
+	}
+}
+
+/*
+ * Release a reference on blk.  Direct callers get _brelease's
+ * result, with failures reported along with the caller's pc; other
+ * callers queue the request and always get 0.
+ */
+int
+brelease(uvlong blk)
+{
+	Cachereq r;
+	int n;
+
+	if(mypid == threadpid(threadid())) {
+		n = _brelease(blk);
+		if(n == -1)
+			fprint(2, "brelease error called from %p\n", getcallerpc(&blk));
+		return n;
+	}
+	r.req = CBrelease;
+	r.blk = blk;
+	r.resp = nil;
+	send(cachechan, &r);
+	return 0;
+}
+
+/* Public form of _cbclean: a referenced, zeroed buffer for blk. */
+void *
+cbclean(uvlong blk)
+{
+	Cachereq r;
+	Cacheresp rsp;
+
+	if(mypid == threadpid(threadid()))
+		return _cbclean(blk);
+	r.req = CBclean;
+	r.blk = blk;
+	r.resp = chancreate(sizeof(Cacheresp), 0);
+	send(cachechan, &r);
+	recv(r.resp, &rsp);
+	chanfree(r.resp);
+	return rsp.p;
+}
+
+/*
+ * Return a referenced buffer holding blk, or nil when it cannot be
+ * read.  Pair every successful call with brelease().
+ */
+void *
+cbread(uvlong blk)
+{
+	Cachereq r;
+	Cacheresp rsp;
+	CBlock *p;
+
+	if(mypid == threadpid(threadid())) {
+		p = lookup(blk);
+		if(p) {
+			mvlru(p);
+			incref(&p->ref);
+			updatestats(1);
+			return p->buf;
+		}
+	}
+	r.req = CBread;
+	r.blk = blk;
+	r.resp = chancreate(sizeof(Cacheresp), 0);
+	send(cachechan, &r);
+	recv(r.resp, &rsp);
+	chanfree(r.resp);
+	return rsp.p;
+}
+
+/* Mark the cached copy of blk dirty. */
+void
+cbwrite(uvlong blk)
+{
+	Cachereq r;
+	Cacheresp rsp;
+
+	if(mypid == threadpid(threadid())) {
+		_cbwrite(blk);
+		return;
+	}
+	r.req = CBwrite;
+	r.blk = blk;
+	r.resp = chancreate(sizeof(Cacheresp), 0);
+	send(cachechan, &r);
+	recv(r.resp, &rsp);
+	chanfree(r.resp);
+}
+
+/* Public form of _ccanfree. */
+int
+ccanfree(uvlong blk)
+{
+	Cachereq r;
+	Cacheresp rsp;
+
+	if(mypid == threadpid(threadid()))
+		return _ccanfree(blk);
+	r.req = CCanfree;
+	r.blk = blk;
+	r.resp = chancreate(sizeof(Cacheresp), 0);
+	send(cachechan, &r);
+	recv(r.resp, &rsp);
+	chanfree(r.resp);
+	return rsp.res;
+}
+
+/*
+ * Copy n bytes at byte offset off into a.  The range must lie
+ * within one block.  Returns n, or -1 on error.
+ */
+int
+cread(void *a, int n, uvlong off)
+{
+	uchar *p;
+	uvlong blk;
+	ulong boff;
+
+	blk = off / BlkSize;
+	boff = off % BlkSize;
+	if(boff + n > BlkSize) {
+		fprint(2, "invalid block crossing\n");
+		return -1;
+	}
+	p = cbread(blk);
+	if(p == nil)
+		return -1;
+	memmove(a, p + boff, n);
+	brelease(blk);
+	return n;
+}
+
+/*
+ * Copy n bytes from a to byte offset off and mark the block dirty.
+ * The range must lie within one block and block 0 is refused.
+ * Returns n, or -1 on error.
+ */
+int
+cwrite(void *a, int n, uvlong off)
+{
+	uchar *p;
+	uvlong blk;
+	ulong boff;
+
+	blk = off / BlkSize;
+	if(blk == 0)
+		return -1;
+	boff = off % BlkSize;
+	if(boff + n > BlkSize) {
+		fprint(2, "invalid block crossing\n");
+		return -1;
+	}
+	p = cbread(blk);
+	if(p == nil)
+		return -1;
+	memmove(p + boff, a, n);
+	cbwrite(blk);
+	brelease(blk);
+	return n;
+}
+
+/*
+ * Interrupt the timer so that it triggers a write-back now, and
+ * yield until the write-back thread has set syncing back to 0.
+ */
+void
+csync(void)
+{
+	syncing = 2;
+	threadint(timertid);
+	while(syncing != 0)
+		yield();
+}
+
+static char cstatbuf[1024];
+
+/*
+ * Format cache statistics into a static buffer and return it.
+ * Negative reference counts found on the way are reset to 0.
+ */
+char *
+prcstat(void)
+{
+	CBlock *cb;
+	char *p, *e;
+	int ldirty, i, nhash;
+	int refhist[10];
+	int saidit = 0;
+
+	ldirty = 0;
+	p = cstatbuf;
+	e = p + nelem(cstatbuf);
+	memset(refhist, 0, 10 * sizeof(int));
+	p = seprint(p, e, "Cache stats:\n");
+	p = seprint(p, e, "ncache: %ld\n", ncache.ref);
+	p = seprint(p, e, "nwlist: %ld\n", nwlist.ref);
+	p = seprint(p, e, "ndirty: %ld\n", ndirty.ref);
+	for(cb = chd; cb; cb = cb->next) {
+		if(cb->flags & CDirty) {
+			/* only the first dirty block is listed */
+			if(!saidit) {
+				p = seprint(p, e, "dirty block ref:%ld blk:%ulld\n", cb->ref.ref, cb->blkno);
+				++saidit;
+			}
+			++ldirty;
+		}
+		if(cb->ref.ref < 0) {
+			p = seprint(p, e, "bad ref count: %ld on block %ulld; setting to 0\n", cb->ref.ref, cb->blkno);
+			cb->ref.ref = 0;
+		}
+		else if(cb->ref.ref >= 9)
+			++refhist[9];
+		else
+			++refhist[cb->ref.ref];
+		if(cb->ref.ref > 0)
+			p = seprint(p, e, "In use block: %ulld flags %ux\n", cb->blkno, cb->flags);
+	}
+	nhash = 0;
+	for(i = 0; i < Ncht; ++i) {
+		for(cb = ht[i]; cb; cb = cb->htnext)
+			++nhash;
+	}
+	p = seprint(p, e, "nhash: %d\n", nhash);
+	p = seprint(p, e, "ldirty: %d\n", ldirty);
+	p = seprint(p, e, "nread: %ulld\n", nread);
+	p = seprint(p, e, "nwrite: %ulld\n", nwrite);
+	p = seprint(p, e, "nmiss: %ulld\n", nmiss);
+	p = seprint(p, e, "hit rate: %uld%%\n", (hrate + 5000) / 10000);
+	p = seprint(p, e, "ref count histogram:\n");
+	p = seprint(p, e, " 0 1 2 3 4 5 6 7 8 >8\n");
+	for(i = 0; i < 10; ++i)
+		p = seprint(p, e, "%4d ", refhist[i]);
+	seprint(p, e, "\n");
+	return cstatbuf;
+}
+
+/* Public form of _resetcache. */
+void
+resetcache(void)
+{
+	Cachereq r;
+	Cacheresp rsp;
+
+	if(mypid == threadpid(threadid())) {
+		_resetcache();
+		return;
+	}
+	r.req = CReset;
+	r.resp = chancreate(sizeof(Cacheresp), 0);
+	send(cachechan, &r);
+	recv(r.resp, &rsp);
+	chanfree(r.resp);
+}
--- /sys/src/cmd/θfs/aoe.h Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/θfs/aoe.h Thu Feb 20 02:17:51 2014
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2013, Coraid, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Coraid nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +/* NOTE(review): presumably the values carried in Aoehdr.cmd - confirm */ +enum { + ACata, + ACconfig, + ACmask, + ACres, + ACkey, + ACosd, +}; + +enum { + AQCread, + AQCtest, + AQCprefix, + AQCset, + AQCfset, +}; + +enum { + AOCformat = 0x01, + AOCcreate, + AOClist, + AOCread = 0x05, + AOCwrite, + AOCappend, + AOCflush, + AOCremove = 0x0a, + AOCpcreate, + AOCpremove, + AOCgetattr = 0x0e, + AOCsetattr, + AOCccreate = 0x15, + AOCcremove, + AOCclist, +}; + +enum { + AEunk, + AEcmd, /* bad command */ + AEarg, /* bad argument */ + AEoff, /* device offline */ + AEcfg, /* config string already set */ + AEver, /* unsupported version */ + AEres, /* target reserved */ +}; + +enum { + /* mask commands */ + Mread = 0, + Medit, + + /* mask directives */ + MDnop = 0, + MDadd, + MDdel, + + /* mask errors */ + MEunk = 1, + MEbad, + MEfull, + + /* reserve / release */ + Rrread = 0, + Rrset, + Rrforce, +}; + +enum { + Aoetype = 0x88a2, + Aoesectsz = 512, + Aoemaxcfg = 1024, + + Aoehsz = 24, /* equals the summed field sizes of Aoehdr */ + Aoeatasz = 12, /* equals the summed field sizes of Aoeata */ + Aoecfgsz = 8, /* equals the summed field sizes of Aoecfg */ + Aoerrsz = 2, /* equals the fixed part of Aoerr, before ea0[] */ + Aoemsz = 4, /* equals the summed field sizes of Aoem */ + Aoemdsz = 8, /* equals the summed field sizes of Aoemd */ + + Aoever = 1, + + AFerr = 1<<2, + AFrsp = 1<<3, + + AAFwrite = 1, + AAFext = 1<<6, +}; + +typedef struct Aoehdr Aoehdr; +typedef struct Aoeata Aoeata; +typedef struct Aoecfg Aoecfg; +typedef struct Aoemd Aoemd; +typedef struct Aoem Aoem; +typedef struct Aoerr Aoerr; +typedef struct Aoeosd Aoeosd; + +/* all members below are uchar or uchar arrays, so the structs carry no padding */ +struct Aoehdr { + uchar dst[Eaddrlen]; + uchar src[Eaddrlen]; + uchar type[2]; + uchar verflag; + uchar error; + uchar major[2]; + uchar minor; + uchar cmd; + uchar tag[4]; +}; + +struct Aoeata { + uchar aflag; + uchar errfeat; + uchar scnt; + uchar cmdstat; + uchar lba[6]; + uchar res[2]; +}; + +struct Aoecfg { + uchar bufcnt[2]; + uchar fwver[2]; + uchar scnt; + uchar verccmd; + uchar cslen[2]; +}; + +struct Aoemd { + uchar dres; + uchar dcmd; + uchar ea[Eaddrlen]; +}; + +struct Aoem { + uchar mres; + uchar mcmd; + uchar merr; + uchar mcnt; +}; + +struct Aoerr { + uchar rcmd; + uchar nea; + uchar ea0[]; /* flexible array; NOTE(review): presumably nea addresses follow - confirm */ +}; + +struct Aoeosd { + uchar ocmd; + uchar oflag; + uchar olen[2]; + uchar
opid[8]; + uchar ooid[8]; + uchar oaddr[8]; +};