ring-buffer file system written long ago. Reference: /n/atom/patch/applied2013/rbfs Date: Wed Jun 19 22:08:04 CES 2013 Signed-off-by: quanstro@quanstro.net --- /sys/src/cmd/rbfs/dat.h Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/rbfs/dat.h Wed Jun 19 22:07:49 2013 @@ -0,0 +1,21 @@ +#include +#include +#include + +typedef struct Ring Ring; +struct Ring { + RWLock; + Ring *next; + char name[28]; + uint esz; + uint n; + ulong r, w; + int *ltab; + char **etab; + ulong *ttab; +}; + +Ring *ringalloc(char *, int, int); +int ringread(Ring*, char*, int, vlong, int*); +vlong ringsize(Ring*); +void ringwrite(Ring*, char*, int, vlong, int*); --- /sys/src/cmd/rbfs/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/rbfs/mkfile Wed Jun 19 22:07:49 2013 @@ -0,0 +1,10 @@ + +#include + +enum { + Operm = 0x3, + Nram = 2048, + Maxsize = 768*1024*1024, + Maxfdata = 8192, +}; + +typedef struct Fid Fid; +typedef struct Ram Ram; + +struct Fid { + short busy; + short open; + short rclose; + int fid; + Fid *next; + char *user; + Ram *ram; +}; + +struct Ram { + short busy; + short open; + long parent; + Qid qid; + long perm; + char *name; + ulong atime; + ulong mtime; + char *user; + char *group; + char *muid; + Ring *r; +}; + +enum { + Pexec = 1, + Pwrite = 2, + Pread = 4, + Pother = 1, + Pgroup = 8, + Powner = 64, +}; + +ulong path; /* incremented for each new file */ +Fid *fids; +Ram ram[Nram]; +int nram; +int mfd[2]; +char *user; +uchar mdata[IOHDRSZ+Maxfdata]; +uchar rdata[Maxfdata]; /* buffer for data in reply */ +uchar statbuf[STATMAX]; +Fcall thdr; +Fcall rhdr; +int messagesize = sizeof mdata; + +void io(void); +int perm(Fid*, Ram*, int); + +char Eperm[] = "permission denied"; +char Enotdir[] = "not a directory"; +char Enoauth[] = "rbfs: authentication not required"; +char Enotexist[] = "file does not exist"; +char Einuse[] = "file in use"; +char Eexist[] = "file exists"; +char Eisdir[] = "file is a directory"; +char Enotowner[] = "not owner"; +char Eisopen[] = "file already open for I/O"; +char Excl[] = "exclusive use file already open"; +char Ename[] = "illegal name"; +char Eversion[] = "unknown 9P version"; +char Enotempty[] = "directory not empty"; +char Enomem[] = "out of memory"; + +int debug; +int private; +int defline = 256; +int defn = 1024; +static int memlim = 1; + +void +notifyf(void *a, char *s) +{ + USED(a); + if(strncmp(s, "interrupt", 9) == 0) + noted(NCONT); + noted(NDFLT); +} + +void* +emalloc(ulong n) +{ + void *p; + + p = malloc(n); + if(!p) + sysfatal(Enomem); + memset(p, 0, n); + return p; +} + +void* +erealloc(void *p, ulong n) +{ + p = realloc(p, n); + if(!p) + sysfatal(Enomem); + return p; +} + +char * +estrdup(char *q) +{ + char *p; + int n; + + n = strlen(q) + 1; + p = malloc(n); + if(!p) + sysfatal(Enomem); + memmove(p, q, n); + return p; +} + +void +usage(void) +{ + fprint(2, "usage: %s [-Dipsu] [-n lines] [-l linesz] [-m mountpoint] [-S srvname]\n", argv0); + exits("usage"); +} + +void +main(int argc, char *argv[]) +{ + Ram *r; + char *defmnt, *service; + int p[2]; + int fd; + int stdio = 0; + + service = "rb"; + defmnt = "/n/rb"; + ARGBEGIN{ + case 'i': + defmnt = 0; + stdio = 1; + mfd[0] = 0; + mfd[1] = 1; + break; + case 'm': + defmnt = EARGF(usage()); + break; + case 'p': + private++; + break; + case 's': + defmnt = 0; + break; + case 'u': + memlim = 0; /* unlimited memory consumption */ + break; + case 'D': + debug = 1; + break; + case 'S': + defmnt = 0; + service = EARGF(usage()); + break; + case 'l': + defline = atoi(EARGF(usage())); + if(defline <= 0) + usage(); + break; + case 'n': + defn = atoi(EARGF(usage())); + if(defn <= 0) + usage(); + break; + default: + usage(); + }ARGEND + + if(memlim && defn*defline >= Maxsize) + usage(); + if(pipe(p) < 0) + sysfatal("pipe failed"); + if(!stdio){ + mfd[0] = p[0]; + mfd[1] = p[0]; + if(defmnt == 0){ + char buf[64]; + snprint(buf, sizeof buf, "#s/%s", service); + fd = create(buf, OWRITE|ORCLOSE, 0666); + if(fd < 0) + sysfatal("create failed"); + sprint(buf, "%d", p[1]); + if(write(fd, buf, strlen(buf)) < 0) + sysfatal("writing service file"); + } + } + + user = getuser(); + notify(notifyf); + nram = 1; + r = &ram[0]; + r->busy = 1; + r->r = 0; + r->perm = DMDIR | 0775; + r->qid.type = QTDIR; + r->qid.path = 0LL; + r->qid.vers = 0; + r->parent = 0; + r->user = user; + r->group = user; + r->muid = user; + r->atime = time(0); + r->mtime = r->atime; + r->name = estrdup("."); + + if(debug) { + fmtinstall('F', fcallfmt); + fmtinstall('M', dirmodefmt); + } + switch(rfork(RFFDG|RFPROC|RFNAMEG|RFNOTEG)){ + case -1: + sysfatal("fork"); + case 0: + close(p[1]); + io(); + break; + default: + close(p[0]); /* don't deadlock if child fails */ + if(defmnt && mount(p[1], -1, defmnt, MREPL|MCREATE, "") < 0) + sysfatal("mount failed"); + } + exits(0); +} + +Fid * +newfid(int fid) +{ + Fid *f, *ff; + + ff = 0; + for(f = fids; f; f = f->next) + if(f->fid == fid) + return f; + else if(!ff && !f->busy) + ff = f; + if(ff){ + ff->fid = fid; + return ff; + } + f = emalloc(sizeof *f); + f->ram = nil; + f->fid = fid; + f->next = fids; + fids = f; + return f; +} + +static int +emptydir(Ram *dr) +{ + long didx = dr - ram; + Ram *r; + + for(r=ram; r<&ram[nram]; r++) + if(r->busy && didx==r->parent) + return 0; + return 1; +} + +char* +realremove(Ram *r) +{ + if(r->qid.type & QTDIR && !emptydir(r)) + return Enotempty; + if(r->r) + free(r->r); + r->r = 0; + r->parent = 0; + memset(&r->qid, 0, sizeof r->qid); + free(r->name); + r->name = nil; + r->busy = 0; + return nil; +} + +char * +rclunk(Fid *f) +{ + char *e = nil; + + if(f->open) + f->ram->open--; + if(f->rclose) + e = realremove(f->ram); + f->busy = 0; + f->open = 0; + f->ram = 0; + return e; +} + +char* +rversion(Fid*) +{ + Fid *f; + + for(f = fids; f; f = f->next) + if(f->busy) + rclunk(f); + if(thdr.msize > sizeof mdata) + rhdr.msize = sizeof mdata; + else + rhdr.msize = thdr.msize; + messagesize = rhdr.msize; + if(strncmp(thdr.version, "9P2000", 6) != 0) + return Eversion; + rhdr.version = "9P2000"; + return 0; +} + +uint +ramstat(Ram *r, uchar *buf, uint nbuf) +{ + int n; + Dir dir; + + dir.name = r->name; + dir.qid = r->qid; + dir.mode = r->perm; + dir.length = r->r? ringsize(r->r): 0; + dir.uid = r->user; + dir.gid = r->group; + dir.muid = r->muid; + dir.atime = r->atime; + dir.mtime = r->mtime; + n = convD2M(&dir, buf, nbuf); + if(n > 2) + return n; + return 0; +} + + +int +perm(Fid *f, Ram *r, int p) +{ + if((p*Pother) & r->perm) + return 1; + if(strcmp(f->user, r->group)==0 && ((p*Pgroup) & r->perm)) + return 1; + if(strcmp(f->user, r->user)==0 && ((p*Powner) & r->perm)) + return 1; + return 0; +} + +char* +rauth(Fid*) +{ + return Enoauth; +} + +char* +rflush(Fid *f) +{ + USED(f); + return 0; +} + +char* +rattach(Fid *f) +{ + /* no authentication! */ + f->busy = 1; + f->rclose = 0; + f->ram = &ram[0]; + rhdr.qid = f->ram->qid; + if(thdr.uname[0]) + f->user = estrdup(thdr.uname); + else + f->user = "none"; + if(strcmp(user, "none") == 0) + user = f->user; + return 0; +} + +char* +clone(Fid *f, Fid **nf) +{ + if(f->open) + return Eisopen; + if(f->ram->busy == 0) + return Enotexist; + *nf = newfid(thdr.newfid); + (*nf)->busy = 1; + (*nf)->open = 0; + (*nf)->rclose = 0; + (*nf)->ram = f->ram; + (*nf)->user = f->user; /* no ref count; the leakage is minor */ + return 0; +} + +char* +rwalk(Fid *f) +{ + Ram *r, *fram; + char *name; + Ram *parent; + Fid *nf; + char *err; + ulong t; + int i; + + err = nil; + nf = nil; + rhdr.nwqid = 0; + if(thdr.newfid != thdr.fid){ + err = clone(f, &nf); + if(err) + return err; + f = nf; /* walk the new fid */ + } + fram = f->ram; + if(thdr.nwname > 0){ + t = time(0); + for(i=0; iqid.type & QTDIR) == 0){ + err = Enotdir; + break; + } + if(fram->busy == 0){ + err = Enotexist; + break; + } + fram->atime = t; + name = thdr.wname[i]; + if(strcmp(name, ".") == 0){ + Found: + rhdr.nwqid++; + rhdr.wqid[i] = fram->qid; + continue; + } + parent = &ram[fram->parent]; + if(!perm(f, parent, Pexec)){ + err = Eperm; + break; + } + if(strcmp(name, "..") == 0){ + fram = parent; + goto Found; + } + for(r=ram; r < &ram[nram]; r++) + if(r->busy && r->parent==fram-ram && strcmp(name, r->name)==0){ + fram = r; + goto Found; + } + break; + } + if(i==0 && err == nil) + err = Enotexist; + } + if(nf != nil && (err!=nil || rhdr.nwqidbusy = 0; + f->ram = nil; + } + if(rhdr.nwqid > 0) + err = nil; /* didn't get everything in 9P2000 right! */ + if(rhdr.nwqid == thdr.nwname) /* update the fid after a successful walk */ + f->ram = fram; + return err; +} + +char * +ropen(Fid *f) +{ + Ram *r; + int mode, trunc; + + if(f->open) + return Eisopen; + r = f->ram; + if(r->busy == 0) + return Enotexist; + if(r->perm & DMEXCL) + if(r->open) + return Excl; + mode = thdr.mode; + if(r->qid.type & QTDIR){ + if(mode != OREAD) + return Eperm; + rhdr.qid = r->qid; + return 0; + } + if(mode & ORCLOSE){ + /* can't remove root; must be able to write parent */ + if(r->qid.path==0 || !perm(f, &ram[r->parent], Pwrite)) + return Eperm; + f->rclose = 1; + } + trunc = mode & OTRUNC; + mode &= Operm; + if(mode==OWRITE || mode==ORDWR || trunc) + if(!perm(f, r, Pwrite)) + return Eperm; + if(mode==OREAD || mode==ORDWR) + if(!perm(f, r, Pread)) + return Eperm; + if(mode==OEXEC) + if(!perm(f, r, Pexec)) + return Eperm; + if(trunc && (r->perm&DMAPPEND)==0){ + if(r->r) + free(r->r); + r->r = 0; + r->qid.vers++; + } + rhdr.qid = r->qid; + rhdr.iounit = messagesize-IOHDRSZ; + f->open = 1; + r->open++; + return 0; +} + +char * +rcreate(Fid *f) +{ + Ram *r; + char *name; + long parent, prm; + + if(f->open) + return Eisopen; + if(f->ram->busy == 0) + return Enotexist; + parent = f->ram - ram; + if((f->ram->qid.type&QTDIR) == 0) + return Enotdir; + /* must be able to write parent */ + if(!perm(f, f->ram, Pwrite)) + return Eperm; + prm = thdr.perm; + name = thdr.name; + if(strcmp(name, ".")==0 || strcmp(name, "..")==0) + return Ename; + for(r=ram; r<&ram[nram]; r++) + if(r->busy && parent==r->parent) + if(strcmp((char*)name, r->name)==0) + return Einuse; + for(r=ram; r->busy; r++) + if(r == &ram[Nram-1]) + return "no free ram resources"; + r->busy = 1; + r->qid.path = ++path; + r->qid.vers = 0; + if(prm & DMDIR) + r->qid.type |= QTDIR; + r->parent = parent; + free(r->name); + r->name = estrdup(name); + r->user = f->user; + r->group = f->ram->group; + r->muid = f->ram->muid; + if(prm & DMDIR) + prm = (prm&~0777) | (f->ram->perm&prm&0777); + else + prm = (prm&(~0777|0111)) | (f->ram->perm&prm&0666); + r->perm = prm; + if(r-ram >= nram) + nram = r - ram + 1; + r->atime = time(0); + r->mtime = r->atime; + f->ram->mtime = r->atime; + f->ram = r; + rhdr.qid = r->qid; + rhdr.iounit = messagesize-IOHDRSZ; + f->open = 1; + if(thdr.mode & ORCLOSE) + f->rclose = 1; + r->open++; + return 0; +} + +char* +rread(Fid *f) +{ + Ram *r; + uchar *buf; + long off; + int n, m, cnt; + + if(f->ram->busy == 0) + return Enotexist; + n = 0; + rhdr.count = 0; + off = thdr.offset; + buf = rdata; + cnt = thdr.count; + if(cnt > messagesize) /* shouldn't happen, anyway */ + cnt = messagesize; + if(f->ram->qid.type & QTDIR){ + for(r=ram+1; off > 0; r++){ + if(r->busy && r->parent==f->ram-ram) + off -= ramstat(r, statbuf, sizeof statbuf); + if(r == &ram[nram-1]) + return 0; + } + for(; r<&ram[nram] && n < cnt; r++){ + if(!r->busy || r->parent!=f->ram-ram) + continue; + m = ramstat(r, buf+n, cnt-n); + if(m == 0) + break; + n += m; + } + rhdr.data = (char*)rdata; + rhdr.count = n; + return 0; + } + r = f->ram; + r->atime = time(0); + n = -1; + rhdr.data = (char*)rdata; + rhdr.count = ringread(r->r, rhdr.data, cnt, off, &n); + return 0; +} + +char* +rwrite(Fid *f) +{ + int cnt, n; + ulong off; + Ram *r; + + r = f->ram; + if(r->busy == 0) + return Enotexist; + off = thdr.offset; + if(r->perm & DMAPPEND) + off = r->r? ringsize(r->r): 0; + cnt = thdr.count; + if(r->qid.type & QTDIR) + return Eisdir; + if(memlim && off+cnt >= Maxsize) /* sanity check */ + return "write too big"; + if(r->r == nil) + r->r = ringalloc("x", defline, defn); + ringwrite(r->r, thdr.data, cnt, off, &n); + r->qid.vers++; + r->mtime = time(0); + rhdr.count = cnt; + return 0; +} + +char* +rremove(Fid *f) +{ + Ram *r; + + if(f->open) + f->ram->open--; + f->busy = 0; + f->open = 0; + r = f->ram; + f->ram = 0; + if(r->qid.path == 0 || !perm(f, &ram[r->parent], Pwrite)) + return Eperm; + ram[r->parent].mtime = time(0); + return realremove(r); +} + +char * +rstat(Fid *f) +{ + if(f->ram->busy == 0) + return Enotexist; + rhdr.nstat = ramstat(f->ram, statbuf, sizeof statbuf); + rhdr.stat = statbuf; + return 0; +} + +char * +rwstat(Fid *f) +{ + int ndata; + Dir dir; + Ram *r, *s; + + if(f->ram->busy == 0) + return Enotexist; + convM2D(thdr.stat, thdr.nstat, &dir, (char*)statbuf); + r = f->ram; + ndata = r->r? ringsize(r->r): 0; + /* + * To change length, must have write permission on file. + */ + if(dir.length!=~0 && dir.length!=ndata) + if(!perm(f, r, Pwrite)) + return Eperm; + + /* + * To change name, must have write permission in parent + * and name must be unique. + */ + if(dir.name[0]!='\0' && strcmp(dir.name, r->name)!=0){ + if(!perm(f, &ram[r->parent], Pwrite)) + return Eperm; + for(s=ram; s<&ram[nram]; s++) + if(s->busy && s->parent==r->parent) + if(strcmp(dir.name, s->name)==0) + return Eexist; + } + + /* + * To change mode, must be owner or group leader. + * Because of lack of users file, leader=>group itself. + */ + if(dir.mode!=~0 && r->perm!=dir.mode){ + if(strcmp(f->user, r->user) != 0) + if(strcmp(f->user, r->group) != 0) + return Enotowner; + } + + /* + * To change group, must be owner and member of new group, + * or leader of current group and leader of new group. + * Second case cannot happen, but we check anyway. + */ + if(dir.gid[0]!='\0' && strcmp(r->group, dir.gid)!=0){ + if(strcmp(f->user, r->user) == 0) + // if(strcmp(f->user, dir.gid) == 0) + goto ok; + if(strcmp(f->user, r->group) == 0) + if(strcmp(f->user, dir.gid) == 0) + goto ok; + return Enotowner; + ok:; + } + + /* all ok; do it */ + if(dir.mode != ~0){ + dir.mode &= ~DMDIR; /* cannot change dir bit */ + dir.mode |= r->perm&DMDIR; + r->perm = dir.mode; + } + if(dir.name[0] != '\0'){ + free(r->name); + r->name = estrdup(dir.name); + } + if(dir.gid[0] != '\0') + r->group = estrdup(dir.gid); + if(dir.length!=~0 && dir.length!=ndata){ + print("fixme\n"); + /*r->r = ringrealloc(r, dir.length)*/; + } + ram[r->parent].mtime = time(0); + return 0; +} + +char *(*fcalls[])(Fid*) = { + [Tversion] rversion, + [Tflush] rflush, + [Tauth] rauth, + [Tattach] rattach, + [Twalk] rwalk, + [Topen] ropen, + [Tcreate] rcreate, + [Tread] rread, + [Twrite] rwrite, + [Tclunk] rclunk, + [Tremove] rremove, + [Tstat] rstat, + [Twstat] rwstat, +}; + +int needfid[] = { + [Tversion] 0, + [Tflush] 0, + [Tauth] 0, + [Tattach] 0, + [Twalk] 1, + [Topen] 1, + [Tcreate] 1, + [Tread] 1, + [Twrite] 1, + [Tclunk] 1, + [Tremove] 1, + [Tstat] 1, + [Twstat] 1, +}; + + +void +io(void) +{ + char *err, buf[40]; + int n, pid, ctl; + Fid *fid; + + pid = getpid(); + if(private){ + snprint(buf, sizeof buf, "/proc/%d/ctl", pid); + ctl = open(buf, OWRITE); + if(ctl < 0){ + fprint(2, "can't protect rbfs\n"); + }else{ + fprint(ctl, "noswap\n"); + fprint(ctl, "private\n"); + close(ctl); + } + } + + for(;;){ + /* + * reading from a pipe or a network device + * will give an error after a few eof reads. + * however, we cannot tell the difference + * between a zero-length read and an interrupt + * on the processes writing to us, + * so we wait for the error. + */ + n = read9pmsg(mfd[0], mdata, messagesize); + if(n < 0){ + rerrstr(buf, sizeof buf); + if(buf[0]=='\0' || strstr(buf, "hungup")) + exits(""); + sysfatal("mount read"); + } + if(n == 0) + continue; + if(convM2S(mdata, n, &thdr) == 0) + continue; + + if(debug) + fprint(2, "rbfs %d:<-%F\n", pid, &thdr); + + if(thdr.type<0 || thdr.type>=nelem(fcalls) || !fcalls[thdr.type]) + err = "bad fcall type"; + else if(((fid=newfid(thdr.fid))==nil || !fid->ram) && needfid[thdr.type]) + err = "fid not in use"; + else + err = (*fcalls[thdr.type])(fid); + if(err){ + rhdr.type = Rerror; + rhdr.ename = err; + }else{ + rhdr.type = thdr.type + 1; + rhdr.fid = thdr.fid; + } + rhdr.tag = thdr.tag; + if(debug) + fprint(2, "rbfs %d:->%F\n", pid, &rhdr);/**/ + n = convS2M(&rhdr, mdata, messagesize); + if(n == 0) + sysfatal("convS2M error on write"); + if(write(mfd[1], mdata, n) != n) + sysfatal("mount write"); + } +} --- /sys/src/cmd/rbfs/rb.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/rbfs/rb.c Wed Jun 19 22:07:49 2013 @@ -0,0 +1,89 @@ +#include "dat.h" + +Ring* +ringalloc(char *name, int esz, int n) +{ + int sz, i; + char *p; + Ring *r; + + sz = 2*sizeof *r; + sz += n*(esz + sizeof r->ltab[0] + sizeof r->ttab[0]); + r = malloc(sz); + memset(r, 0, sz); + snprint(r->name, sizeof r->name, "%s", name); + r->ltab = (int*)(r + 1); + r->ttab = (ulong*)(r->ltab + n); + r->etab = (char**)(r->ttab + n); + p = (char*)(r->etab + n); + for(i = 0; i < n; i++){ + r->etab[i] = p; + p += esz; + } + r->esz = esz; + r->n = n; + return r; +} + +void +ringwrite(Ring *r, char *s, int l, vlong, int *idx) +{ + uint i; + + wlock(r); + if(r->r + r->n <= r->w) + r->r = r->w - r->n + 1; + *idx = r->w++; + i = (uint)*idx % r->n; + memmove(r->etab[i], s, l); + r->ltab[i] = l; + r->ttab[i] = time(0); + wunlock(r); +} + +vlong +ringsize(Ring *r) +{ + int i; + vlong sz; + + sz = 0; + rlock(r); + for(i = r->r; i < r->w; i++) + sz += r->ltab[i % r->n]; + runlock(r); + return sz; +} + +int +ringread(Ring *r, char *buf, int l, vlong o, int *idx) +{ + char *p, *e, *s; + int n, i; + + p = buf; + e = buf + l; + rlock(r); + if(*idx == -1) + *idx = r->r; + for(i = *idx; i != r->w && p != e; i++){ + n = r->ltab[i % r->n]; + if(o > n){ + o -= n; + continue; + } + s = r->etab[i % r->n]; + if(o > 0){ + n -= o; + s += o; + o = 0; + } + if(p + n > e) + n = e - p; + memmove(p, s, n); + p += n; + } + runlock(r); + *idx = i; + return p - buf; +}