# HG changeset patch # User Noah Evans # Date 1332947681 -7200 # Node ID 6ba826535859dc3534915066040016e79decd8ce # Parent 37ca23c2c31d9b8bffc6651ac9e6bfc04d412990 kexec: Kexec ported to the latest version of Nix. Untested, but this should be the minimum set of changes to get everything to work. Kexec provides for the in-kernel execution of (relinked) standard Plan 9 executables. It's not yet what you'd expect, though: traditional libc will not work. The current kludge is to replace the system calls in libc with function pointers to the relevant kernel functions (use nm on your compiled kernel) or to provide wrappers which marshal the system calls directly (still using nm to find the syscalls). Libdynld and a modified devdynld are probably a better interface than the current approach. R=rminnich, nemo.mbox, nixiedev, quanstro CC=nix-dev http://codereview.appspot.com/5936051 diff -r 37ca23c2c31d -r 6ba826535859 sys/src/nix/k10/k8cpukexec --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/k10/k8cpukexec Wed Mar 28 17:14:41 2012 +0200 @@ -0,0 +1,202 @@ +dev +dev + root + cons + arch + env + pipe + proc + kexec + cmd + mnt + srv + dup + rtc + ssl + cap + kprof +# pmc pmcio + segment + acpi + tube + zp + +# add to get cec in the kernel +# cec + + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno + + uart + +uart +dev + uarti8250 + uartpci pci + +ip +dev + tcp + udp + ipifc + icmp + icmp6 + +link +dev + ether8169 pci ethermii + ether82557 pci + ether82563 pci + etherigbe pci ethermii + ethermedium + loopbackmedium + netdevmedium + +# ht + +misc +dev +# cache + mp apic ioapic msi pci sipi +# rdb + +# +#boot cpu +# int cpuflag = 1; +#boot cpu boot $3 +# int cpuflag = 1; +# char* bootdisk = "$3"; +#boot rootdir $3 +# char* rootdir = "$3"; +#boot (bboot|romboot|dosboot) +# int cpuflag = 1; +# char* bootprog = $2; +#boot boot $3 +# char* bootdisk = "$3"; +# +boot cpu + tcp + +rootdir + bootk8cpu.out boot + 
/amd64/bin/auth/factotum factotum + /amd64/bin/ip/ipconfig ipconfig + ../root/nvram nvram + +conf + int cpuserver = 1; + +# +#dbgflg +# chan 'c' +# apic 'A' +# hpet 'H' +# ht 'H' +# ioapic 'I' +# mp 'M' +# pci 'P' +# arch 'V' +# +dbgflg + acore 'c' + apic 'A' + arch 'V' + asm 'm' + devacpi 'C' + devsegment 'z' + devtube 'T' + devzp 'z' + hpet 'H' + ht 'H' + image 'p' + ioapic 'I' + kexec 'k' + main 'x' + memory 'm' + mp 'M' + nixcall 'n' + page 'p' + pager 'p' + physalloc 'm' + sysproc 'E' + sysseg 'p' + syssem 'S' + syszio 'z' + tcore 'c' + mmu 'v' + +amd64 +dev + l32p + l64v + l64idt + l64acidt + l64cpuid + l64syscall + l64acsyscall + l64fpu + acore + arch + archk10 + asm + cga + crap + fpu + i8254 + i8259 + kbd + main + map + memory + mmu + multiboot + qmalloc + random + syscall + tcore + trap + vsvm + physalloc + +port + alarm + allocb + chan + dev + devtab + edf + fault + image + kexec + latin1 + nixcall + page + pager + parse + pgrp + portclock + print + proc + ps + qio + qlock + rebootcmd + segment + sysauth + sysfile + sysproc + sysseg + syssem + systab + taslock + tod + syszio + syscallfmt + +# +#dir +# pc -.I. 
+# +dir + 386 + ip + port + +lib + libc + libip + libsec diff -r 37ca23c2c31d -r 6ba826535859 sys/src/nix/port/devcmd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/port/devcmd.c Wed Mar 28 17:14:41 2012 +0200 @@ -0,0 +1,733 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "kexec.h" + +enum +{ + Qtopdir, /* top level directory */ + Qcmd, + Qclonus, + Qconvdir, + Qconvbase, + Qdata = Qconvbase, + Qstderr, + Qctl, + Qalloc, + Qexec, + Qstatus, + Qwait, + + Debug=0 /* to help debug os.c */ +}; +#define TYPE(x) ((ulong)(x).path & 0xf) +#define CONV(x) (((ulong)(x).path >> 4)&0xfff) +#define QID(c, y) (((c)<<4) | (y)) + +typedef struct Conv Conv; +struct Conv +{ + int x; + int inuse; + int fd[3]; /* stdin, stdout, and stderr */ + int count[3]; /* number of readers on stdin/stdout/stderr */ + int perm; + ulong esz; + char* owner; + char* state; + Cmdbuf* cmd; + char* dir; + QLock l; /* protects state changes */ + Queue* waitq; + void* child; + char* error; /* on start up */ + int nice; + short killonclose; + short killed; + Rendez startr; + Proc *p; +}; + +static struct +{ + QLock l; + int nc; + int maxconv; + Conv** conv; +} cmd; + +static Conv* cmdclone(char*); +static void cmdproc(void*); + +static int +cmd3gen(Chan *c, int i, Dir *dp) +{ + Qid q; + Conv *cv; + + cv = cmd.conv[CONV(c->qid)]; + switch(i){ + default: + return -1; + case Qdata: + mkqid(&q, QID(CONV(c->qid), Qdata), 0, QTFILE); + devdir(c, q, "data", 0, cv->owner, cv->perm, dp); + return 1; + case Qstderr: + mkqid(&q, QID(CONV(c->qid), Qstderr), 0, QTFILE); + devdir(c, q, "stderr", 0, cv->owner, 0444, dp); + return 1; + case Qalloc: + mkqid(&q, QID(CONV(c->qid), Qalloc), 0, QTFILE); + devdir(c, q, "alloc", 0, cv->owner, cv->perm, dp); + return 1; + case Qexec: + mkqid(&q, QID(CONV(c->qid), Qexec), 0, QTFILE); + devdir(c, q, "exec", 0, cv->owner, cv->perm, dp); + return 1; + case Qctl: + mkqid(&q, 
QID(CONV(c->qid), Qctl), 0, QTFILE); + devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp); + return 1; + case Qstatus: + mkqid(&q, QID(CONV(c->qid), Qstatus), 0, QTFILE); + devdir(c, q, "status", 0, cv->owner, 0444, dp); + return 1; + case Qwait: + mkqid(&q, QID(CONV(c->qid), Qwait), 0, QTFILE); + devdir(c, q, "wait", 0, cv->owner, 0444, dp); + return 1; + } +} + +static int +cmdgen(Chan *c, char *name, Dirtab *d, int nd, int s, Dir *dp) +{ + Qid q; + Conv *cv; + + USED(name); + USED(nd); + USED(d); + + if(s == DEVDOTDOT){ + switch(TYPE(c->qid)){ + case Qtopdir: + case Qcmd: + mkqid(&q, QID(0, Qtopdir), 0, QTDIR); + devdir(c, q, "#C", 0, eve, DMDIR|0555, dp); + break; + case Qconvdir: + mkqid(&q, QID(0, Qcmd), 0, QTDIR); + devdir(c, q, "cmd", 0, eve, DMDIR|0555, dp); + break; + default: + panic("cmdgen %llux", c->qid.path); + } + return 1; + } + + switch(TYPE(c->qid)) { + case Qtopdir: + if(s >= 1) + return -1; + mkqid(&q, QID(0, Qcmd), 0, QTDIR); + devdir(c, q, "cmd", 0, "cmd", DMDIR|0555, dp); + return 1; + case Qcmd: + if(s < cmd.nc) { + cv = cmd.conv[s]; + mkqid(&q, QID(s, Qconvdir), 0, QTDIR); + sprint(up->genbuf, "%d", s); + devdir(c, q, up->genbuf, 0, cv->owner, DMDIR|0555, dp); + return 1; + } + s -= cmd.nc; + if(s == 0){ + mkqid(&q, QID(0, Qclonus), 0, QTFILE); + devdir(c, q, "clone", 0, "cmd", 0666, dp); + return 1; + } + return -1; + case Qclonus: + if(s == 0){ + mkqid(&q, QID(0, Qclonus), 0, QTFILE); + devdir(c, q, "clone", 0, "cmd", 0666, dp); + return 1; + } + return -1; + case Qconvdir: + return cmd3gen(c, Qconvbase+s, dp); + case Qdata: + case Qstderr: + case Qalloc: + case Qexec: + case Qctl: + case Qstatus: + case Qwait: + return cmd3gen(c, TYPE(c->qid), dp); + } + return -1; +} + +static void +cmdinit(void) +{ + cmd.maxconv = 1000; + cmd.conv = mallocz(sizeof(Conv*)*(cmd.maxconv+1), 1); + /* cmd.conv is checked by cmdattach, below */ +} + +static Chan * +cmdattach(char *spec) +{ + Chan *c; + + if(cmd.conv == nil) + error(Enomem); + c = 
devattach('C', spec); + mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR); + return c; +} + +static Walkqid* +cmdwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, cmdgen); +} + +static long +cmdstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, cmdgen); +} + +static Chan * +cmdopen(Chan *c, int omode) +{ + int perm; + Conv *cv; + char *user; + + perm = 0; + omode = openmode(omode); + switch(omode) { + case OREAD: + perm = 4; + break; + case OWRITE: + perm = 2; + break; + case ORDWR: + perm = 6; + break; + } + + switch(TYPE(c->qid)) { + default: + break; + case Qtopdir: + case Qcmd: + case Qconvdir: + case Qstatus: + if(omode != OREAD) + error(Eperm); + break; + case Qclonus: + qlock(&cmd.l); + if(waserror()){ + qunlock(&cmd.l); + nexterror(); + } + cv = cmdclone(up->user); + poperror(); + qunlock(&cmd.l); + if(cv == 0) + error(Enodev); + mkqid(&c->qid, QID(cv->x, Qctl), 0, QTFILE); + break; + case Qdata: + case Qstderr: + case Qctl: + case Qalloc: + case Qexec: + case Qwait: + qlock(&cmd.l); + cv = cmd.conv[CONV(c->qid)]; + qlock(&cv->l); + if(waserror()){ + qunlock(&cv->l); + qunlock(&cmd.l); + nexterror(); + } + user = up->user; + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(user, cv->owner) != 0 || + (perm & cv->perm) != perm) + error(Eperm); + } + switch(TYPE(c->qid)){ + case Qdata: + if(omode == OWRITE || omode == ORDWR) + cv->count[0]++; + if(omode == OREAD || omode == ORDWR) + cv->count[1]++; + break; + case Qstderr: + if(omode != OREAD) + error(Eperm); + cv->count[2]++; + break; + case Qwait: + if(cv->waitq == nil) + cv->waitq = qopen(1024, Qmsg, nil, 0); + break; + } + cv->inuse++; + if(cv->inuse == 1) { + cv->state = "Open"; + kstrdup(&cv->owner, user); + cv->perm = 0660; + cv->nice = 0; + } + poperror(); + qunlock(&cv->l); + qunlock(&cmd.l); + break; + } + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +closeconv(Conv *c) +{ + kstrdup(&c->owner, "cmd"); + 
kstrdup(&c->dir, "FIXME"); + c->perm = 0666; + c->state = "Closed"; + c->killonclose = 0; + c->killed = 0; + c->nice = 0; + free(c->cmd); + c->cmd = nil; + if(c->waitq != nil){ + qfree(c->waitq); + c->waitq = nil; + } + free(c->error); + c->error = nil; +} + +static void +cmdfdclose(Conv *c, int fd) +{ + if(--c->count[fd] == 0 && c->fd[fd] != -1){ +// close(c->fd[fd]); + c->fd[fd] = -1; + } +} + +static void +cmdclose(Chan *c) +{ + Conv *cc; + int r; + + if((c->flag & COPEN) == 0) + return; + + switch(TYPE(c->qid)) { + case Qctl: + case Qalloc: + case Qexec: + case Qdata: + case Qstderr: + case Qwait: + cc = cmd.conv[CONV(c->qid)]; + qlock(&cc->l); + if(TYPE(c->qid) == Qdata){ + if(c->mode == OWRITE || c->mode == ORDWR) + cmdfdclose(cc, 0); + if(c->mode == OREAD || c->mode == ORDWR) + cmdfdclose(cc, 1); + }else if(TYPE(c->qid) == Qstderr) + cmdfdclose(cc, 2); + + r = --cc->inuse; + if(cc->child != nil){ + if(!cc->killed) + if(r == 0 || (cc->killonclose && TYPE(c->qid) == Qctl)){ + // oscmdkill(cc->child); + cc->killed = 1; + } + }else if(r == 0) + closeconv(cc); + + qunlock(&cc->l); + break; + } +} + +static long +cmdread(Chan *ch, void *a, long n, vlong offset) +{ + Conv *c; + Proc *p; + char *s, *cmds; + int fd; + char buf[256]; + + USED(offset); + + s = a; + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qcmd: + case Qtopdir: + case Qconvdir: + return devdirread(ch, a, n, 0, 0, cmdgen); + case Qctl: + sprint(up->genbuf, "%ld", CONV(ch->qid)); + return readstr(offset, s, n, up->genbuf); + case Qalloc: + c = cmd.conv[CONV(ch->qid)]; + p = c->p; + snprint(buf, sizeof(buf), "%#p %#p %#p %#p %#p %#p %#p %#p", + p->seg[TSEG]->base, p->seg[TSEG]->top, + p->seg[DSEG]->base, p->seg[DSEG]->top, + p->seg[BSEG]->base, p->seg[BSEG]->top, + p->seg[SSEG]->base, p->seg[SSEG]->top); + return readstr(offset, s, n, buf); + case Qexec: + c = cmd.conv[CONV(ch->qid)]; + snprint(up->genbuf, sizeof(up->genbuf), "%ld", c->esz); + return readstr(offset, s, n, up->genbuf); + 
case Qstatus: + c = cmd.conv[CONV(ch->qid)]; + cmds = ""; + if(c->cmd != nil) + cmds = c->cmd->f[1]; + snprint(up->genbuf, sizeof(up->genbuf), "cmd/%d %d %s %q %q\n", + c->x, c->inuse, c->state, c->dir, cmds); + return readstr(offset, s, n, up->genbuf); + case Qdata: + case Qstderr: + fd = 1; + if(TYPE(ch->qid) == Qstderr) + fd = 2; + c = cmd.conv[CONV(ch->qid)]; + qlock(&c->l); + if(c->fd[fd] == -1){ + qunlock(&c->l); + return 0; + } + qunlock(&c->l); + // osenter(); +// n = read(c->fd[fd], a, n); +// osleave(); +// if(n < 0) +// oserror(); + return n; + case Qwait: + c = cmd.conv[CONV(ch->qid)]; + return qread(c->waitq, a, n); + } +} + +static int +cmdstarted(void *a) +{ + Conv *c; + + c = a; + return c->child != nil || c->error != nil || strcmp(c->state, "Execute") != 0; +} + +enum +{ + CMdir, + CMstart, + CMexec, + CMkill, + CMnice, + CMkillonclose +}; + +static +Cmdtab cmdtab[] = { + CMdir, "dir", 2, + CMstart, "start", 0, + CMexec, "exec", 0, + CMkill, "kill", 1, + CMnice, "nice", 0, + CMkillonclose, "killonclose", 0, +}; + +static long +cmdwrite(Chan *ch, void *a, long n, vlong offset) +{ + int i, r; + Conv *c; + Segment *s; + Cmdbuf *cb; + Cmdtab *ct; + + USED(offset); + + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qctl: + c = cmd.conv[CONV(ch->qid)]; + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, cmdtab, nelem(cmdtab)); + switch(ct->index){ + case CMdir: + kstrdup(&c->dir, cb->f[1]); + break; + case CMstart: + // so what do we do with this? + // we need to do the process. + if(cb->nf < 2) + error(Ebadctl); + c = cmd.conv[CONV(ch->qid)]; + s = c->p->seg[TSEG]; + // XXX: set the text name? 
+ //kstrdup(&c->p->text, cb->f[1]); + kforkexecac(c->p, atoi(cb->f[2]), nil, cb->f+3); + break; + case CMexec: + poperror(); /* cb */ + qlock(&c->l); + if(waserror()){ + qunlock(&c->l); + free(cb); + nexterror(); + } + if(c->child != nil || c->cmd != nil) + error(Einuse); + for(i = 0; i < nelem(c->fd); i++) + if(c->fd[i] != -1) + error(Einuse); + if(cb->nf < 1) + error(Etoosmall); +// kproc("cmdproc", cmdproc, c, 0); /* cmdproc held back until unlock below */ + free(c->cmd); + c->cmd = cb; /* don't free cb */ + c->state = "Execute"; + poperror(); + qunlock(&c->l); + while(waserror()) + ; +// Sleep(&c->startr, cmdstarted, c); + poperror(); + if(c->error) + error(c->error); + return n; /* avoid free(cb) below */ + } + poperror(); + free(cb); + break; + case Qexec: + c = cmd.conv[CONV(ch->qid)]; + s = c->p->seg[TSEG]; + if(s->base+offset+n > s->top) + error(Etoobig); + memmove((void*)(s->base + offset), a, n); + if(offset+n > c->esz) + c->esz = offset+n; + // XXX: can this every not be n? + return n; + case Qdata: + c = cmd.conv[CONV(ch->qid)]; + qlock(&c->l); + if(c->fd[0] == -1){ + qunlock(&c->l); + error(Ehungup); + } + qunlock(&c->l); +// osenter(); +// r = write(c->fd[0], a, n); +// osleave(); + if(r == 0) + error(Ehungup); + if(r < 0) { + /* XXX perhaps should kill writer "write on closed pipe" here, 2nd time around? 
*/ +// oserror(); + } + return r; + } + return n; +} + +static long +cmdwstat(Chan *c, uchar *dp, long n) +{ + Dir *d; + Conv *cv; + + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qctl: + case Qdata: + case Qstderr: + d = malloc(sizeof(*d)+n); + if(d == nil) + error(Enomem); + if(waserror()){ + free(d); + nexterror(); + } + n = convM2D(dp, n, d, (char*)&d[1]); + if(n == 0) + error(Eshortstat); + cv = cmd.conv[CONV(c->qid)]; + if(!iseve() && strcmp(up->user, cv->owner) != 0) + error(Eperm); + if(!emptystr(d->uid)) + kstrdup(&cv->owner, d->uid); + if(d->mode != ~0UL) + cv->perm = d->mode & 0777; + poperror(); + free(d); + break; + } + return n; +} + +static Conv* +cmdclone(char *user) +{ + Conv *c, **pp, **ep; + int i; + + c = nil; + ep = &cmd.conv[cmd.maxconv]; + for(pp = cmd.conv; pp < ep; pp++) { + c = *pp; + if(c == nil) { + c = malloc(sizeof(Conv)); + if(c == nil) + error(Enomem); + qlock(&c->l); + c->inuse = 1; + c->x = pp - cmd.conv; + cmd.nc++; + *pp = c; + break; + } + if(canqlock(&c->l)){ + if(c->inuse == 0 && c->child == nil) + break; + qunlock(&c->l); + } + } + if(pp >= ep) + return nil; + + c->inuse = 1; + kstrdup(&c->owner, user); + kstrdup(&c->dir, "FIXME"); + c->perm = 0660; + c->state = "Closed"; + c->esz = 0; + for(i=0; ifd); i++) + c->fd[i] = -1; + // XXX: this should go somewhere else. 
+ c->p = setupseg(0); + + qunlock(&c->l); + return c; +} + +static void +cmdproc(void *a) +{ + Conv *c; + int n; + char status[ERRMAX]; + void *t; + + c = a; + qlock(&c->l); + if(Debug) + print("f[0]=%q f[1]=%q\n", c->cmd->f[0], c->cmd->f[1]); + if(waserror()){ + if(Debug) + print("failed: %q\n", up->errstr); + kstrdup(&c->error, up->errstr); + c->state = "Done"; + qunlock(&c->l); +// Wakeup(&c->startr); + pexit("cmdproc", 0); + } +// t = oscmd(c->cmd->f+1, c->nice, c->dir, c->fd); +// if(t == nil) +// oserror(); + c->child = t; /* to allow oscmdkill */ + poperror(); + qunlock(&c->l); +// Wakeup(&c->startr); + if(Debug) + print("started\n"); + +// while(waserror()) +// oscmdkill(t); +// osenter(); + mwait(&c->p->ac->icc->fn); + +// n = oscmdwait(t, status, sizeof(status)); +// osleave(); + if(n < 0){ +// oserrstr(up->genbuf, sizeof(up->genbuf)); + n = snprint(status, sizeof(status), "0 0 0 0 %q", up->genbuf); + } + qlock(&c->l); + c->child = nil; +// oscmdfree(t); + if(Debug){ + status[n]=0; + print("done %d %d %d: %q\n", c->fd[0], c->fd[1], c->fd[2], status); + } + if(c->inuse > 0){ + c->state = "Done"; + if(c->waitq != nil) + qproduce(c->waitq, status, n); + }else + closeconv(c); + qunlock(&c->l); + pexit("", 0); +} + +Dev cmddevtab = { + 'C', + "cmd", + + devreset, + cmdinit, + devshutdown, + cmdattach, + cmdwalk, + cmdstat, + cmdopen, + devcreate, + cmdclose, + cmdread, + devbread, + cmdwrite, + devbwrite, + devremove, + cmdwstat +}; diff -r 37ca23c2c31d -r 6ba826535859 sys/src/nix/port/devkexec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/port/devkexec.c Wed Mar 28 17:14:41 2012 +0200 @@ -0,0 +1,407 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "kexec.h" + +enum +{ + Maxkexecsize = 16300, +}; + +int kxdbg = 0; +#define KXDBG if(!kxdbg) {} else print + + + +static Kexecgrp *kexecgrp(Chan *c); +static int kexecwriteable(Chan *c); + + +static Kexecgrp 
kgrp; /* global kexec group containing the kernel configuration */ + +static Kvalue* +kexeclookup(Kexecgrp *kg, uintptr addr, ulong qidpath) +{ + Kvalue *e; + int i; + + for(i=0; inent; i++){ + e = kg->ent[i]; + if(e->qid.path == qidpath || e->addr==addr) + return e; + } + return nil; +} + +static int +kexecgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp) +{ + Kexecgrp *kg; + Kvalue *e; + uintptr addr; + + print("starting gen name %s\n", name); + + if(s == DEVDOTDOT){ + devdir(c, c->qid, "#§", 0, eve, DMDIR|0775, dp); + return 1; + } + print("getting kg name %s\n", name); + + kg = kexecgrp(c); + rlock(kg); + e = 0; + if(name) { + addr = strtoull(name, nil, 0); + print("got addr %p\n", addr); + + e = kexeclookup(kg, addr, -1); + }else if(s < kg->nent) + e = kg->ent[s]; + + if(e == 0) { + runlock(kg); + return -1; + } + + /* make sure name string continues to exist after we release lock */ + // how will we free this? + snprint(up->genbuf, sizeof up->genbuf, "0x%p", addr); + print("up->genbuf %s e 0x%p\n", up->genbuf, e); + print("e qid %d e->addr 0x%p size %ld len %ld\n", e->qid, e->addr, e->size, e->len); + + devdir(c, e->qid, up->genbuf, e->len, eve, 0666, dp); + runlock(kg); + print("finished gen\n"); + + return 1; +} + +#define QPATH(p,d,t) ((p)<<16 | (d)<<8 | (t)<<0) + +static Chan* +kexecattach(char *spec) +{ + Chan *c; +// Kexecgrp *kgrp = nil; + Qid qid; + + + c = devattach(L'§', spec); + c->aux = &kgrp; + return c; +} + +static Walkqid* +kexecwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, kexecgen); +} + + +static long +kexecstat(Chan *c, uchar *db, long n) +{ + long nn; + + if(c->qid.type & QTDIR) + c->qid.vers = kexecgrp(c)->vers; + nn = devstat(c, db, n, 0, 0, kexecgen); + + return nn; +} + +static Chan* +kexecopen(Chan *c, int omode) +{ + Kexecgrp *kg; + Kvalue *e; + int trunc; + + kg = kexecgrp(c); + if(c->qid.type & QTDIR) { + if(omode != OREAD) + error(Eperm); + }else { + trunc = omode & OTRUNC; + 
if(omode != OREAD && !kexecwriteable(c)) + error(Eperm); + if(trunc) + wlock(kg); + else + rlock(kg); + e = kexeclookup(kg, 0, c->qid.path); + if(e == 0) { + if(trunc) + wunlock(kg); + else + runlock(kg); + error(Enonexist); + } + if(trunc && e->size) { // better validity check? + e->qid.vers++; + e->size = 0; + e->len = 0; + } + if(trunc) + wunlock(kg); + else + runlock(kg); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +kexeccreate(Chan *c, char *name, int omode, int) +{ + Kexecgrp *kg; + Kvalue *e; + Kvalue **ent; + uintptr addr; + + addr = strtoull(name, nil, 0); + + if(c->qid.type != QTDIR) + error(Eperm); + + omode = openmode(omode); + kg = kexecgrp(c); + + wlock(kg); + if(waserror()) { + wunlock(kg); + nexterror(); + } + + if(kexeclookup(kg, addr, -1)) + error(Eexist); + + e = smalloc(sizeof(Kvalue)); + e->addr = addr; + + if(kg->nent == kg->ment){ + kg->ment += 32; + ent = smalloc(sizeof(kg->ent[0])*kg->ment); + if(kg->nent) + memmove(ent, kg->ent, sizeof(kg->ent[0])*kg->nent); + free(kg->ent); + kg->ent = ent; + } + e->qid.path = ++kg->path; + e->qid.vers = 0; + kg->vers++; + kg->ent[kg->nent++] = e; + c->qid = e->qid; + + wunlock(kg); + poperror(); + + c->offset = 0; + c->mode = omode; + c->flag |= COPEN; +} + +static void +kexecremove(Chan *c) +{ + int i; + Kexecgrp *kg; + Kvalue *e; + + if(c->qid.type & QTDIR) + error(Eperm); + + kg = kexecgrp(c); + wlock(kg); + e = 0; + for(i=0; inent; i++){ + if(kg->ent[i]->qid.path == c->qid.path){ + e = kg->ent[i]; + kg->nent--; + kg->ent[i] = kg->ent[kg->nent]; + kg->vers++; + break; + } + } + wunlock(kg); + if(e == 0) + error(Enonexist); + free(e); +} + +static void +kexecclose(Chan *c) +{ + /* + * cclose can't fail, so errors from remove will be ignored. + * since permissions aren't checked, + * kexecremove can't not remove it if its there. 
+ */ + if(c->flag & CRCLOSE) + kexecremove(c); +} + +static long +kexecread(Chan *c, void *a, long n, vlong off) +{ + Kexecgrp *kg; + Kvalue *e; + long offset; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, kexecgen); + + kg = kexecgrp(c); + rlock(kg); + e = kexeclookup(kg, 0, c->qid.path); + if(e == 0) { + runlock(kg); + error(Enonexist); + } + + offset = off; + if(offset > e->len) /* protects against overflow converting vlong to long */ + n = 0; + else if(offset + n > e->len) + n = e->len - offset; + if(n <= 0) + n = 0; +// else +// memmove(a, e->value+offset, n); + runlock(kg); + return n; +} + +/* + +need to make slots. the slots themselves can be set somewhere else. + +need make the writes + +open will handle the parsing of the hex numbers. + +no, do it the other way around. just define the slots. +can work on the interface later. + +kmap the space where the values need to stay safe. + +then when that is correct you can do it the other. + +kmap address range +put it in + + +write is going to be significantly different. + +the first thing to do is to make this just work. + add to the kernel cfg. + +*/ + +static long +kexecwrite(Chan *c, void *a, long n, vlong off) +{ + Kexecgrp *kg; + Kvalue *e; + long offset; + + if(n <= 0) + return 0; + offset = off; + if(offset > Maxkexecsize || n > (Maxkexecsize - offset)) + error(Etoobig); + print("a: %s\n", a); + kg = kexecgrp(c); + wlock(kg); + e = kexeclookup(kg, 0, c->qid.path); + if(e == 0) { + wunlock(kg); + error(Enonexist); + } + + // XXX: what to do with what is written? 
+ + e->qid.vers++; + kg->vers++; + wunlock(kg); + return n; +} + +Dev kexecdevtab = { + L'§', + "kexec", + + devreset, + devinit, + devshutdown, + kexecattach, + kexecwalk, + kexecstat, + kexecopen, + kexeccreate, + kexecclose, + kexecread, + devbread, + kexecwrite, + devbwrite, + kexecremove, + devwstat, +}; + +void +kexeccpy(Kexecgrp *to, Kexecgrp *from) +{ + int i; + Kvalue *ne, *e; + + rlock(from); + to->ment = (from->nent+31)&~31; + to->ent = smalloc(to->ment*sizeof(to->ent[0])); + for(i=0; inent; i++){ + e = from->ent[i]; + ne = smalloc(sizeof(Kvalue)); + ne->addr = e->addr; + ne->size = e->size; + ne->qid.path = ++to->path; + to->ent[i] = ne; + } + to->nent = from->nent; + runlock(from); +} + +void +closekgrp(Kexecgrp *kg) +{ + int i; + Kvalue *e; + + if(decref(kg) == 0){ + for(i=0; inent; i++){ + e = kg->ent[i]; + free(e); + } + free(kg->ent); + free(kg); + } +} + +static Kexecgrp* +kexecgrp(Chan *c) +{ + if(c->aux == nil) + return &kgrp; + return c->aux; +} + +static int +kexecwriteable(Chan *c) +{ + return iseve() || c->aux == nil; +} + diff -r 37ca23c2c31d -r 6ba826535859 sys/src/nix/port/kexec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/port/kexec.c Wed Mar 28 17:14:41 2012 +0200 @@ -0,0 +1,454 @@ +#include "u.h" +#include "tos.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include +#include "kexec.h" + + +/* XXX: MOVE ME TO K10 */ + +enum { + Maxslot = 32, +}; + +static uvlong +vl2be(uvlong v) +{ + uchar *p; + + p = (uchar*)&v; + return ((uvlong)((p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3])<<32) + |((uvlong)(p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7]); +} + +static ulong +l2be(long l) +{ + uchar *cp; + + cp = (uchar*)&l; + return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; +} + +typedef struct { + Exec; + uvlong hdr[1]; +} Khdr; + +enum { + AsmNONE = 0, + AsmMEMORY = 1, + AsmRESERVED = 2, + AsmACPIRECLAIM = 3, + AsmACPINVS = 4, + + AsmDEV = 5, +}; + 
+Proc* +setupseg(int core) +{ + Segment *s; + uintptr ka; + Proc *p; + static Pgrp *kpgrp; + + // XXX: we're going to need this for locality domains. + USED(core); + + p = newproc(); + p->psstate = 0; + p->procmode = 0640; + p->kp = 1; + p->noswap = 1; + + p->scallnr = up->scallnr; + memmove(p->arg, up->arg, sizeof(up->arg)); + p->nerrlab = 0; + p->slash = up->slash; + p->dot = up->dot; + if(p->dot) + incref(p->dot); + + memmove(p->note, up->note, sizeof(p->note)); + p->nnote = up->nnote; + p->notified = 0; + p->lastnote = up->lastnote; + p->notify = up->notify; + p->ureg = 0; + p->dbgreg = 0; + + kstrdup(&p->user, eve); + if(kpgrp == 0) + kpgrp = newpgrp(); + p->pgrp = kpgrp; + incref(kpgrp); + + memset(p->time, 0, sizeof(p->time)); + p->time[TReal] = sys->ticks; + + procpriority(p, PriKproc, 0); + + + // XXX: kluge 4 pages of address space for this. + // how will it expand up? gives us <50 kprocs as is. + + /* + * we create the color and core at allocation time, not execution. This + * is probably not the best idea but it's a start. + */ + + // XXX: now that we are asmalloc we are no long proc. + + ka = (uintptr)KADDR(asmalloc(0, BIGPGSZ, AsmMEMORY, 1)); + s = newseg(SG_TEXT|SG_RONLY, ka, 1); + p->seg[TSEG] = s; +// s->color = acpicorecolor(core); + + /* Data. Shared. */ + // XXX; Now that the address space is all funky how are we going to handle shared data segments? + ka = (uintptr)KADDR(asmalloc(0, BIGPGSZ, AsmMEMORY, 2)); + s = newseg(SG_DATA, ka, 1); + p->seg[DSEG] = s; + s->color = p->seg[TSEG]->color; + + /* BSS. Uses asm from data map. 
*/ + p->seg[BSEG] = newseg(SG_BSS, ka+BIGPGSZ, 1); + p->seg[BSEG]->color= up->seg[TSEG]->color; + + /* Stack */ + ka = (uintptr)KADDR(asmalloc(0, BIGPGSZ, AsmMEMORY, 1)); + p->seg[SSEG] = newseg(SG_STACK, ka, 1); + nixprepage(-1); + + return p; +} + +void +kforkexecac(Proc *p, int core, char *ufile, char **argv) +{ + Khdr hdr; + Tos *tos; + Chan *chan; + int argc, i, n; + char *a, *elem, *file, *args; + long hdrsz, magic, textsz, datasz, bsssz; + uintptr textlim, datalim, bsslim, entry, tbase, tsize, dbase, dsize, bbase, bsize, sbase, ssize, stack; + Mach *mp; + static Pgrp *kpgrp; + + DBG("kexec on core %d\n", core); + // XXX: since this is kernel code we can't do attachimage, + // we should be reading the file into kernel memory. + // this only matters if we are using ufile. + // YYY: look at dev reboot for help. + + file = nil; + elem = nil; + chan = nil; + mp = nil; + + USED(chan); + + if(waserror()){ + DBG("kforkexecac: failing: %s\n", up->errstr); + if(file) + free(file); + if(elem) + free(elem); + if(chan) + cclose(chan); + if(core > 0 && mp != nil) + mp->proc = nil; + if(core != 0) + p->ac = nil; + nexterror(); + } + + if(core != 0) + p->ac = getac(p, core); + + argc = 0; + if(ufile != nil){ + panic("ufile not implemented yet"); + file = validnamedup(ufile, 1); + DBG("kforkexecac: up %#p file %s\n", up, file); + chan = namec(file, Aopen, OEXEC, 0); + kstrdup(&elem, up->genbuf); + + hdrsz = chan->dev->read(chan, &hdr, sizeof(Khdr), 0); + DBG("wrote ufile\n"); + + if(hdrsz < 2) + error(Ebadexec); + }else{ + /* somebody already wrote in our text segment */ + hdr = *(Khdr*)p->seg[TSEG]->base; + hdrsz = sizeof(Khdr); + } + +// p = (char*)&hdr; + magic = l2be(hdr.magic); + DBG("badexec3\n"); + + if(hdrsz != sizeof(Khdr) || magic != AOUT_MAGIC) + error(Ebadexec); + if(magic & HDR_MAGIC){ + entry = vl2be(hdr.hdr[0]); + hdrsz = sizeof(Khdr); + } + else{ + entry = l2be(hdr.entry); + hdrsz = sizeof(Exec); + } + + textsz = l2be(hdr.text); + datasz = l2be(hdr.data); + 
bsssz = l2be(hdr.bss); + + tbase = p->seg[TSEG]->base; + tsize = tbase - p->seg[TSEG]->top; + dbase = p->seg[DSEG]->base; + dsize = dbase - p->seg[DSEG]->top; + bbase = p->seg[BSEG]->base; + bsize = bbase - p->seg[BSEG]->top; + sbase = p->seg[SSEG]->base; + ssize = sbase - p->seg[SSEG]->top; + + // XXX: we are no longer contiguous. + textlim = ROUNDUP(hdrsz+textsz, BIGPGSZ); + // XXX: we are going to be at least two pages here. + datalim = BIGPGROUND(datasz); + bsslim = BIGPGROUND(datalim+bsssz); + + // XXX: this is pretty fragile + memmove((void*)dbase, (void*)(entry+textsz), datasz); + DBG("writing data dbase %#p tbase %#p textsz %ld datasz %ld\n", dbase, tbase, textsz, datasz); +// memmove((void*)dbase, (void*)"testing data", 13); + /* + * Check the binary header for consistency, + * e.g. the entry point is within the text segment and + * the segments don't overlap each other. + */ + // XXX: max instruction size on amd64 is 15 bytes provide a check for consistency. + DBG("kexec: entry %#p tbase %#p hdrsz %ld textsz %ld\n", entry, tbase, hdrsz, textsz); + if(entry < tbase+hdrsz || entry >= tbase+hdrsz+textsz) + error(Ebadexec); + // XXX: what about the kernel stack we are making here? + DBG("kexec: testing if sizes overflow limits\n"); + if(textsz >= textlim || datasz > datalim || bsssz > bsslim) + error(Ebadexec); + + DBG("kexec: do the top of the segments overflow limits?\n"); + if(textlim >= tbase+tsize || datalim >= dbase+dsize || bsslim >= bbase+bsize) + error(Ebadexec); + + DBG("kexec: is bss below data?\n"); + if(bsslim < datalim) + error(Ebadexec); + /* + Interesting thought, the previously allocated segments for + data and text are shared and constant. The BSS and the stack + are not. What you really want is the ability to make an + executable text and data and then create child executables on + top of that. This will lower external fragmentation and allow + a bunch of communicating shared memory processes (ie. go) in + kernel space. 
+ + Fundamentally this means that the allocation of the text and + the data should be separate from the bss and the stack. This + will require that you change the linkers as well to allow the + separation of data and bss sections. + */ + + /* + * Stack is a pointer into the temporary stack + * segment, and will move as items are pushed. + */ + + // need to work something out here with the stack. + stack = sbase+ssize-sizeof(Tos); + + + /* + * XXX: When we are linking this how do we set the tos? We will need to change trap right? + */ + tos = (Tos*)stack; + tos->cyclefreq = m->cyclefreq; + cycles((uvlong*)&tos->pcycles); + tos->pcycles = -tos->pcycles; + tos->kcycles = tos->pcycles; + tos->clock = 0; + + DBG("kexec: argument processing\n"); + if(0) + for(i = 0;; i++, argv++){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + a = validaddr(a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + + if(argc > 0 && i == 0) + continue; + + stack -= n; + if(stack < sbase+ssize-4096) + error(Enovmem); + args = UINT2PTR(stack); + memmove(args, a, n); + args[n-1] = 0; + argc++; + } + // DBG("kexec: ensuring we have argc\n"); + if(0) + if(argc < 1) + error(Ebadexec); + + a = args = UINT2PTR(stack); + stack = sysexecstack(stack, argc); + // XXX: look through math on this. look at ../../9/port/ exec.c + // YYY: this looks like a Jimism for 9k. + // DBG("kexec: ensuring the stack \n"); + if(0) + if(stack-(argc+1)*sizeof(char**)-BIGPGSZ < sbase+ssize-4096) + error(Ebadexec); + + argv = (char**)stack; + *--argv = nil; + // XXX: replace USTKTOP with a new variable representing the top of stack. + if(0) + for(i = 0; i < argc; i++){ + *--argv = args + (USTKTOP-sbase+ssize); + args += strlen(args) + 1; + } + + DBG("argsing\n"); + n = args - a; + if(0) + if(n <= 0) + error(Egreg); + if(n > 128) + n = 128; + DBG("kexec: allocating args\n"); + // XXX: hangs in smalloc, not sure why. 
+// args = smalloc(n); +// if(waserror()){ +// DBG("erroring\n"); +// free(args); +// nexterror(); +// } +// DBG("kexec: moving args\n"); +// memmove(args, a, n); +// if(0) +// while(n > 0 && (args[n-1] & 0xc0) == 0x80) +// n--; +// args[n-1] = '\0'; + + kstrdup(&p->text, "kexecproc"); + p->args = nil; + //elem; +// elem = nil; +// p->args = args; +// p->nargs = n; + poperror(); /* p (up->args) */ + + + + + +/* + qlock(&p->debug); + + sysprocsetup(p); + qunlock(&p->debug); +*/ + + // why is this sched and not ureg? + p->sched.pc = entry; + // the real question here is how do you set up the stack? + p->sched.sp = PTR2UINT(stack-BY2SE); + p->sched.sp = STACKALIGN(p->sched.sp); + + + // XXX: what does it imply if you have a kproc that runs on an ac? + if(core > 0){ + DBG("kexec: coring %d\n", core); + mp = p->ac; + mp->icc->flushtlb = 1; + mp->icc->rc = ICCOK; + + DBG("kexec: exotic proc on cpu%d\n", mp->machno); + qlock(&p->debug); + if(waserror()){ + DBG("kexec: had error"); + qunlock(&p->debug); + nexterror(); + } + p->nicc++; + p->state = Exotic; + p->psstate = 0; + DBG("kexec: unlocking"); + qunlock(&p->debug); + poperror(); + mfence(); + mp->icc->fn = (void*)entry; + sched(); + }else{ + DBG("kexec: readying\n"); + ready(p); + p->newtlb = 1; + mmuflush(); + } + DBG("kforkexecac up %#p done\n" + "textsz %lx datasz %lx bsssz %lx hdrsz %lx\n" + "textlim %ullx datalim %ullx bsslim %ullx\n", up, + textsz, datasz, bsssz, hdrsz, textlim, datalim, bsslim); +} + +void +syskforkexecac(Ar0* ar0, va_list list) +{ +// int core; +// uintptr base, size; +// char *file, **argv; + //XXX: get system call working. 
+ USED(ar0, list); + + // XXX: fix sysexecregs + panic("syskforkexecac: don't call me yet"); + /* + * void* syskforkexecac(uintptr base, size, int core, char *ufile, char **argv) + */ +// base = va_arg(list, uintptr); +// size = va_arg(list, uintptr); +// core = va_arg(list, unsigned int); +// file = va_arg(list, char*); +// file = validaddr(file, 1, 0); +// argv = va_arg(list, char**); +// evenaddr(PTR2UINT(argv)); + // XXX: going to need to setup segs here. + //kforkexecac(p, core, file, argv); + // this is not going to work. I need to think about it. + // ar0->v = sysexecregs(entry, stack - PTR2UINT(argv), argc); + +} + + +void +printhello(void) +{ + print("hello\n"); +} + +void +printargs(char *arg) +{ + print("%#p %s\n", arg, arg); +} \ No newline at end of file diff -r 37ca23c2c31d -r 6ba826535859 sys/src/nix/port/kexec.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/port/kexec.h Wed Mar 28 17:14:41 2012 +0200 @@ -0,0 +1,28 @@ +typedef struct Kvalue Kvalue; +typedef struct Kexecgrp Kexecgrp; + + +/* Kexec structures */ +struct Kvalue +{ + uintptr addr; + uvlong size; + int len; + int inuse; + Kvalue *link; + Qid qid; +}; + +struct Kexecgrp +{ + Ref; + RWlock; + Kvalue **ent; + int nent; + int ment; + ulong path; /* qid.path of next Kvalue to be allocated */ + ulong vers; /* of Kexecgrp */ +}; + +void kforkexecac(Proc*, int, char*, char**); +Proc* setupseg(int core); diff -r 37ca23c2c31d -r 6ba826535859 sys/src/nix/port/portdat.h --- a/sys/src/nix/port/portdat.h Wed Mar 28 10:41:35 2012 +0200 +++ b/sys/src/nix/port/portdat.h Wed Mar 28 17:14:41 2012 +0200 @@ -1147,6 +1147,8 @@ vlong offset; }; + + #define DEVDOTDOT -1 #pragma varargck type "I" uchar*