# HG changeset patch # User Francisco J Ballesteros # Date 1328736783 -3600 # Node ID 29403dfab2b1296d30ab2d13a911baec90fa0845 # Parent d2175c0b7b6ce3b0ec36457f2c05086ab40ad692 the: [damn] sched patch, uploaded again. I copied again the relevant files from our current version to a fresh clone of the repo. I think they didn't overwrite anything not already found in this files. There are more files than needed, as a reminder, because of the new sys->ticks and sys->load, which are no longer officially on mach 0. R=nixiedev, nemo, john CC=nix-dev http://codereview.appspot.com/5642062 diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/386/devether.c --- a/sys/src/nix/386/devether.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/386/devether.c Wed Feb 08 22:33:03 2012 +0100 @@ -119,7 +119,7 @@ if(bp == nil) return; memmove(bp->wp, pkt->d, n); - i = TK2MS(MACHP(0)->ticks); + i = TK2MS(sys->ticks); bp->wp[58] = len>>8; bp->wp[59] = len; bp->wp[60] = i>>24; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/ip/ip.h --- a/sys/src/nix/ip/ip.h Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/ip/ip.h Wed Feb 08 22:33:03 2012 +0100 @@ -567,7 +567,7 @@ extern uchar v4prefix[IPaddrlen]; extern uchar IPallbits[IPaddrlen]; -#define NOW TK2MS(MACHP(0)->ticks) +#define NOW TK2MS(sys->machptr[0]->ticks) /* * media diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/acore.c --- a/sys/src/nix/k10/acore.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/acore.c Wed Feb 08 22:33:03 2012 +0100 @@ -101,12 +101,10 @@ acmmuswitch(); for(;;){ acstackok(); - m->load = 0; mwait(&m->icc->fn); if(m->icc->flushtlb) acmmuswitch(); DBG("acsched: cpu%d: fn %#p\n", m->machno, m->icc->fn); - m->load = 100; m->icc->fn(); DBG("acsched: cpu%d: idle\n", m->machno); mfence(); @@ -138,7 +136,6 @@ u = m->proc->dbgreg; DBG("cpu%d: touser usp = %#p entry %#p\n", m->machno, u->sp, u->ip); - m->load = 100; xactouser(u->sp); panic("actouser"); } @@ -156,7 +153,7 @@ * BUG: We should setup some trapenable() 
mechanism for the AC, * so that code like fpu.c could arrange for handlers specific for * the AC, instead of doint that by hand here. - * + * * All interrupts are masked while in the "kernel" */ void @@ -192,15 +189,15 @@ DBG("actrap: cpu%d: IPI\n", m->machno); apiceoi(IdtIPI); break; + case IdtTIMER: + apiceoi(IdtTIMER); + panic("timer interrupt in an AC"); + break; case IdtPF: /* this case is here for debug only */ m->pfault++; DBG("actrap: cpu%d: PF cr2 %#ullx\n", m->machno, cr2get()); break; - case IdtTIMER: - apiceoi(IdtTIMER); - panic("timer interrupt in an AC"); - break; default: print("actrap: cpu%d: %ulld\n", m->machno, u->type); } @@ -215,10 +212,8 @@ m->icc->fn = nil; ready(m->proc); - m->load = 0; mwait(&m->icc->fn); - m->load = 100; if(m->icc->flushtlb) acmmuswitch(); if(m->icc->fn != actrapret) @@ -252,7 +247,6 @@ mfence(); m->icc->fn = nil; ready(p); - m->load = 0; /* * The next call is probably going to make us jmp * into user code, forgetting all our state in this @@ -281,6 +275,13 @@ ndnr(); } +char *rolename[] = +{ + [NIXAC] "AC", + [NIXTC] "TC", + [NIXKC] "KC", + [NIXXC] "XC", +}; void acmodeset(int mode) @@ -289,9 +290,10 @@ case NIXAC: case NIXKC: case NIXTC: + case NIXXC: break; default: - panic("apmodeset: bad mode %d", mode); + panic("acmodeset: bad mode %d", mode); } m->nixtype = mode; } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/apic.c --- a/sys/src/nix/k10/apic.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/apic.c Wed Feb 08 22:33:03 2012 +0100 @@ -319,7 +319,7 @@ if(m->machno == 0) apicrput(Tp, 0); - xlapicmachptr[apicno] = sys->machptr[m->machno]; + xlapicmachptr[apicno] = m; return 1; } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/dat.h --- a/sys/src/nix/k10/dat.h Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/dat.h Wed Feb 08 22:33:03 2012 +0100 @@ -110,7 +110,6 @@ struct Conf { - ulong nmach; /* processors */ ulong nproc; /* processes */ Confmem mem[4]; /* physical memory */ uvlong npage; /* total 
physical pages of memory */ @@ -266,15 +265,14 @@ void* alarm; /* alarms bound to this clock */ int inclockintr; - Proc* readied; /* for runproc */ - ulong schedticks; /* next forced context switch */ + ulong qstart; /* time when up started running */ + int qexpired; /* quantum expired */ int tlbfault; int tlbpurge; int pfault; int cs; int syscall; - int load; int intr; int mmuflush; /* make current proc flush it's mmu state */ int ilockdepth; @@ -288,8 +286,6 @@ int cpumhz; u64int rdtsc; - Sched* sch; /* scheduler used */ - Lock pmclock; PmcCtr pmc[PmcMaxCtrs]; @@ -337,6 +333,11 @@ uintptr vmunmapped; /* 1st unmapped va */ uintptr vmend; /* 1st unusable va */ u64int epoch; /* crude time synchronisation */ + + int nc[NIXROLES]; /* number of online processors */ + int nmach; + int load; + ulong ticks; /* of the clock since boot time */ }; uchar syspage[4*KiB]; }; @@ -393,13 +394,14 @@ * MMU information array machptr, mainly for disambiguation and access to * the clock which is only maintained by the bootstrap processor (0). 
*/ -#define MACHP(n) (sys->machptr[n]) - extern register Mach* m; /* R15 */ extern register Proc* up; /* R14 */ extern uintptr kseg0; +extern char*rolename[]; + + #pragma varargck type "P" uintmem /* @@ -416,3 +418,4 @@ extern char dbgflg[256]; #define dbgprint print /* for now */ + diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/devacpi.c --- a/sys/src/nix/k10/devacpi.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/devacpi.c Wed Feb 08 22:33:03 2012 +0100 @@ -783,15 +783,23 @@ { Srat *sl; Mach *m; + static int colors[32]; if(core < 0 || core >= MACHMAX) return -1; - m = MACHP(core); + m = sys->machptr[core]; if(m == nil) return -1; + + if(core >= 0 && core < nelem(colors) && colors[core] != 0) + return colors[core] - 1; + for(sl = srat; sl != nil; sl = sl->next) - if(sl->type == SRlapic && sl->lapic.apic == m->apicno) + if(sl->type == SRlapic && sl->lapic.apic == m->apicno){ + if(core >= 0 && core < nelem(colors)) + colors[core] = 1 + sl->lapic.dom; return sl->lapic.dom; + } return -1; } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/ioapic.c --- a/sys/src/nix/k10/ioapic.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/ioapic.c Wed Feb 08 22:33:03 2012 +0100 @@ -180,10 +180,14 @@ ioapicdump(); } +static int dfpolicy = 0; + static void ioapicintrdd(u32int* hi, u32int* lo) { - static int i; + int i; + static int df; + static Lock dflock; /* * Set delivery mode (lo) and destination field (hi), @@ -200,21 +204,35 @@ * generations, both AMD and Intel, using the APIC and xAPIC. * * Interrupt routing policy can be set here. - * Currently, just assign each interrupt to a different CPU on - * a round-robin basis. Some idea of the packages/cores/thread - * topology would be useful here, e.g. to not assign interrupts - * to more than one thread in a core, or to use a "noise" core. - * But, as usual, Intel make that an onerous task. 
*/ - for(;; i = (i+1) % nelem(xlapic)){ - if(!xlapic[i].useable) - continue; - if(sys->machptr[xlapic[i].machno] == nil) - continue; - if(sys->machptr[xlapic[i].machno]->online != 0) - break; + switch(dfpolicy){ + default: /* noise core 0 */ + *hi = sys->machptr[0]->apicno<<24; + break; + case 1: /* round-robin */ + /* + * Assign each interrupt to a different CPU on a round-robin + * Some idea of the packages/cores/thread topology would be + * useful here, e.g. to not assign interrupts to more than one + * thread in a core. But, as usual, Intel make that an onerous + * task. + */ + lock(&dflock); + for(;;){ + i = df++; + if(df >= sys->nmach+1) + df = 0; + if(sys->machptr[i] == nil || !sys->machptr[i]->online) + continue; + i = sys->machptr[i]->apicno; + if(xlapic[i].useable && xlapic[i].addr == 0) + break; + } + unlock(&dflock); + + *hi = i<<24; + break; } - *hi = i<<24; *lo |= Pm|MTf; } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/main.c --- a/sys/src/nix/k10/main.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/main.c Wed Feb 08 22:33:03 2012 +0100 @@ -32,7 +32,7 @@ static int oargblen; static int maxcores = 1024; /* max # of cores given as an argument */ -static int numtcs = 16; /* initial # of TCs */ +static int numtcs = 32; /* initial # of TCs */ char dbgflg[256]; static int vflag = 0; @@ -82,20 +82,12 @@ } } -extern void setmachsched(Mach*); - void squidboy(int apicno) { - char *n[] = { - [NIXAC] "AC", - [NIXTC] "TC", - [NIXKC] "KC" - }; vlong hz; sys->machptr[m->machno] = m; - setmachsched(m); /* * Need something for initial delays * until a timebase is worked out. 
@@ -139,7 +131,7 @@ m->rdtsc = rdtsc(); print("cpu%d color %d role %s tsc %lld\n", - m->machno, corecolor(m->machno), n[m->nixtype], m->rdtsc); + m->machno, corecolor(m->machno), rolename[m->nixtype], m->rdtsc); switch(m->nixtype){ case NIXAC: acmmuswitch(); @@ -165,7 +157,7 @@ timersinit(); adec(&active.nbooting); - ainc(&active.nonline); /* this was commented out */ + ainc(&active.nonline); schedinit(); break; @@ -181,10 +173,9 @@ extern void testicc(int); /* setup arguments for all */ - for(i = 1; i < MACHMAX; i++) - if((mp = sys->machptr[i]) != nil && mp->online != 0) - if(mp->nixtype == NIXAC) - testicc(i); + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online && mp->nixtype == NIXAC) + testicc(i); print("bootcore: all cores done\n"); } @@ -203,7 +194,7 @@ uvlong now, start; for(i = 1; i < MACHMAX; i++) - if((mp = sys->machptr[i]) != nil && mp->online != 0){ + if((mp = sys->machptr[i]) != nil && mp->online){ /* * Inter-core calls. A ensure *mp->iccall and mp->icargs * go into different cache lines. 
@@ -211,9 +202,11 @@ mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); mp->icc->fn = nil; if(i < numtcs){ - conf.nmach++; + sys->nmach++; mp->nixtype = NIXTC; - } + sys->nc[NIXTC]++; + }else + sys->nc[NIXAC]++; ainc(&active.nbooting); } sys->epoch = rdtsc(); @@ -290,7 +283,7 @@ vsvminit(MACHSTKSZ, NIXTC); - conf.nmach = 1; + sys->nmach = 1; fmtinit(); print("\nNIX\n"); diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/physalloc.c --- a/sys/src/nix/k10/physalloc.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/physalloc.c Wed Feb 08 22:33:03 2012 +0100 @@ -347,6 +347,7 @@ uintmem m; m = 0; + color = *colorp; if(color >= 0){ color %= ndoms; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/k10/tcore.c --- a/sys/src/nix/k10/tcore.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/k10/tcore.c Wed Feb 08 22:33:03 2012 +0100 @@ -27,7 +27,7 @@ mp = nil; if(core == 0) - panic("can't getac for a TC"); + panic("can't getac for a %s", rolename[NIXTC]); lock(&nixaclock); if(waserror()){ unlock(&nixaclock); @@ -44,12 +44,10 @@ Found: mp->proc = p; }else{ - for(i = 1; i < MACHMAX; i++){ - mp = sys->machptr[i]; - if(mp != nil && mp->online && mp->nixtype == NIXAC && - mp->proc == nil) - goto Found; - } + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online && mp->nixtype == NIXAC) + if(mp->proc == nil) + goto Found; error("not enough cores"); } unlock(&nixaclock); @@ -277,7 +275,7 @@ print("before PF:\n"); print("AC:\n"); dumpptepg(4, up->ac->pml4->pa); - print("\nTC:\n"); + print("\n%s:\n", rolename[NIXTC]); dumpptepg(4, m->pml4->pa); } trap(ureg); diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/alarm.c --- a/sys/src/nix/port/alarm.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/alarm.c Wed Feb 08 22:33:03 2012 +0100 @@ -14,7 +14,7 @@ ulong now; for(;;){ - now = MACHP(0)->ticks; + now = sys->ticks; qlock(&alarms); while((rp = alarms.head) && rp->alarm <= now){ if(rp->alarm != 0L){ @@ -46,7 +46,7 @@ ulong now; p = alarms.head; - 
now = MACHP(0)->ticks; + now = sys->ticks; if(p && p->alarm <= now) wakeup(&alarmr); @@ -59,14 +59,14 @@ ulong when, old; if(up->alarm) - old = tk2ms(up->alarm - MACHP(0)->ticks); + old = tk2ms(up->alarm - sys->ticks); else old = 0; if(time == 0) { up->alarm = 0; return old; } - when = ms2tk(time)+MACHP(0)->ticks; + when = ms2tk(time)+sys->ticks; qlock(&alarms); l = &alarms.head; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/devaoe.c --- a/sys/src/nix/port/devaoe.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/devaoe.c Wed Feb 08 22:33:03 2012 +0100 @@ -260,7 +260,7 @@ srb = malloc(sizeof *srb+sz); srb->dp = srb->data = srb+1; - srb->ticksent = MACHP(0)->ticks; + srb->ticksent = sys->ticks; return srb; } @@ -271,7 +271,7 @@ srb = malloc(sizeof *srb); srb->dp = srb->data = db; - srb->ticksent = MACHP(0)->ticks; + srb->ticksent = sys->ticks; return srb; } @@ -396,7 +396,7 @@ { int n; - n = MACHP(0)->ticks & 0xffff; + n = sys->ticks & 0xffff; n -= tag & 0xffff; if(n < 0) n += 1<<16; @@ -410,7 +410,7 @@ do { t = ++d->lasttag << 16; - t |= MACHP(0)->ticks & 0xffff; + t |= sys->ticks & 0xffff; } while (t == Tfree || t == Tmgmt); return t; } @@ -497,7 +497,7 @@ downdev(d, "resend fails; no netlink/ea"); return -1; } - if(f->srb && MACHP(0)->ticks - f->srb->ticksent > Srbtimeout){ + if(f->srb && sys->ticks - f->srb->ticksent > Srbtimeout){ eventlog("%æ: srb timeout\n", d); frameerror(d, f, Etimedout); return -1; @@ -515,7 +515,7 @@ f->dl = l; f->nl = l->nl; f->eaidx = i; - f->ticksent = MACHP(0)->ticks; + f->ticksent = sys->ticks; return f->tag; } @@ -603,7 +603,7 @@ } nbc = Nbcms/Nms; } - starttick = MACHP(0)->ticks; + starttick = sys->ticks; rlock(&devs); for(d = devs.d; d; d = d->next){ if(!canqlock(d)) @@ -626,7 +626,7 @@ if(d->nout == d->maxout){ if(d->maxout > 1) d->maxout--; - d->lastwadj = MACHP(0)->ticks; + d->lastwadj = sys->ticks; } a = (Aoeata*)f->hdr; if(a->scnt > Dbcnt / Aoesectsz && @@ -645,14 +645,14 @@ } } if(d->nout == d->maxout && 
d->maxout < d->nframes && - TK2MS(MACHP(0)->ticks - d->lastwadj) > 10*1000){ + TK2MS(sys->ticks - d->lastwadj) > 10*1000){ d->maxout++; - d->lastwadj = MACHP(0)->ticks; + d->lastwadj = sys->ticks; } qunlock(d); } runlock(&devs); - i = Nms - TK2MS(MACHP(0)->ticks - starttick); + i = Nms - TK2MS(sys->ticks - starttick); if(i > 0) tsleep(&up->sleep, return0, 0, i); goto loop; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/devcons.c --- a/sys/src/nix/port/devcons.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/devcons.c Wed Feb 08 22:33:03 2012 +0100 @@ -366,7 +366,7 @@ for(i=0; i<1000; i++){ if(canlock(l)) return 1; - if(l->m == MACHP(m->machno)) + if(l->m == m) return 0; microdelay(100); } @@ -820,11 +820,10 @@ { ulong l; Mach *mp; - char *b, *bp, ch, *s; + char *b, *bp, ch, *s, *e; char tmp[512]; /* Qswap is 381 bytes at clu */ int i, k, id, send; long offset; - extern int schedsteals, scheddonates; if(n <= 0) @@ -890,7 +889,7 @@ for(i=0; i<6 && NUMSIZE*itime[i]; if(i == TReal) - l = MACHP(0)->ticks - l; + l = sys->ticks - l; l = TK2MS(l); readnum(0, tmp+NUMSIZE*i, NUMSIZE, l, NUMSIZE); } @@ -944,11 +943,12 @@ return 0; case Qsysstat: - b = smalloc(MACHMAX*(NUMSIZE*11+2+1) + 1); /* +1 for NUL */ + n = MACHMAX*(NUMSIZE*11+2+1); + b = smalloc(n + 1); /* +1 for NUL */ bp = b; - for(id = 0; id < MACHMAX; id++) { - mp = sys->machptr[id]; - if(mp != nil && mp->online) { + e = bp + n; + for(id = 0; id < MACHMAX; id++) + if((mp = sys->machptr[id]) != nil && mp->online){ readnum(0, bp, NUMSIZE, mp->machno, NUMSIZE); bp += NUMSIZE; readnum(0, bp, NUMSIZE, mp->cs, NUMSIZE); @@ -963,7 +963,7 @@ bp += NUMSIZE; readnum(0, bp, NUMSIZE, mp->tlbpurge, NUMSIZE); bp += NUMSIZE; - readnum(0, bp, NUMSIZE, mp->load, NUMSIZE); + readnum(0, bp, NUMSIZE, sys->load, NUMSIZE); bp += NUMSIZE; readnum(0, bp, NUMSIZE, (mp->perf.avg_inidle*100)/mp->perf.period, @@ -973,22 +973,11 @@ (mp->perf.avg_inintr*100)/mp->perf.period, NUMSIZE); bp += NUMSIZE; - readnum(0, bp, NUMSIZE, 
(mp->sch - run), NUMSIZE); + readnum(0, bp, NUMSIZE, 0, NUMSIZE); /* sched # */ bp += NUMSIZE; - switch(mp->nixtype){ - case NIXAC: - strcpy(bp, "AC"); - break; - case NIXKC: - strcpy(bp, "KC"); - break; - default: - strcpy(bp, "TC"); - } - bp += 2; + bp = strecpy(bp, e, rolename[mp->nixtype]); *bp++ = '\n'; } - } if(waserror()){ free(b); nexterror(); @@ -1035,9 +1024,7 @@ return n; case Qdebug: - s = seprint(tmp, tmp + sizeof tmp, "steal %d\n", schedsteals); - s = seprint(s, tmp + sizeof tmp, "donate %d\n", scheddonates); - s = seprint(s, tmp + sizeof tmp, "locks %uld\n", lockstats.locks); + s = seprint(tmp, tmp + sizeof tmp, "locks %uld\n", lockstats.locks); s = seprint(s, tmp + sizeof tmp, "glare %uld\n", lockstats.glare); s = seprint(s, tmp + sizeof tmp, "inglare %uld\n", lockstats.inglare); s = seprint(s, tmp + sizeof tmp, "qlock %uld\n", qlockstats.qlock); @@ -1062,7 +1049,6 @@ ulong offset; Cmdbuf *cb; Cmdtab *ct; - extern int schedsteals, scheddonates; a = va; offset = off; @@ -1155,8 +1141,8 @@ case Qsysstat: for(i = 0; i < MACHMAX; i++) - if((mp = sys->machptr[i]) != nil && mp->online != 0){ - mp = MACHP(i); + if((mp = sys->machptr[i]) != nil && mp->online){ + mp = sys->machptr[i]; mp->cs = 0; mp->intr = 0; mp->syscall = 0; @@ -1200,16 +1186,7 @@ buf[n] = 0; if(n > 0 && buf[n-1] == '\n') buf[n-1] = 0; - if(strcmp(buf, "steal") == 0) - schedsteals = 1; - else if(strcmp(buf, "nosteal") == 0) - schedsteals = 0; - else if(strcmp(buf, "donate") == 0) - scheddonates = 1; - else if(strcmp(buf, "nodonate") == 0) - scheddonates = 0; - else - error(Ebadctl); + error(Ebadctl); break; default: print("conswrite: %#llux\n", c->qid.path); @@ -1255,7 +1232,7 @@ { if(randn == 0) seedrand(); - randn = randn*1103515245 + 12345 + MACHP(0)->ticks; + randn = randn*1103515245 + 12345 + sys->ticks; return (randn>>16) % n; } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/devpipe.c --- a/sys/src/nix/port/devpipe.c Wed Feb 08 18:52:52 2012 +0000 +++ 
b/sys/src/nix/port/devpipe.c Wed Feb 08 22:33:03 2012 +0100 @@ -44,7 +44,7 @@ enum { - /* Plan 9 default for conf.nmach > 1 */ + /* Plan 9 default for nmach > 1 */ Pipeqsize = 256*1024 }; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/devpmc.c --- a/sys/src/nix/port/devpmc.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/devpmc.c Wed Feb 08 22:33:03 2012 +0100 @@ -114,13 +114,12 @@ _pmcupdate = pmcupdate; ncores = 0; nr = pmcnregs(); - for(i = 0; i < MACHMAX; i++) { - if((mp = sys->machptr[i]) != nil && mp->online != 0){ + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online){ ncores++; for(j = 0; j < nr; j++) pmcnull(&mp->pmc[j]); } - } topdirinit(ncores); ctrdirinit(); } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/devproc.c --- a/sys/src/nix/port/devproc.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/devproc.c Wed Feb 08 22:33:03 2012 +0100 @@ -14,6 +14,7 @@ { Qdir, Qtrace, + Qtracepids, Qargs, Qctl, Qfd, @@ -70,6 +71,7 @@ enum{ Nevents = 0x4000, Emask = Nevents - 1, + Ntracedpids = 1024, }; /* + 6 * 12 for extra NIX counters. 
*/ @@ -160,6 +162,7 @@ static void mntscan(Mntwalk*, Proc*); static Traceevent *tevents; +static char *tpids, *tpidsc, *tpidse; static Lock tlock; static int topens; static int tproduced, tconsumed; @@ -206,7 +209,13 @@ devdir(c, qid, up->genbuf, 0, eve, 0444, dp); return 1; } - + if(s == 1){ + strcpy(up->genbuf, "tracepids"); + mkqid(&qid, Qtracepids, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + s -= 2; if(name != nil){ /* ignore s and use name to find pid */ pid = strtol(name, &ename, 10); @@ -216,7 +225,7 @@ if(s < 0) return -1; } - else if(--s >= conf.nproc) + else if(s >= conf.nproc) return -1; if((p = psincref(s)) == nil || (pid = p->pid) == 0) @@ -239,6 +248,12 @@ devdir(c, qid, up->genbuf, 0, eve, 0444, dp); return 1; } + if(c->qid.path == Qtracepids){ + strcpy(up->genbuf, "tracepids"); + mkqid(&qid, Qtrace, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } if(s >= nelem(procdir)) return -1; if(tab) @@ -279,17 +294,24 @@ notrace(Proc*, Tevent, vlong) { } +static Lock tlck; static void _proctrace(Proc* p, Tevent etype, vlong ts) { Traceevent *te; + int tp; + ilock(&tlck); if (p->trace == 0 || topens == 0 || - tproduced - tconsumed >= Nevents) + tproduced - tconsumed >= Nevents){ + iunlock(&tlck); return; + } + tp = tproduced++; + iunlock(&tlck); - te = &tevents[tproduced&Emask]; + te = &tevents[tp&Emask]; te->pid = p->pid; te->etype = etype; if (ts == 0) @@ -297,9 +319,19 @@ else te->time = ts; te->core = m->machno; - tproduced++; } +void +proctracepid(Proc *p) +{ + if(p->trace == 1 && proctrace != notrace){ + p->trace = 2; + ilock(&tlck); + tpidsc = seprint(tpidsc, tpidse, "%d %s\n", p->pid, p->text); + iunlock(&tlck); + } +} + static void procinit(void) { @@ -368,8 +400,12 @@ topens++; if (tevents == nil){ tevents = (Traceevent*)malloc(sizeof(Traceevent) * Nevents); - if(tevents == nil) + tpids = malloc(Ntracedpids * 20); + if(tevents == nil || tpids == nil) error(Enomem); + tpidsc = tpids; + 
tpidse = tpids + Ntracedpids * 20; + *tpidsc = 0; tproduced = tconsumed = 0; } proctrace = _proctrace; @@ -381,7 +417,14 @@ c->offset = 0; return c; } - + if(QID(c->qid) == Qtracepids){ + if (omode != OREAD) + error(Eperm); + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; + } if((p = psincref(SLOT(c->qid))) == nil) error(Eprocdied); qlock(&p->debug); @@ -699,7 +742,7 @@ procread(Chan *c, void *va, long n, vlong off) { Proc *p; - Mach *ac; + Mach *ac, *wired; long l, r; Waitq *wq; Ureg kur; @@ -741,6 +784,12 @@ return rptr - (uchar*)va; } + if(QID(c->qid) == Qtracepids) + if(tpids == nil) + return 0; + else + return readstr(off, va, n, tpids); + if((p = psincref(SLOT(c->qid))) == nil || p->pid != PID(c->qid)) error(Eprocdied); @@ -768,8 +817,11 @@ case Qcore: i = 0; ac = p->ac; + wired = p->wired; if(ac != nil) i = ac->machno; + else if(wired != nil) + i = wired->machno; snprint(statbuf, sizeof statbuf, "%d\n", i); return readstr(offset, va, n, statbuf); @@ -917,7 +969,7 @@ for(i = 0; i < 6; i++) { l = p->time[i]; if(i == TReal) - l = MACHP(0)->ticks - l; + l = sys->ticks - l; l = TK2MS(l); readnum(0, statbuf+j+NUMSIZE*i, NUMSIZE, l, NUMSIZE); } @@ -1496,10 +1548,10 @@ case CMtrace: switch(cb->nf){ case 1: - p->trace ^= 1; + p->trace = (p->trace?0:1); break; case 2: - p->trace = (atoi(cb->f[1]) != 0); + p->trace = (atoi(cb->f[1])?1:0); break; default: error("args"); diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/devzp.c --- a/sys/src/nix/port/devzp.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/devzp.c Wed Feb 08 22:33:03 2012 +0100 @@ -164,7 +164,6 @@ zqwrite(Zq *q, Kzio io[], int nio) { int i, ei, ri, wi, awake; - Proc *p; lock(q); if(waserror()){ @@ -208,13 +207,8 @@ zqdump(q); poperror(); unlock(q); - if(awake){ - p = wakeup(&q->rr); - - /* if we just wokeup a higher priority process, let it run */ - if(p != nil && p->priority > up->priority) - sched(); - } + if(awake) + wakeup(&q->rr); return nio; } diff -r 
d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/edf.c --- a/sys/src/nix/port/edf.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/edf.c Wed Feb 08 22:33:03 2012 +0100 @@ -132,6 +132,7 @@ { /* Proc reached deadline */ extern int panicking; + Sched *sch; Proc *p; if(panicking || active.exiting) @@ -151,8 +152,8 @@ if(up->trace) proctrace(up, SInts, 0); up->delaysched++; - assert(m->sch); - m->sch->delayedscheds++; + sch = procsched(up); + sch->delayedscheds++; } } @@ -296,8 +297,10 @@ { Edf *e; long tns; + Sched *sch; e = p->edf; + sch = procsched(p); /* Called with edflock held */ if(edfpri){ tns = e->d - now; @@ -306,8 +309,7 @@ * deschedule forthwith */ p->delaysched++; - assert(m->sch); - m->sch->delayedscheds++; + sch->delayedscheds++; e->s = now; return; } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/lib.h --- a/sys/src/nix/port/lib.h Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/lib.h Wed Feb 08 22:33:03 2012 +0100 @@ -163,6 +163,8 @@ NIXTC = 0, NIXKC, NIXAC, + NIXXC, + NIXROLES, }; /* diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/page.c --- a/sys/src/nix/port/page.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/page.c Wed Feb 08 22:33:03 2012 +0100 @@ -283,7 +283,7 @@ p->ref++; p->va = va; p->modref = 0; - for(i = 0; i < MACHMAX; i++) + for(i = 0; i < nelem(p->cachectl); i++) p->cachectl[i] = ct; unlock(p); unlock(&pga); diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/portdat.h --- a/sys/src/nix/port/portdat.h Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/portdat.h Wed Feb 08 22:33:03 2012 +0100 @@ -676,6 +676,15 @@ RFCCORE = (1<<18), }; +/* execac */ +enum +{ + EXTC = 0, /* exec on time-sharing */ + EXAC, /* want an AC for the exec'd image */ + EXXC, /* want an XC for the exec'd image */ +}; + + /* * process memory segments - NSEG always last ! * HSEG is a potentially huge bss segment. 
@@ -753,8 +762,6 @@ ulong nrun; /* to compute load */ }; -extern Sched run[]; - typedef union Ar0 Ar0; union Ar0 { int i; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/portfns.h --- a/sys/src/nix/port/portfns.h Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/portfns.h Wed Feb 08 22:33:03 2012 +0100 @@ -256,6 +256,7 @@ void procrestore(Proc*); void procsave(Proc*); void (*proctrace)(Proc*, int, vlong); +void proctracepid(Proc*); void procwired(Proc*, int); void psdecref(Proc*); Proc* psincref(int); diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/proc.c --- a/sys/src/nix/port/proc.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/proc.c Wed Feb 08 22:33:03 2012 +0100 @@ -13,22 +13,15 @@ { Scaling=2, - /* - * number of schedulers used. - * 1 uses just one, which is the behavior of Plan 9. - */ - Nsched = 16, + AMPmincores = 5, }; Ref noteidalloc; static Ref pidalloc; -/* - * Because machines with many cores are NUMA, we try to use - * a different scheduler per color - */ -Sched run[Nsched]; +static Sched run; + struct Procalloc procalloc; @@ -42,9 +35,6 @@ static void rebalance(void); -int schedsteals = 1; -int scheddonates = 0; - char *statename[] = { /* BUG: generate automatically */ "Dead", @@ -64,30 +54,10 @@ "Down", }; -void -setmachsched(Mach *mp) +Sched* +procsched(Proc *) { - int color; - - color = corecolor(mp->machno); - if(color < 0){ - print("unknown color for cpu%d\n", mp->machno); - color = 0; - } - mp->sch = &run[color%Nsched]; -} - -Sched* -procsched(Proc *p) -{ - Mach *pm; - - pm = p->mp; - if(pm == nil) - pm = m; - if(pm->sch == nil) - setmachsched(pm); - return pm->sch; + return &run; } /* @@ -96,10 +66,7 @@ void procinit0(void) { - int i; - - for(i = 0; i < Nsched; i++) - run[i].schedgain = 30; + run.schedgain = 30; } @@ -112,16 +79,16 @@ Edf *e; m->inidle = 1; - if(m->sch == nil){ - print("schedinit: no sch for cpu%d\n", m->machno); - setmachsched(m); - } - ainc(&m->sch->nmach); + m->proc = nil; + ainc(&run.nmach); 
setlabel(&m->sched); if(up) { if((e = up->edf) && (e->flags & Admitted)) edfrecord(up); + m->qstart = 0; + m->qexpired = 0; + coherence(); m->proc = 0; switch(up->state) { case Running: @@ -177,9 +144,7 @@ sched(void) { Proc *p; - Sched *sch; - sch = m->sch; if(m->ilockdepth) panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p", m->machno, @@ -209,13 +174,12 @@ || pga.Lock.p == up || procalloc.Lock.p == up){ up->delaysched++; - sch->delayedscheds++; + run.delayedscheds++; return; } up->delaysched = 0; splhi(); - /* statistics */ if(up->nqtrap == 0 && up->nqsyscall == 0) up->nfullq++; @@ -232,21 +196,21 @@ } gotolabel(&m->sched); } + m->inidle = 1; - p = runproc(); + p = runproc(); /* core 0 never returns */ m->inidle = 0; + if(!p->edf){ updatecpu(p); p->priority = reprioritize(p); } - if(p != m->readied) - m->schedticks = m->ticks + HZ/10; - m->readied = 0; up = p; + m->qstart = m->ticks; up->nqtrap = 0; up->nqsyscall = 0; up->state = Running; - up->mach = MACHP(m->machno); + up->mach = m; m->proc = up; mmuswitch(up); @@ -257,31 +221,36 @@ int anyready(void) { - return m->sch->runvec; + return run.runvec; } int anyhigher(void) { - return m->sch->runvec & ~((1<<(up->priority+1))-1); + return run.runvec & ~((1<<(up->priority+1))-1); } /* * here once per clock tick to see if we should resched */ + void hzsched(void) { /* once a second, rebalance will reprioritize ready procs */ - if(m->machno == 0) + if(m->machno == 0){ rebalance(); + return; + } - /* unless preempted, get to run for at least 100ms */ - if(anyhigher() - || (!up->fixedpri && m->ticks > m->schedticks && anyready())){ - m->readied = nil; /* avoid cooperative scheduling */ + /* with <= 4 cores, we use SMP and core 0 does not set qexpired for us */ + if(sys->nmach <= AMPmincores) + if(m->ticks - m->qstart >= HZ/10) + m->qexpired = 1; + + /* unless preempted, get to run */ + if(m->qexpired && anyready()) up->delaysched++; - } } /* @@ -295,7 +264,13 @@ if(up->preempted == 0) 
if(anyhigher()) if(!active.exiting){ - m->readied = nil; /* avoid cooperative scheduling */ + /* Core 0 is dispatching all interrupts, so no core + * actually running a user process is ever going call preempted, unless + * we consider IPIs for preemption or we distribute interrupts. + * But we are going to use SMP for machines with few cores. + panic("preemted used"); + */ + up->preempted = 1; sched(); splhi(); @@ -353,15 +328,13 @@ if(p->edf) return; - t = MACHP(0)->ticks*Scaling + Scaling/2; + t = sys->ticks*Scaling + Scaling/2; n = t - p->lastupdate; p->lastupdate = t; if(n == 0) return; - if(m->sch == nil) /* may happen during boot */ - return; - D = m->sch->schedgain*HZ*Scaling; + D = run.schedgain*HZ*Scaling; if(n > D) n = D; @@ -379,7 +352,7 @@ /* * On average, p has used p->cpu of a cpu recently. - * Its fair share is conf.nmach/m->load of a cpu. If it has been getting + * Its fair share is nmach/m->load of a cpu. If it has been getting * too much, penalize it. If it has been getting not enough, reward it. * I don't think you can get much more than your fair share that * often, so most of the queues are for using less. Having a priority @@ -391,7 +364,7 @@ { int fairshare, n, load, ratio; - load = MACHP(0)->load; + load = sys->load; if(load == 0) return p->basepri; @@ -400,7 +373,7 @@ * except the decimal point is moved three places * on both load and fairshare. */ - fairshare = (conf.nmach*1000*1000)/load; + fairshare = (sys->nmach*1000*1000)/load; n = p->cpu; if(n == 0) n = 1; @@ -499,16 +472,6 @@ return; } - if(m->nixtype == NIXAC) - MACHP(0)->readied = p; - - /* - * BUG: if schedready is called to rebalance the scheduler, - * for another core, then this is wrong. 
- */ - if(up != p) - m->readied = p; /* group scheduling */ - updatecpu(p); pri = reprioritize(p); p->priority = pri; @@ -553,22 +516,20 @@ { Mpl pl; int pri, npri, t; - Sched *sch; Schedq *rq; Proc *p; - sch = m->sch; t = m->ticks; - if(t - sch->balancetime < HZ) + if(t - run.balancetime < HZ) return; - sch->balancetime = t; + run.balancetime = t; - for(pri=0, rq=sch->runq; prihead; if(p == nil) continue; - if(p->mp != MACHP(m->machno)) + if(p->mp != m) continue; if(pri == p->basepri) continue; @@ -576,110 +537,159 @@ npri = reprioritize(p); if(npri != pri){ pl = splhi(); - p = dequeueproc(sch, rq, p); + p = dequeueproc(&run, rq, p); if(p) - queueproc(sch, &sch->runq[npri], p, 0); + queueproc(&run, &run.runq[npri], p, 0); splx(pl); goto another; } } } +/* + * Process p is ready to run, but there's no available core. + * Try to make a core available by + * 1. preempting a process with lower priority, or + * 2. preempting one with the same priority that had more than HZ/10, or + * 3. rescheduling one that run more than HZ, in the hope he gets his priority lowered. + */ +static void +preemptfor(Proc *p) +{ + ulong delta; + uint i, rr; + Proc *mup; + Mach *mp; -/* - * Is this scheduler overloaded? - * should it pass processes to any other underloaded scheduler? - */ -static int -overloaded(Sched *sch) -{ - return sch->nmach != 0 && sch->nrdy > sch->nmach; + assert(m->machno == 0); + /* + * try to preempt a lower priority process first, default back to + * round robin otherwise. + */ + for(rr = 0; rr < 2; rr++) + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online && mp->nixtype == NIXTC){ + if(mp == m) + continue; + /* + * Caution here: mp->proc can change, even die. 
+ */ + mup = mp->proc; + if(mup == nil) /* one got idle */ + return; + delta = mp->ticks - mp->qstart; + if(mup->priority < p->priority){ + mp->qexpired = 1; + return; + } + if(rr && mup->priority == p->priority && delta > HZ/10){ + mp->qexpired = 1; + return; + } + if(rr & delta > HZ){ + mp->qexpired = 1; + return; + } + } } /* - * Is it reasonable to give processes to this scheduler? + * Scheduling thread run as the main loop of cpu 0 + * Used in AMP sched. */ -static int -underloaded(Sched *sch) +static void +mach0sched(void) { - return sch->nrdy < sch->nmach; -} + Schedq *rq; + Proc *p; + Mach *mp; + ulong start, now; + int n, i; -static void -ipisched(Sched *sch) -{ - Mach* mp; - int i; + assert(m->machno == 0); + acmodeset(NIXKC); /* we don't time share any more */ + n = 0; + start = perfticks(); +loop: - for(i = 0; i < MACHMAX; i++){ - mp = sys->machptr[i]; - if(mp != nil && mp != m && mp->online && mp->sch == sch) - apicipi(mp->apicno); + /* + * find a ready process that we might run. + */ + spllo(); + for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--) + for(p = rq->head; p; p = p->rnext){ + /* + * wired processes may only run when their core is available. + */ + if(p->wired != nil){ + if(p->wired->proc == nil) + goto found; + continue; + } + /* + * find a ready process that did run at an available core + * or one that has not moved for some time. + */ + if(p->mp == nil || p->mp->proc == nil || n>0) + goto found; + } + /* waste time or halt the CPU */ + idlehands(); + /* remember how much time we're here */ + now = perfticks(); + m->perf.inidle += now-start; + start = now; + n++; + goto loop; + +found: + assert(m->machno == 0); + splhi(); + /* + * find a core for this process, but honor wiring. 
+ */ + mp = p->wired; + if(mp != nil){ + if(mp->proc != nil) + goto loop; + }else{ + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online && mp->nixtype == NIXTC) + if(mp != m && mp->proc == nil) + break; + if(i == MACHMAX){ + preemptfor(p); + goto loop; + } } + + p = dequeueproc(&run, rq, p); + mp->proc = p; + if(p != nil){ + p->state = Scheding; + p->mp = mp; + } + + n = 0; + goto loop; } /* - * If we are idle, check if another scheduler is overloaded and - * steal a new process from it. But steal low priority processes to - * avoid disturbing high priority ones. + * SMP performs better than AMP with few cores. + * So, leave this here by now. We should probably + * write a unified version of runproc good enough for + * both SMP and AMP. */ static Proc* -steal(void) -{ - static int last; /* donate in round robin */ - int start, i; - Schedq *rq; - Sched *sch; - Proc *p; - - /* - * measures show that stealing is expensive, we are donating - * by now but only when calling exec(). See maydonate(). 
- */ - if(!schedsteals) - return nil; - - start = last; - for(i = 0; i < Nsched; i++){ - last = (start+i)%Nsched; - sch = &run[last]; - if(sch == m->sch || sch->nmach == 0 || !overloaded(sch)) - continue; - for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){ - for(p = rq->head; p != nil; p = p->rnext) - if(!p->wired && p->priority < PriKproc) - break; - if(p != nil && dequeueproc(sch, rq, p) != nil) - return p; - } - } - return nil; -} - -/* - * pick a process to run - */ -Proc* -runproc(void) +smprunproc(void) { Schedq *rq; - Sched *sch; Proc *p; ulong start, now; int i; start = perfticks(); - sch = m->sch; - /* cooperative scheduling until the clock ticks */ - if((p=m->readied) && p->mach==0 && p->state==Ready - && sch->runq[Nrq-1].head == nil && sch->runq[Nrq-2].head == nil - && (!p->wired || p->wired == m)){ - sch->skipscheds++; - rq = &sch->runq[p->priority]; - goto found; - } - - sch->preempts++; + run.preempts++; loop: /* @@ -694,19 +704,14 @@ * processor can run given affinity constraints. * */ - for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){ + for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){ for(p = rq->head; p; p = p->rnext){ - if(p->mp == nil || p->mp == MACHP(m->machno) + if(p->mp == nil || p->mp == sys->machptr[m->machno] || (!p->wired && i > 0)) goto found; } } - p = steal(); - if(p != nil){ - splhi(); - goto stolen; - } /* waste time or halt the CPU */ idlehands(); /* remember how much time we're here */ @@ -717,15 +722,15 @@ found: splhi(); - p = dequeueproc(sch, rq, p); + p = dequeueproc(&run, rq, p); if(p == nil) goto loop; -stolen: + p->state = Scheding; - p->mp = MACHP(m->machno); + p->mp = sys->machptr[m->machno]; if(edflock(p)){ - edfrun(p, rq == &sch->runq[PriEdf]); /* start deadline timer and do admin */ + edfrun(p, rq == &run.runq[PriEdf]); /* start deadline timer and do admin */ edfunlock(); } if(p->trace) @@ -733,6 +738,55 @@ return p; } +/* + * pick a process to run. + * most of this is used in AMP sched. 
+ * (on a quad core or less, we use SMP). + * In the case of core 0 we always return nil, but + * schedule the picked process at any other available TC. + * In the case of other cores we wait until a process is given + * by core 0. + */ +Proc* +runproc(void) +{ + Schedq *rq; + Proc *p; + ulong start, now; + + if(sys->nmach <= AMPmincores) + return smprunproc(); + + start = perfticks(); + run.preempts++; + rq = nil; + if(m->machno != 0){ + do{ + spllo(); + while(m->proc == nil) + idlehands(); + now = perfticks(); + m->perf.inidle += now-start; + start = now; + splhi(); + p = m->proc; + }while(p == nil); + p->state = Scheding; + p->mp = sys->machptr[m->machno]; + + if(edflock(p)){ + edfrun(p, rq == &run.runq[PriEdf]); /* start deadline timer and do admin */ + edfunlock(); + } + if(p->trace) + proctrace(p, SRun, 0); + return p; + } + + mach0sched(); + return nil; /* not reached */ +} + int canpage(Proc *p) { @@ -811,7 +865,7 @@ p->wired = 0; procpriority(p, PriNormal, 0); p->cpu = 0; - p->lastupdate = MACHP(0)->ticks*Scaling; + p->lastupdate = sys->ticks*Scaling; p->edf = nil; p->ntrap = 0; @@ -850,15 +904,15 @@ psdecref(pp); } bm = 0; - for(i=0; i<conf.nmach; i++) + for(i=0; i<sys->nmach; i++) if(nwired[i] < nwired[bm]) bm = i; } else { /* use the virtual machine requested */ - bm = bm % conf.nmach; + bm = bm % sys->nmach; } - p->wired = MACHP(bm); + p->wired = sys->machptr[bm]; p->mp = p->wired; /* @@ -1287,7 +1341,7 @@ stime = up->time[TSys] + up->time[TCSys]; wq->w.time[TUser] = tk2ms(utime); wq->w.time[TSys] = tk2ms(stime); - wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]); + wq->w.time[TReal] = tk2ms(sys->ticks - up->time[TReal]); if(exitstr && exitstr[0]) snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s", up->text, up->pid, exitstr); @@ -1465,6 +1519,7 @@ { int i, ns, nm, nwait; Proc *p; + Mach *mp; /* * tell all processes with this @@ -1479,12 +1534,12 @@ for(ns = 0; ns < NSEG; ns++){ if(p->seg[ns] == s){ p->newtlb = 1; - for(nm = 0; nm < conf.nmach; nm++){ - if(MACHP(nm)->proc == 
p){ - MACHP(nm)->mmuflush = 1; - nwait++; - } - } + for(nm = 0; nm < MACHMAX; nm++) + if((mp = sys->machptr[nm]) != nil && mp->online) + if(mp->proc == p){ + mp->mmuflush = 1; + nwait++; + } break; } } @@ -1501,31 +1556,29 @@ * In that case we must IPI it, but only if that core is * using this segment. */ - for(nm = 0; nm < conf.nmach; nm++) - if(MACHP(nm) != m) - while(MACHP(nm)->mmuflush) - sched(); + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online) + if(mp != m) + while(mp->mmuflush) + sched(); } void scheddump(void) { Proc *p; - Sched *sch; Schedq *rq; - for(sch = run; sch < &run[Nsched]; sch++){ - for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){ - if(rq->head == 0) - continue; - print("sch%ld rq%ld:", sch - run, rq-sch->runq); - for(p = rq->head; p; p = p->rnext) - print(" %d(%lud)", p->pid, m->ticks - p->readytime); - print("\n"); - delay(150); - } - print("sch%ld: nrdy %d\n", sch - run, sch->nrdy); + for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){ + if(rq->head == 0) + continue; + print("run[%ld]:", rq-run.runq); + for(p = rq->head; p; p = p->rnext) + print(" %d(%lud)", p->pid, m->ticks - p->readytime); + print("\n"); + delay(150); } + print("nrdy %d\n", run.nrdy); } void @@ -1568,7 +1621,7 @@ incref(kpgrp); memset(p->time, 0, sizeof(p->time)); - p->time[TReal] = MACHP(0)->ticks; + p->time[TReal] = sys->ticks; ready(p); /* * since the bss/data segments are now shareable, @@ -1744,20 +1797,19 @@ /* * time accounting called by clock() splhi'd - * only cpu0 computes system load average + * only cpu1 computes system load average + * but the system load average is accounted for cpu0. 
*/ void accounttime(void) { - Sched *sch; Proc *p; ulong n, per; - sch = m->sch; p = m->proc; if(p) { - if(m->machno == 0) - sch->nrun++; + if(m->machno == 1) + run.nrun++; p->time[p->insyscall]++; } @@ -1775,8 +1827,10 @@ m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ; m->perf.inintr = 0; - /* only one processor gets to compute system load averages */ - if(m->machno != 0) + /* only one processor gets to compute system load averages. + * it has to be mach 1 when we use AMP. + */ + if(sys->nmach > 1 && m->machno != 1) return; /* @@ -1788,16 +1842,16 @@ * approximately the load over the last second, * with a tail lasting about 5 seconds. */ - n = sch->nrun; - sch->nrun = 0; - n = (sch->nrdy+n)*1000; - m->load = (m->load*(HZ-1)+n)/HZ; + n = run.nrun; + run.nrun = 0; + n = (run.nrdy+n)*1000; + sys->load = (sys->load*(HZ-1)+n)/HZ; } void halt(void) { - if(m->sch->nrdy != 0) + if(run.nrdy != 0) return; hardhalt(); } diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/qio.c --- a/sys/src/nix/port/qio.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/qio.c Wed Feb 08 22:33:03 2012 +0100 @@ -1174,7 +1174,6 @@ qbwrite(Queue *q, Block *b) { int n, dowakeup; - Proc *p; n = BLEN(b); @@ -1236,13 +1235,8 @@ q->kick(q->arg); /* wakeup anyone consuming at the other end */ - if(dowakeup){ - p = wakeup(&q->rr); - - /* if we just wokeup a higher priority process, let it run */ - if(p != nil && p->priority > up->priority) - sched(); - } + if(dowakeup) + wakeup(&q->rr); /* * flow control, wait for queue to get below the limit diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/sysproc.c --- a/sys/src/nix/port/sysproc.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/sysproc.c Wed Feb 08 22:33:03 2012 +0100 @@ -107,7 +107,8 @@ } } - p->trace = up->trace; + if(up->trace) + p->trace = 1; p->scallnr = up->scallnr; memmove(p->arg, up->arg, sizeof(up->arg)); p->nerrlab = 0; @@ -206,7 +207,7 @@ pid = p->pid; memset(p->time, 0, sizeof(p->time)); - 
p->time[TReal] = MACHP(0)->ticks; + p->time[TReal] = sys->ticks; if(flag & (RFPREPAGE|RFCPREPAGE)){ p->prepagemem = flag&RFPREPAGE; @@ -265,45 +266,18 @@ uvlong hdr[1]; } Hdr; +/* + * flags can ONLY specify that you want an AC for you, or + * that you want an XC for you. + * + */ static void -donate(Proc *p) -{ - static int coreno; - int core, i; - Mach *mp; - extern int scheddonates; - - if(!scheddonates || p->wired) - return; - - for(i = 0; i < MACHMAX; i++){ - core = i; - mp = MACHP(core); - if(mp == m || mp == nil || mp->online == 0 || mp->sch == nil) - continue; - if(mp->nixtype != NIXTC || mp->sch == m->sch) - continue; - if(mp->sch->nrdy > m->sch->nrdy)/* more loaded than us, ignore */ - continue; - p->mp = mp; - p->color = corecolor(mp->machno); - if(p->color < 0) - p->color = 0; - coreno = core + 1; -iprint("donate %d -> %d\n", m->machno, mp->machno); - sched(); - return; - } - /* no core preferred, don't change the process color */ -} - -static void -execac(Ar0* ar0, int core, char *ufile, char **argv) +execac(Ar0* ar0, int flags, char *ufile, char **argv) { Hdr hdr; Fgrp *f; Tos *tos; - Chan *chan; + Chan *chan, *ichan; Image *img; Segment *s; int argc, i, n; @@ -311,45 +285,44 @@ char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1]; long hdrsz, magic, textsz, datasz, bsssz; uintptr textlim, datalim, bsslim, entry, stack; - Mach *mp; static int colorgen; + + file = nil; + elem = nil; + switch(flags){ + case EXTC: + case EXXC: + break; + case EXAC: + up->ac = getac(up, -1); + break; + default: + error("unknown execac flag"); + } + if(waserror()){ + DBG("execac: failing: %s\n", up->errstr); + free(file); + free(elem); + if(flags == EXAC && up->ac != nil) + up->ac->proc = nil; + up->ac = nil; + nexterror(); + } + /* * Open the file, remembering the final element and the full name. 
*/ - file = nil; - elem = nil; - chan = nil; - mp = nil; - if(waserror()){ - DBG("execac: failing: %s\n", up->errstr); - if(file) - free(file); - if(elem) - free(elem); - if(chan) - cclose(chan); - if(core > 0 && mp != nil) - mp->proc = nil; - if(core != 0) - up->ac = nil; - nexterror(); - } - - if(core != 0){ - up->ac = getac(up, core); - mp = up->ac; - /* - * This variable is not used later, so take the address - * to make it go to memory for the waserror. - */ - USED(&mp); - } - argc = 0; file = validnamedup(ufile, 1); DBG("execac: up %#p file %s\n", up, file); - chan = namec(file, Aopen, OEXEC, 0); + if(up->trace) + proctracepid(up); + ichan = namec(file, Aopen, OEXEC, 0); + if(waserror()){ + cclose(ichan); + nexterror(); + } kstrdup(&elem, up->genbuf); /* @@ -359,7 +332,7 @@ * The #! line must be less than sizeof(Exec) in size, * including the terminating \n. */ - hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0); + hdrsz = ichan->dev->read(ichan, &hdr, sizeof(Hdr), 0); if(hdrsz < 2) error(Ebadexec); p = (char*)&hdr; @@ -381,15 +354,22 @@ */ p = progarg[0]; progarg[0] = elem; - cclose(chan); chan = nil; /* in case namec errors out */ USED(chan); chan = namec(p, Aopen, OEXEC, 0); hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0); if(hdrsz < 2) error(Ebadexec); + }else{ + chan = ichan; + incref(ichan); } + /* chan is the chan to use, initial or not. ichan is irrelevant now */ + cclose(ichan); + poperror(); + + /* * #! has had its chance, now we need a real binary. */ @@ -427,10 +407,8 @@ || datalim < textlim || bsslim < datalim) error(Ebadexec); - if(core != 0) - up->color = corecolor(core); - else - donate(up); + if(up->ac != nil && up->ac != m) + up->color = corecolor(up->ac->machno); /* * The new stack is created in ESEG, temporarily mapped elsewhere. @@ -600,7 +578,7 @@ } /* Text. Shared. Attaches to cache image if possible - * but prepaged if core > 0. 
+ * but prepaged if EXAC */ img = attachimage(SG_TEXT|SG_RONLY, chan, up->color, UTZERO, (textlim-UTZERO)/BIGPGSZ); s = img->s; @@ -656,7 +634,7 @@ * space and needs to be flushed */ mmuflush(); - if(up->prepagemem || core > 0) + if(up->prepagemem || flags == EXAC) nixprepage(-1); qlock(&up->debug); up->nnote = 0; @@ -670,7 +648,7 @@ ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), argc); - if(core > 0){ + if(flags == EXAC){ up->procctl = Proc_toac; up->prepagemem = 1; } @@ -684,19 +662,19 @@ void sysexecac(Ar0* ar0, va_list list) { - int core; + int flags; char *file, **argv; /* - * void* execac(int core, char* name, char* argv[]); + * void* execac(int flags, char* name, char* argv[]); */ - core = va_arg(list, unsigned int); + flags = va_arg(list, unsigned int); file = va_arg(list, char*); file = validaddr(file, 1, 0); argv = va_arg(list, char**); evenaddr(PTR2UINT(argv)); - execac(ar0, core, file, argv); + execac(ar0, flags, file, argv); } void @@ -711,7 +689,7 @@ file = validaddr(file, 1, 0); argv = va_arg(list, char**); evenaddr(PTR2UINT(argv)); - execac(ar0, 0, file, argv); + execac(ar0, EXTC, file, argv); } void @@ -1192,59 +1170,13 @@ sleep(&phore, semawoke, &phore); poperror(); } - semdequeue(s, &phore); - coherence(); /* not strictly necessary due to lock in semdequeue */ - if(!phore.waiting) - semwakeup(s, addr, 1); - if(!acquired) - nexterror(); - return 1; -} - -/* Acquire semaphore or time-out */ -static int -tsemacquire(Segment* s, int* addr, ulong ms) -{ - int acquired; - Sema phore; - int timedout; - ulong t; - - if(canacquire(addr)) - return 1; - if(ms == 0) - return 0; - - acquired = 0; - timedout = 0; - semqueue(s, addr, &phore); - for(;;){ - phore.waiting = 1; - coherence(); - if(canacquire(addr)){ - acquired = 1; - break; - } - if(waserror()) - break; - t = m->ticks; - tsleep(&phore, semawoke, &phore, ms); - if(TK2MS(m->ticks-t) >= ms) { - timedout = 1; - poperror(); - break; - } - ms -= TK2MS(m->ticks-t); - poperror(); - } semdequeue(s, 
&phore); coherence(); /* not strictly necessary due to lock in semdequeue */ if(!phore.waiting) semwakeup(s, addr, 1); - if(timedout) - return 0; if(!acquired) nexterror(); + return 1; } @@ -1273,30 +1205,6 @@ } void -systsemacquire(Ar0* ar0, va_list list) -{ - Segment *s; - int *addr, ms; - - /* - * int tsemacquire(long* addr, ulong ms); - * should be (and will be implemented below as) perhaps - * int tsemacquire(int* addr, ulong ms); - */ - addr = va_arg(list, int*); - addr = validaddr(addr, sizeof(int), 1); - evenaddr(PTR2UINT(addr)); - ms = va_arg(list, ulong); - - if((s = seg(up, PTR2UINT(addr), 0)) == nil) - error(Ebadarg); - if(*addr < 0) - error(Ebadarg); - - ar0->i = tsemacquire(s, addr, ms); -} - -void syssemrelease(Ar0* ar0, va_list list) { Segment *s; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/taslock.c --- a/sys/src/nix/port/taslock.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/taslock.c Wed Feb 08 22:33:03 2012 +0100 @@ -143,7 +143,7 @@ lockstats.inglare++; i = 0; while(l->key){ - if(conf.nmach < 2 && up && up->edf && (up->edf->flags & Admitted)){ + if(sys->nmach < 2 && up && up->edf && (up->edf->flags & Admitted)){ /* * Priority inversion, yield on a uniprocessor; on a * multiprocessor, the other processor will unlock @@ -216,7 +216,7 @@ l->pc = pc; l->p = up; l->isilock = 1; - l->m = MACHP(m->machno); + l->m = m; if(LOCKCYCLES) cycles(&l->lockcycles); } @@ -236,7 +236,7 @@ up->lastlock = l; l->pc = getcallerpc(&l); l->p = up; - l->m = MACHP(m->machno); + l->m = m; l->isilock = 0; if(LOCKCYCLES) cycles(&l->lockcycles); diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/tcklock.c --- a/sys/src/nix/port/tcklock.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/tcklock.c Wed Feb 08 22:33:03 2012 +0100 @@ -197,7 +197,7 @@ } l->pc = pc; l->p = up; - l->m = MACHP(m->machno); + l->m = m; l->isilock = 0; if(up) up->lastlock = l; @@ -234,7 +234,7 @@ l->pc = pc; l->p = up; l->isilock = 1; - l->m = MACHP(m->machno); + l->m = m; 
if(l != &waitstatslk) addwaitstat(pc, t0, WSlock); } @@ -266,7 +266,7 @@ goto Cant; l->pc = pc; l->p = up; - l->m = MACHP(m->machno); + l->m = m; if(up) up->lastlock = l; l->isilock = 0; diff -r d2175c0b7b6c -r 29403dfab2b1 sys/src/nix/port/tod.c --- a/sys/src/nix/port/tod.c Wed Feb 08 18:52:52 2012 +0000 +++ b/sys/src/nix/port/tod.c Wed Feb 08 22:33:03 2012 +0100 @@ -107,7 +107,7 @@ if(delta > 0 && n > delta) n = delta; delta = delta/n; - tod.sstart = MACHP(0)->ticks; + tod.sstart = sys->ticks; tod.send = tod.sstart + n; tod.delta = delta; } @@ -138,7 +138,7 @@ /* add in correction */ if(tod.sstart != tod.send){ - t = MACHP(0)->ticks; + t = sys->ticks; if(t >= tod.send) t = tod.send; tod.off = tod.off + tod.delta*(t - tod.sstart);