# HG changeset patch # User Francisco J Ballesteros # Date 1322907330 0 # Node ID c3b70ce3c9a50ec3a99fcf285efc0a28ebdc097b # Parent 3dd2bfb35f8163fdd89f8e17bed639d706f35107 lsub: try to get googlecode updated wrt lsub This include the file changes as of now in /sys in lsub wrt google code. The main change is getting SMP working. Other changes are removing unused linux emulation code, restoring the syscall path now that linux emulation is gone, several fixes in the kernel, un update of the experimental IX implementation, changes in the profiler for the scheduler, changes in the scheduler. If we submit this at least we'll be again in sync. (there might be files that have to be hg added or removed if I made a mistake, but I think this list in the CL is ok). R=nixiedev, noah.evans, nemo CC=nix-dev http://codereview.appspot.com/5451061 diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/include/trace.h --- a/sys/include/trace.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/include/trace.h Sat Dec 03 10:15:30 2011 +0000 @@ -13,12 +13,14 @@ SInts, /* Interrupt start */ SInte, /* Interrupt end */ SUser, /* user event */ + SLock, /* blocked on a queue or lock */ Nevent, } Tevent; typedef struct Traceevent Traceevent; struct Traceevent { - ulong pid; - ulong etype; /* Event type */ - vlong time; /* time stamp */ + u32int pid; + u32int etype; /* Event type */ + u64int time; /* time stamp */ + u32int core; /* core number */ }; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/man/3/arch --- a/sys/man/3/arch Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/man/3/arch Sat Dec 03 10:15:30 2011 +0000 @@ -18,7 +18,8 @@ .PP Reads from .I cputype -recover the processor type and clock rate in MHz. +recover the processor type and clock rate in MHz in the first line, followed +by further lines with CPU information values as provided by the processor. 
Reads from .I archctl yield at least data of this form: diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/man/3/cons --- a/sys/man/3/cons Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/man/3/cons Sat Dec 03 10:15:30 2011 +0000 @@ -289,13 +289,14 @@ .PP The .B sysstat -file holds 10 numbers: +file holds 11 numbers: processor number, context switches, interrupts, system calls, page faults, -TLB faults, TLB purges, load average, idle time and time spent servicing interrupts. +TLB faults, TLB purges, load average, idle time, time spent servicing interrupts, and +scheduler number. The load average is in units of milli-CPUs and is decayed over time; idle time and interrupt time are percentage units; the others are total counts from boot time. -Afer these 10 numbers, the number of core and the role for the core (TC, AC, or KC) +After these 11 numbers, the role for the processor (TC, AC, or KC) is shown. If the machine is a multiprocessor, .B sysstat diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/man/3/waitstat --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/man/3/waitstat Sat Dec 03 10:15:30 2011 +0000 @@ -0,0 +1,51 @@ +.TH WAITSTAT 3 +.SH NAME +waitstat \- kernel waiting times profiling +.SH SYNOPSIS +.nf +.B bind -a #W /dev +.sp +.B /dev/wsctl +.B /dev/wsdata +.fi +.SH DESCRIPTION +The +.I waitstat +device provides profiling +data for the operating system kernel regarding waiting times (usecs) for locks, mostly. +.PP +The file +.B wsdata +holds one line per program counter where a wait of interest happens. The fields +report the type of waiting event, the program counter, the number of times waiting +did happen there, the maximum waiting time, and the total waiting time. +.PP +The file +.B wsctl +controls profiling for this device. +Writing the string +.B start +to +.B wsctl +begins profiling; +.B stop +terminates it. The message +.B clear +restarts profiling after zeroing the counts.
+.SH EXAMPLE +The following +.IR rc (1) +script runs a test program while profiling the kernel +and reports the results. +.sp +.EX + bind -a '#W' /dev + echo start > /dev/wsctl + runtest + echo stop > /dev/wsctl + cat /dev/wsdata +.EE +.SH SOURCE +.B /sys/src/9/port/devws.c +.SH SEE ALSO +.IR kprof (3) diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/cmd/trace.c --- a/sys/src/cmd/trace.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/cmd/trace.c Sat Dec 03 10:15:30 2011 +0000 @@ -10,6 +10,24 @@ #include #include "trace.h" +#define GBIT8(p) ((p)[0]) +#define GBIT16(p) ((p)[0]|((p)[1]<<8)) +#define GBIT32(p) ((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) +#define GBIT64(p) ((u32int)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) |\ + ((vlong)((p)[4]|((p)[5]<<8)|((p)[6]<<16)|((p)[7]<<24)) << 32)) + +#define PBIT8(p,v) (p)[0]=(v) +#define PBIT16(p,v) (p)[0]=(v);(p)[1]=(v)>>8 +#define PBIT32(p,v) (p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24 +#define PBIT64(p,v) (p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24;\ + (p)[4]=(v)>>32;(p)[5]=(v)>>40;(p)[6]=(v)>>48;(p)[7]=(v)>>56 + +#define BIT8SZ 1 +#define BIT16SZ 2 +#define BIT32SZ 4 +#define BIT64SZ 8 + +#pragma varargck type "t" uvlong #pragma varargck type "t" vlong #pragma varargck type "U" uvlong @@ -69,6 +87,7 @@ void drawtrace(void); int schedparse(char*, char*, char*); int timeconv(Fmt*); +static void tracefile(int); char *schedstatename[] = { [SAdmit] = "Admit", @@ -85,6 +104,7 @@ [SInte] = "Inte", [SUser] = "User", [SYield] = "Yield", + [SLock] = "Lock", }; struct { @@ -125,18 +145,26 @@ static void usage(void) { - fprint(2, "Usage: %s [-d profdev] [-w] [-v] [-t triggerproc] [processes]\n", argv0); + fprint(2, "Usage: %s [-f file [-g]] [-d profdev] [-w] [-v] [-t triggerproc] [processes]\n", argv0); exits(nil); } void threadmain(int argc, char **argv) { - int fd, i; + int fd, i, justfile, graph; char fname[80]; fmtinstall('t', timeconv); + justfile = graph = 0; ARGBEGIN { + case 'f': + justfile = 1; + 
profdev = EARGF(usage()); + break; + case 'g': + graph = 1; + break; case 'd': profdev = EARGF(usage()); break; @@ -153,7 +181,13 @@ usage(); } ARGEND; - + if(justfile){ + if(argc != 0) + usage(); + tracefile(graph); + exits(nil); + } + fname[sizeof fname - 1] = 0; for(i = 0; i < argc; i++){ snprint(fname, sizeof fname - 2, "/proc/%s/ctl", @@ -173,6 +207,101 @@ drawtrace(); } +static +struct{ + int pid; + int state; +}graphs[16]; + +static void +addtograph(Traceevent *t) +{ + int i; + + for(i = 0; i < nelem(graphs); i++){ + if(graphs[i].pid == t->pid) + break; + if(graphs[i].pid == 0){ + graphs[i].pid = t->pid; + break; + } + } + if(i == nelem(graphs)) + return; + graphs[i].state = t->etype; +} + +static void +printgraph(Biobuf *bout, int pid, int core, uvlong time) +{ + int i; + static char *schar[] = { + [SAdmit] = "!a", + [SSleep] = ".s", + [SDead] = "xd", + [SDeadline] = "??", + [SEdf] = "??", + [SExpel] = "??", + [SReady] = "!r", + [SRelease] = "??", + [SRun] = "|R", + [SSlice] = "??", + [SInts] = "!i", + [SInte] = "|e", + [SUser] = "|u", + [SYield] = "!y", + [SLock] = "!l", + }; + + Bprint(bout, "%20.20lld %02d", time, core); + for(i = 0; i < nelem(graphs); i++){ + if(graphs[i].pid == 0) + break; + Bprint(bout, "\t%c", schar[graphs[i].state][0]); + if(graphs[i].pid == pid) + Bputc(bout, schar[graphs[i].state][1]); + } + Bprint(bout, "\n"); +} + +static void +tracefile(int graph) +{ + int logfd; + Traceevent t; + Biobuf bout; + uchar buf[BIT32SZ+BIT32SZ+BIT64SZ+BIT32SZ]; + uvlong t0 = 0; /* time base for -g output: 0 until latched from the first event; must start at 0 or the t0 == 0 test below reads stack garbage */ + + if((logfd = open(profdev, OREAD)) < 0) + sysfatal("%s: open: %r", profdev); + if(Binit(&bout, 1, OWRITE) < 0) + sysfatal("stdout: Binit: %r"); + while(read(logfd, buf, sizeof buf) == sizeof buf){ + t.pid = GBIT32(buf); + t.etype = GBIT32(buf+BIT32SZ); + t.time = GBIT64(buf+BIT32SZ+BIT32SZ); + t.core = GBIT32(buf+BIT32SZ+BIT32SZ+BIT64SZ); + if(t.pid == 0) + continue; + if(t.etype >= nelem(schedstatename) || schedstatename[t.etype] == nil){ + fprint(2, "unknown state %ud\n",
t.etype); + continue; + } + if(graph == 0) + Bprint(&bout, "%ud\t%-10.10s\t%ulld\t%ud\n", + t.pid, schedstatename[t.etype], t.time, t.core); + else{ + addtograph(&t); + if(t0 == 0) + t0 = t.time; + printgraph(&bout, t.pid, t.core, t.time-t0); + } + } + Bterm(&bout); + close(logfd); +} + static void mkcol(int i, int c0, int c1, int c2) { @@ -263,8 +392,8 @@ s = now - t->tstart; if(t->tevents[SRelease]) snprint(buf, sizeof(buf), " per %t — avg: %t max: %t", - (vlong)(s/t->tevents[SRelease]), - (vlong)(t->runtime/t->tevents[SRelease]), + (uvlong)(s/t->tevents[SRelease]), + (uvlong)(t->runtime/t->tevents[SRelease]), t->runmax); else if((s /=1000000000LL) != 0) snprint(buf, sizeof(buf), " per 1s — avg: %t total: %t", @@ -538,7 +667,7 @@ } break; case SDead: -print("task died %ld %t %s\n", event->pid, event->time, schedstatename[event->etype & 0xffff]); +print("task died %d %t %s\n", event->pid, event->time, schedstatename[event->etype & 0xffff]); free(t->events); free(t->name); ntasks--; @@ -695,12 +824,12 @@ nevents = n / sizeof(Traceevent); for (ep = eventbuf; ep < eventbuf + nevents; ep++){ if ((ep->etype & 0xffff) >= Nevent){ - print("%ld %t Illegal event %ld\n", + print("%ud %t Illegal event %ud\n", ep->pid, ep->time, ep->etype & 0xffff); continue; } if (verbose) - print("%ld %t %s\n", + print("%ud %t %s\n", ep->pid, ep->time, schedstatename[ep->etype & 0xffff]); for(i = 0; i < ntasks; i++) diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/libc/9sys/upsem.c --- a/sys/src/libc/9sys/upsem.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/libc/9sys/upsem.c Sat Dec 03 10:15:30 2011 +0000 @@ -60,7 +60,7 @@ typesok(); /* busy wait */ for(i = 0; *s <= 0 && i < semtrytimes; i++) - sleep(0); + ; // sleep(0); if(*s <= 0 && dontblock) return -1; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/libc/port/malloc.c --- a/sys/src/libc/port/malloc.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/libc/port/malloc.c Sat Dec 03 10:15:30 2011 +0000 @@ -46,6 +46,7 @@ oomexits = yn; } +#define 
TESTSEGBRK /* * we do minimal bookkeeping so we can tell pool * whether two blocks are adjacent and thus mergeable. @@ -56,9 +57,20 @@ ulong *x; n += 2*sizeof(ulong); /* two longs for us */ +#ifdef TESTSEGBRK +static char *top; + if(top == nil) + top = segbrk(0,0); + x = segbrk(top, top+n); + if(x == 0 || x == (void*)-1) + return nil; + x = (ulong*)top; + top += n; +#else x = sbrk(n); if(x == (void*)-1) return nil; +#endif x[0] = (n+7)&~7; /* sbrk rounds size up to mult. of 8 */ x[1] = 0xDeadBeef; return x+2; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/libc/port/mkfile --- a/sys/src/libc/port/mkfile Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/libc/port/mkfile Sat Dec 03 10:15:30 2011 +0000 @@ -37,6 +37,7 @@ lock.c\ log.c\ lrand.c\ +# qmalloc.c\ malloc.c\ memccpy.c\ memchr.c\ diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/libthread/channel.acid --- a/sys/src/libthread/channel.acid Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/libthread/channel.acid Sat Dec 03 10:15:30 2011 +0000 @@ -35,21 +35,21 @@ Runesync = 128; Runeself = 128; Runeerror = 65533; -sizeofFmt = 80; +sizeofFmt = 48; aggr Fmt { 'b' 0 runes; - 'X' 8 start; - 'X' 16 to; - 'X' 24 stop; - 'X' 32 flush; - 'X' 40 farg; - 'D' 48 nfmt; - 'X' 56 args; - 'D' 64 r; - 'D' 68 width; - 'D' 72 prec; - 'U' 76 flags; + 'X' 4 start; + 'X' 8 to; + 'X' 12 stop; + 'X' 16 flush; + 'X' 20 farg; + 'D' 24 nfmt; + 'X' 28 args; + 'D' 32 r; + 'D' 36 width; + 'D' 40 prec; + 'U' 44 flags; }; defn @@ -120,7 +120,7 @@ Profkernel = 2; Proftime = 3; Profsample = 4; -sizeofLock = 8; +sizeofLock = 4; aggr Lock { 'D' 0 val; @@ -132,12 +132,12 @@ print(" val ", addr.val, "\n"); }; -sizeofQLp = 24; +sizeofQLp = 12; aggr QLp { 'D' 0 inuse; - 'A' QLp 8 next; - 'C' 16 state; + 'A' QLp 4 next; + 'C' 8 state; }; defn @@ -148,13 +148,13 @@ print(" state ", addr.state, "\n"); }; -sizeofQLock = 32; +sizeofQLock = 16; aggr QLock { Lock 0 lock; - 'D' 8 locked; - 'A' QLp 16 $head; - 'A' QLp 24 $tail; + 'D' 4 locked; + 'A' QLp 8 $head; + 'A' QLp 12 
$tail; }; defn @@ -168,14 +168,14 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofRWLock = 32; +sizeofRWLock = 20; aggr RWLock { Lock 0 lock; - 'D' 8 readers; - 'D' 12 writer; - 'A' QLp 16 $head; - 'A' QLp 24 $tail; + 'D' 4 readers; + 'D' 8 writer; + 'A' QLp 12 $head; + 'A' QLp 16 $tail; }; defn @@ -190,12 +190,12 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofRendez = 24; +sizeofRendez = 12; aggr Rendez { 'A' QLock 0 l; - 'A' QLp 8 $head; - 'A' QLp 16 $tail; + 'A' QLp 4 $head; + 'A' QLp 8 $tail; }; defn @@ -206,18 +206,18 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofNetConnInfo = 72; +sizeofNetConnInfo = 36; aggr NetConnInfo { 'X' 0 dir; - 'X' 8 root; - 'X' 16 spec; - 'X' 24 lsys; - 'X' 32 lserv; - 'X' 40 rsys; - 'X' 48 rserv; - 'X' 56 laddr; - 'X' 64 raddr; + 'X' 4 root; + 'X' 8 spec; + 'X' 12 lsys; + 'X' 16 lserv; + 'X' 20 rsys; + 'X' 24 rserv; + 'X' 28 laddr; + 'X' 32 raddr; }; defn @@ -246,6 +246,10 @@ RFCFDG = 4096; RFREND = 8192; RFNOMNT = 16384; +RFPREPAGE = 32768; +RFCPREPAGE = 65536; +RFCORE = 131072; +RFCCORE = 262144; sizeofQid = 16; aggr Qid { @@ -262,7 +266,7 @@ print(" type ", addr.type, "\n"); }; -sizeofDir = 80; +sizeofDir = 60; aggr Dir { 'u' 0 type; @@ -271,11 +275,11 @@ 'U' 24 mode; 'U' 28 atime; 'U' 32 mtime; - 'V' 40 length; - 'X' 48 name; - 'X' 56 uid; - 'X' 64 gid; - 'X' 72 muid; + 'V' 36 length; + 'X' 44 name; + 'X' 48 uid; + 'X' 52 gid; + 'X' 56 muid; }; defn @@ -296,7 +300,7 @@ print(" muid ", addr.muid\X, "\n"); }; -sizeofWaitmsg = 24; +sizeofWaitmsg = 20; aggr Waitmsg { 'D' 0 pid; @@ -312,11 +316,11 @@ print(" msg ", addr.msg\X, "\n"); }; -sizeofIOchunk = 16; +sizeofIOchunk = 8; aggr IOchunk { 'X' 0 addr; - 'U' 8 len; + 'U' 4 len; }; defn @@ -326,11 +330,31 @@ print(" len ", addr.len, "\n"); }; +sizeofZio = 8; +aggr Zio +{ + 'X' 0 data; + 'U' 4 size; +}; + +defn +Zio(addr) { + complex Zio addr; + print(" data ", addr.data\X, "\n"); + print(" size ", addr.size, "\n"); +}; + +NIXTC = 0; +NIXKC = 1; +NIXAC = 2; +PmcOs = 1; 
+PmcUser = 2; +PmcEnable = 4; Nqwds = 2; Nqshift = 5; Nqmask = -1; Nqbits = 64; -sizeofChannel = 48; +sizeofChannel = 36; aggr Channel { 'D' 0 s; @@ -338,10 +362,10 @@ 'U' 8 n; 'D' 12 e; 'D' 16 freed; - 'X' 24 qentry; - 'D' 32 nentry; - 'D' 36 closed; - 'a' 40 v; + 'X' 20 qentry; + 'D' 24 nentry; + 'D' 28 closed; + 'a' 32 v; }; defn @@ -363,15 +387,15 @@ CHANRCV = 2; CHANNOP = 3; CHANNOBLK = 4; -sizeofAlt = 48; +sizeofAlt = 24; aggr Alt { 'A' Channel 0 c; - 'X' 8 v; - 'D' 16 op; - 'X' 24 err; - 'A' Channel 32 tag; - 'D' 40 entryno; + 'X' 4 v; + 'D' 8 op; + 'X' 12 err; + 'A' Channel 16 tag; + 'D' 20 entryno; }; defn @@ -385,7 +409,7 @@ print(" entryno ", addr.entryno, "\n"); }; -sizeofRef = 8; +sizeofRef = 4; aggr Ref { 'D' 0 ref; @@ -408,11 +432,11 @@ RENDHASH = 13; Printsize = 2048; NPRIV = 8; -sizeofRgrp = 112; +sizeofRgrp = 56; aggr Rgrp { Lock 0 lock; - 'a' 8 hash; + 'a' 4 hash; }; defn @@ -424,12 +448,12 @@ print(" hash ", addr.hash, "\n"); }; -sizeofTqueue = 24; +sizeofTqueue = 12; aggr Tqueue { 'D' 0 asleep; - 'X' 8 $head; - 'X' 16 $tail; + 'X' 4 $head; + 'X' 8 $tail; }; defn @@ -440,31 +464,31 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofThread = 216; +sizeofThread = 120; aggr Thread { Lock 0 lock; - 'a' 8 sched; - 'D' 24 id; - 'D' 28 grp; - 'D' 32 moribund; - 'D' 36 state; - 'D' 40 nextstate; - 'X' 48 stk; - 'U' 56 stksize; - 'A' Thread 64 next; - 'X' 72 proc; - 'A' Thread 80 nextt; - 'D' 88 ret; - 'X' 96 cmdname; - 'D' 104 inrendez; - 'A' Thread 112 rendhash; - 'X' 120 rendtag; - 'X' 128 rendval; - 'D' 136 rendbreak; - 'D' 140 chan; - 'A' Alt 144 alt; - 'a' 152 udata; + 'a' 4 sched; + 'D' 12 id; + 'D' 16 grp; + 'D' 20 moribund; + 'D' 24 state; + 'D' 28 nextstate; + 'X' 32 stk; + 'U' 36 stksize; + 'A' Thread 40 next; + 'X' 44 proc; + 'A' Thread 48 nextt; + 'D' 52 ret; + 'X' 56 cmdname; + 'D' 60 inrendez; + 'A' Thread 64 rendhash; + 'X' 68 rendtag; + 'X' 72 rendval; + 'D' 76 rendbreak; + 'D' 80 chan; + 'A' Alt 84 alt; + 'a' 88 udata; }; defn @@ 
-496,12 +520,12 @@ print(" udata ", addr.udata, "\n"); }; -sizeofExecargs = 24; +sizeofExecargs = 16; aggr Execargs { 'X' 0 prog; - 'X' 8 args; - 'a' 16 fd; + 'X' 4 args; + 'a' 8 fd; }; defn @@ -512,34 +536,34 @@ print(" fd ", addr.fd, "\n"); }; -sizeofProc = 2504; +sizeofProc = 2424; aggr Proc { Lock 0 lock; - 'a' 8 sched; - 'D' 24 pid; - 'D' 28 splhi; - 'A' Thread 32 thread; - 'D' 40 needexec; - Execargs 48 exec; - 'A' Proc 72 newproc; - 'a' 80 exitstr; - 'D' 208 rforkflag; - 'D' 212 nthreads; - Tqueue 216 threads; - Tqueue 240 ready; - Lock 264 readylock; - 'a' 272 printbuf; - 'D' 2320 blocked; - 'D' 2324 pending; - 'D' 2328 nonotes; - 'U' 2332 nextID; - 'A' Proc 2336 next; - 'X' 2344 arg; - 'a' 2352 str; - 'X' 2480 wdata; - 'X' 2488 udata; - 'C' 2496 threadint; + 'a' 4 sched; + 'D' 12 pid; + 'D' 16 splhi; + 'A' Thread 20 thread; + 'D' 24 needexec; + Execargs 28 exec; + 'A' Proc 44 newproc; + 'a' 48 exitstr; + 'D' 176 rforkflag; + 'D' 180 nthreads; + Tqueue 184 threads; + Tqueue 196 ready; + Lock 208 readylock; + 'a' 212 printbuf; + 'D' 2260 blocked; + 'D' 2264 pending; + 'D' 2268 nonotes; + 'U' 2272 nextID; + 'A' Proc 2276 next; + 'X' 2280 arg; + 'a' 2284 str; + 'X' 2412 wdata; + 'X' 2416 udata; + 'C' 2420 threadint; }; defn @@ -582,12 +606,12 @@ print(" threadint ", addr.threadint, "\n"); }; -sizeofPqueue = 24; +sizeofPqueue = 12; aggr Pqueue { Lock 0 lock; - 'A' Proc 8 $head; - 'A' Proc 16 $tail; + 'A' Proc 4 $head; + 'A' Proc 8 $tail; }; defn @@ -600,18 +624,18 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofIoproc = 192; +sizeofIoproc = 160; aggr Ioproc { 'D' 0 tid; - 'A' Channel 8 c; - 'A' Channel 16 creply; - 'D' 24 inuse; - 'X' 32 op; - 'X' 40 arg; - 'D' 48 ret; - 'a' 52 err; - 'A' Ioproc 184 next; + 'A' Channel 4 c; + 'A' Channel 8 creply; + 'D' 12 inuse; + 'X' 16 op; + 'X' 20 arg; + 'D' 24 ret; + 'a' 28 err; + 'A' Ioproc 156 next; }; defn diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/libthread/sched.acid --- a/sys/src/libthread/sched.acid Thu Dec 01 
13:47:08 2011 +0100 +++ b/sys/src/libthread/sched.acid Sat Dec 03 10:15:30 2011 +0000 @@ -35,21 +35,21 @@ Runesync = 128; Runeself = 128; Runeerror = 65533; -sizeofFmt = 80; +sizeofFmt = 48; aggr Fmt { 'b' 0 runes; - 'X' 8 start; - 'X' 16 to; - 'X' 24 stop; - 'X' 32 flush; - 'X' 40 farg; - 'D' 48 nfmt; - 'X' 56 args; - 'D' 64 r; - 'D' 68 width; - 'D' 72 prec; - 'U' 76 flags; + 'X' 4 start; + 'X' 8 to; + 'X' 12 stop; + 'X' 16 flush; + 'X' 20 farg; + 'D' 24 nfmt; + 'X' 28 args; + 'D' 32 r; + 'D' 36 width; + 'D' 40 prec; + 'U' 44 flags; }; defn @@ -120,7 +120,7 @@ Profkernel = 2; Proftime = 3; Profsample = 4; -sizeofLock = 8; +sizeofLock = 4; aggr Lock { 'D' 0 val; @@ -132,12 +132,12 @@ print(" val ", addr.val, "\n"); }; -sizeofQLp = 24; +sizeofQLp = 12; aggr QLp { 'D' 0 inuse; - 'A' QLp 8 next; - 'C' 16 state; + 'A' QLp 4 next; + 'C' 8 state; }; defn @@ -148,13 +148,13 @@ print(" state ", addr.state, "\n"); }; -sizeofQLock = 32; +sizeofQLock = 16; aggr QLock { Lock 0 lock; - 'D' 8 locked; - 'A' QLp 16 $head; - 'A' QLp 24 $tail; + 'D' 4 locked; + 'A' QLp 8 $head; + 'A' QLp 12 $tail; }; defn @@ -168,14 +168,14 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofRWLock = 32; +sizeofRWLock = 20; aggr RWLock { Lock 0 lock; - 'D' 8 readers; - 'D' 12 writer; - 'A' QLp 16 $head; - 'A' QLp 24 $tail; + 'D' 4 readers; + 'D' 8 writer; + 'A' QLp 12 $head; + 'A' QLp 16 $tail; }; defn @@ -190,12 +190,12 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofRendez = 24; +sizeofRendez = 12; aggr Rendez { 'A' QLock 0 l; - 'A' QLp 8 $head; - 'A' QLp 16 $tail; + 'A' QLp 4 $head; + 'A' QLp 8 $tail; }; defn @@ -206,18 +206,18 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofNetConnInfo = 72; +sizeofNetConnInfo = 36; aggr NetConnInfo { 'X' 0 dir; - 'X' 8 root; - 'X' 16 spec; - 'X' 24 lsys; - 'X' 32 lserv; - 'X' 40 rsys; - 'X' 48 rserv; - 'X' 56 laddr; - 'X' 64 raddr; + 'X' 4 root; + 'X' 8 spec; + 'X' 12 lsys; + 'X' 16 lserv; + 'X' 20 rsys; + 'X' 24 rserv; + 'X' 28 laddr; + 'X' 32 raddr; }; 
defn @@ -246,6 +246,10 @@ RFCFDG = 4096; RFREND = 8192; RFNOMNT = 16384; +RFPREPAGE = 32768; +RFCPREPAGE = 65536; +RFCORE = 131072; +RFCCORE = 262144; sizeofQid = 16; aggr Qid { @@ -262,7 +266,7 @@ print(" type ", addr.type, "\n"); }; -sizeofDir = 80; +sizeofDir = 60; aggr Dir { 'u' 0 type; @@ -271,11 +275,11 @@ 'U' 24 mode; 'U' 28 atime; 'U' 32 mtime; - 'V' 40 length; - 'X' 48 name; - 'X' 56 uid; - 'X' 64 gid; - 'X' 72 muid; + 'V' 36 length; + 'X' 44 name; + 'X' 48 uid; + 'X' 52 gid; + 'X' 56 muid; }; defn @@ -296,7 +300,7 @@ print(" muid ", addr.muid\X, "\n"); }; -sizeofWaitmsg = 24; +sizeofWaitmsg = 20; aggr Waitmsg { 'D' 0 pid; @@ -312,11 +316,11 @@ print(" msg ", addr.msg\X, "\n"); }; -sizeofIOchunk = 16; +sizeofIOchunk = 8; aggr IOchunk { 'X' 0 addr; - 'U' 8 len; + 'U' 4 len; }; defn @@ -326,11 +330,31 @@ print(" len ", addr.len, "\n"); }; +sizeofZio = 8; +aggr Zio +{ + 'X' 0 data; + 'U' 4 size; +}; + +defn +Zio(addr) { + complex Zio addr; + print(" data ", addr.data\X, "\n"); + print(" size ", addr.size, "\n"); +}; + +NIXTC = 0; +NIXKC = 1; +NIXAC = 2; +PmcOs = 1; +PmcUser = 2; +PmcEnable = 4; Nqwds = 2; Nqshift = 5; Nqmask = -1; Nqbits = 64; -sizeofChannel = 48; +sizeofChannel = 36; aggr Channel { 'D' 0 s; @@ -338,10 +362,10 @@ 'U' 8 n; 'D' 12 e; 'D' 16 freed; - 'X' 24 qentry; - 'D' 32 nentry; - 'D' 36 closed; - 'a' 40 v; + 'X' 20 qentry; + 'D' 24 nentry; + 'D' 28 closed; + 'a' 32 v; }; defn @@ -363,15 +387,15 @@ CHANRCV = 2; CHANNOP = 3; CHANNOBLK = 4; -sizeofAlt = 48; +sizeofAlt = 24; aggr Alt { 'A' Channel 0 c; - 'X' 8 v; - 'D' 16 op; - 'X' 24 err; - 'A' Channel 32 tag; - 'D' 40 entryno; + 'X' 4 v; + 'D' 8 op; + 'X' 12 err; + 'A' Channel 16 tag; + 'D' 20 entryno; }; defn @@ -385,7 +409,7 @@ print(" entryno ", addr.entryno, "\n"); }; -sizeofRef = 8; +sizeofRef = 4; aggr Ref { 'D' 0 ref; @@ -408,11 +432,11 @@ RENDHASH = 13; Printsize = 2048; NPRIV = 8; -sizeofRgrp = 112; +sizeofRgrp = 56; aggr Rgrp { Lock 0 lock; - 'a' 8 hash; + 'a' 4 hash; }; defn @@ 
-424,12 +448,12 @@ print(" hash ", addr.hash, "\n"); }; -sizeofTqueue = 24; +sizeofTqueue = 12; aggr Tqueue { 'D' 0 asleep; - 'X' 8 $head; - 'X' 16 $tail; + 'X' 4 $head; + 'X' 8 $tail; }; defn @@ -440,31 +464,31 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofThread = 216; +sizeofThread = 120; aggr Thread { Lock 0 lock; - 'a' 8 sched; - 'D' 24 id; - 'D' 28 grp; - 'D' 32 moribund; - 'D' 36 state; - 'D' 40 nextstate; - 'X' 48 stk; - 'U' 56 stksize; - 'A' Thread 64 next; - 'X' 72 proc; - 'A' Thread 80 nextt; - 'D' 88 ret; - 'X' 96 cmdname; - 'D' 104 inrendez; - 'A' Thread 112 rendhash; - 'X' 120 rendtag; - 'X' 128 rendval; - 'D' 136 rendbreak; - 'D' 140 chan; - 'A' Alt 144 alt; - 'a' 152 udata; + 'a' 4 sched; + 'D' 12 id; + 'D' 16 grp; + 'D' 20 moribund; + 'D' 24 state; + 'D' 28 nextstate; + 'X' 32 stk; + 'U' 36 stksize; + 'A' Thread 40 next; + 'X' 44 proc; + 'A' Thread 48 nextt; + 'D' 52 ret; + 'X' 56 cmdname; + 'D' 60 inrendez; + 'A' Thread 64 rendhash; + 'X' 68 rendtag; + 'X' 72 rendval; + 'D' 76 rendbreak; + 'D' 80 chan; + 'A' Alt 84 alt; + 'a' 88 udata; }; defn @@ -496,12 +520,12 @@ print(" udata ", addr.udata, "\n"); }; -sizeofExecargs = 24; +sizeofExecargs = 16; aggr Execargs { 'X' 0 prog; - 'X' 8 args; - 'a' 16 fd; + 'X' 4 args; + 'a' 8 fd; }; defn @@ -512,34 +536,34 @@ print(" fd ", addr.fd, "\n"); }; -sizeofProc = 2504; +sizeofProc = 2424; aggr Proc { Lock 0 lock; - 'a' 8 sched; - 'D' 24 pid; - 'D' 28 splhi; - 'A' Thread 32 thread; - 'D' 40 needexec; - Execargs 48 exec; - 'A' Proc 72 newproc; - 'a' 80 exitstr; - 'D' 208 rforkflag; - 'D' 212 nthreads; - Tqueue 216 threads; - Tqueue 240 ready; - Lock 264 readylock; - 'a' 272 printbuf; - 'D' 2320 blocked; - 'D' 2324 pending; - 'D' 2328 nonotes; - 'U' 2332 nextID; - 'A' Proc 2336 next; - 'X' 2344 arg; - 'a' 2352 str; - 'X' 2480 wdata; - 'X' 2488 udata; - 'C' 2496 threadint; + 'a' 4 sched; + 'D' 12 pid; + 'D' 16 splhi; + 'A' Thread 20 thread; + 'D' 24 needexec; + Execargs 28 exec; + 'A' Proc 44 newproc; + 'a' 
48 exitstr; + 'D' 176 rforkflag; + 'D' 180 nthreads; + Tqueue 184 threads; + Tqueue 196 ready; + Lock 208 readylock; + 'a' 212 printbuf; + 'D' 2260 blocked; + 'D' 2264 pending; + 'D' 2268 nonotes; + 'U' 2272 nextID; + 'A' Proc 2276 next; + 'X' 2280 arg; + 'a' 2284 str; + 'X' 2412 wdata; + 'X' 2416 udata; + 'C' 2420 threadint; }; defn @@ -582,12 +606,12 @@ print(" threadint ", addr.threadint, "\n"); }; -sizeofPqueue = 24; +sizeofPqueue = 12; aggr Pqueue { Lock 0 lock; - 'A' Proc 8 $head; - 'A' Proc 16 $tail; + 'A' Proc 4 $head; + 'A' Proc 8 $tail; }; defn @@ -600,18 +624,18 @@ print(" $tail ", addr.$tail\X, "\n"); }; -sizeofIoproc = 192; +sizeofIoproc = 160; aggr Ioproc { 'D' 0 tid; - 'A' Channel 8 c; - 'A' Channel 16 creply; - 'D' 24 inuse; - 'X' 32 op; - 'X' 40 arg; - 'D' 48 ret; - 'a' 52 err; - 'A' Ioproc 184 next; + 'A' Channel 4 c; + 'A' Channel 8 creply; + 'D' 12 inuse; + 'X' 16 op; + 'X' 20 arg; + 'D' 24 ret; + 'a' 28 err; + 'A' Ioproc 156 next; }; defn @@ -631,15 +655,71 @@ complex Pqueue _threadpq; complex Channel _threadwaitchan; complex Rgrp _threadrgrp; -sizeof_2_ = 40; +sizeofNixcall = 12; +aggr Nixcall +{ + 'X' 0 tag; + 'D' 4 scall; + 'X' 8 sarg; +}; + +defn +Nixcall(addr) { + complex Nixcall addr; + print(" tag ", addr.tag\X, "\n"); + print(" scall ", addr.scall, "\n"); + print(" sarg ", addr.sarg\X, "\n"); +}; + +sizeofNixret = 12; +aggr Nixret +{ + 'X' 0 tag; + 'D' 4 sret; + 'X' 8 err; +}; + +defn +Nixret(addr) { + complex Nixret addr; + print(" tag ", addr.tag\X, "\n"); + print(" sret ", addr.sret, "\n"); + print(" err ", addr.err\X, "\n"); +}; + +sizeofCallq = 12308; +aggr Callq +{ + 'D' 0 ksleep; + 'U' 4 qr; + 'U' 8 qw; + 'U' 12 rr; + 'U' 16 rw; + 'a' 20 q; + 'a' 6164 r; +}; + +defn +Callq(addr) { + complex Callq addr; + print(" ksleep ", addr.ksleep, "\n"); + print(" qr ", addr.qr, "\n"); + print(" qw ", addr.qw, "\n"); + print(" rr ", addr.rr, "\n"); + print(" rw ", addr.rw, "\n"); + print(" q ", addr.q, "\n"); + print(" r ", addr.r, "\n"); +}; 
+ +sizeof_2_ = 24; aggr _2_ { 'X' 0 pp; - 'X' 8 next; - 'X' 16 last; - 'X' 24 first; - 'U' 32 pid; - 'U' 36 what; + 'X' 4 next; + 'X' 8 last; + 'X' 12 first; + 'U' 16 pid; + 'U' 20 what; }; defn @@ -653,15 +733,18 @@ print(" what ", addr.what, "\n"); }; -sizeofTos = 72; +sizeofTos = 12372; aggr Tos { _2_ 0 prof; - 'W' 40 cyclefreq; - 'V' 48 kcycles; - 'V' 56 pcycles; - 'U' 64 pid; - 'U' 68 clock; + 'W' 24 cyclefreq; + 'V' 32 kcycles; + 'V' 40 pcycles; + 'U' 48 pid; + 'U' 52 clock; + 'D' 56 nixtype; + 'D' 60 core; + Callq 64 callq; }; defn @@ -675,6 +758,11 @@ print(" pcycles ", addr.pcycles, "\n"); print(" pid ", addr.pid, "\n"); print(" clock ", addr.clock, "\n"); + print(" nixtype ", addr.nixtype, "\n"); + print(" core ", addr.core, "\n"); + print("Callq callq {\n"); + Callq(addr.callq); + print("}\n"); }; complex Tos _tos; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/libtube/tube.c --- a/sys/src/libtube/tube.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/libtube/tube.c Sat Dec 03 10:15:30 2011 +0000 @@ -78,7 +78,7 @@ c = (uchar*)&t[1]; c += (1+t->msz) * n; while(*c == 0) - sleep(0); + ; /* could yield */ memmove(p, c+1, t->msz); coherence(); *c = 0; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/Linux --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/k10/Linux Sat Dec 03 10:15:30 2011 +0000 @@ -0,0 +1,2 @@ +Linux support was removed from this kernel. +It may be found in /n/nixdump/2011/1114/sys/src/nix diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/acore.c --- a/sys/src/nix/k10/acore.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/acore.c Sat Dec 03 10:15:30 2011 +0000 @@ -217,7 +217,6 @@ m->load = 0; mwait(&m->icc->fn); - m->load = 100; if(m->icc->flushtlb) acmmuswitch(); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/arch.c --- a/sys/src/nix/k10/arch.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/arch.c Sat Dec 03 10:15:30 2011 +0000 @@ -51,6 +51,9 @@ /* * Save the mach dependent part of the process state. 
+ * NB: the caller should mmuflushtlb after procsave(). + * procsave/procrestore don't touch the mmu, they + * care about fpu, mostly. */ void procsave(Proc *p) @@ -61,10 +64,6 @@ p->pcycles += t; fpuprocsave(p); - - /* - */ - mmuflushtlb(m->pml4->pa); } static void @@ -94,10 +93,14 @@ /* * put the processor in the halt state if we've no processes to run. * an interrupt will get us going again. + * The boot TC in nix can't halt, because it must stay alert in + * case an AC makes a handler process ready. + * We should probably use mwait in that case. */ void idlehands(void) { - if(conf.nmach == 1) +if(0) + if(m->machno != 0) halt(); } diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/dat.h --- a/sys/src/nix/k10/dat.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/dat.h Sat Dec 03 10:15:30 2011 +0000 @@ -39,14 +39,23 @@ /* * machine dependent definitions used by ../port/portdat.h */ + + struct Lock { - u32int key; + union{ + u64int key; + struct{ + u32int ticket; + u32int users; + }; + }; int isilock; Mpl pl; uintptr pc; Proc* p; Mach* m; + uvlong lockcycles; }; struct Label @@ -113,7 +122,6 @@ usize upages; /* user page pool */ ulong copymode; /* 0 is copy on write, 1 is copy on reference */ ulong ialloc; /* max interrupt time allocation in bytes */ - ulong pipeqsize; /* size in bytes of pipe queues */ ulong nimage; /* number of page cache image headers */ ulong nswap; /* number of swap pages */ int nswppo; /* max # of pageouts per segment pass */ @@ -271,20 +279,23 @@ int mmuflush; /* make current proc flush it's mmu state */ int ilockdepth; Perf perf; /* performance counters */ - + int inidle; /* profiling */ int lastintr; Lock apictimerlock; uvlong cyclefreq; /* Frequency of user readable cycle counter */ vlong cpuhz; int cpumhz; + u64int rdtsc; + + Sched* sch; /* scheduler used */ + + Lock pmclock; + PmcCtr pmc[PmcMaxCtrs]; MFPU; MCPU; - Lock pmclock; - PmcCtr pmc[PmcMaxCtrs]; - NIX; }; @@ -325,6 +336,7 @@ uintptr vmunused; /* 1st unused va */ uintptr 
vmunmapped; /* 1st unmapped va */ uintptr vmend; /* 1st unusable va */ + u64int epoch; /* crude time synchronisation */ }; uchar syspage[4*KiB]; }; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/devacpi.c --- a/sys/src/nix/k10/devacpi.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/devacpi.c Sat Dec 03 10:15:30 2011 +0000 @@ -773,6 +773,27 @@ return 0; } +/* + * we use mp->machno (or index in Mach array) as the identifier, + * but ACPI relies on the apic identifier. + */ +int +corecolor(int core) +{ + Srat *sl; + Mach *m; + + if(core < 0 || core >= MACHMAX) + return -1; + m = MACHP(core); + if(m == nil) + return -1; + for(sl = srat; sl != nil; sl = sl->next) + if(sl->type == SRlapic && sl->lapic.apic == m->apicno) + return sl->lapic.dom; + return -1; +} + static void dumpmadt(Madt *apics) { diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/devarch.c --- a/sys/src/nix/k10/devarch.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/devarch.c Sat Dec 03 10:15:30 2011 +0000 @@ -516,64 +516,25 @@ static long cputyperead(Chan*, void *a, long n, vlong off) { - char str[32]; + char buf[512], *s, *e; + int i, k; - snprint(str, sizeof(str), "%s %ud\n", "AMD64", m->cpumhz); - return readstr(off, a, n, str); -} - -static long -cpuidread(Chan*, void *a, long n, vlong off) -{ - char str[64]; - int nstr, ns; - int i; - - nstr = sizeof(str); - for(i = ns = 0; i < 4; i++) - ns += snprint(str+ns, nstr-ns, "%08x ", m->cpuinfo[1][i]); - - return readstr(off, a, n, str); -} - -static long -linuxread(Chan*, void* a, long n, vlong offset) -{ - char str[32]; - - snprint(str, sizeof(str), "linuxexec %d linux %d", up->linuxexec, up->linux); - - return readstr(offset, a, n, str); -} - -static long -linuxwrite(Chan*, void* a, long, vlong offset) -{ - char *cp; - unsigned int val; - - if (offset) - error("linuxwrite: offset 0 set"); - - cp = a; - val = strtoul(cp, 0, 16); - if (val == 0) - up->linuxexec = up->linux = 0; - else if (val & 1) - up->linuxexec = val; - else if 
(val & 2) - up->linux = val; - else error("linuxwrite: val 0 or val&3 == 0"); - - return 1; + e = buf+sizeof buf; + s = seprint(buf, e, "%s %ud\n", "AMD64", m->cpumhz); + k = m->ncpuinfoe - m->ncpuinfos; + if(k > 4) + k = 4; + for(i = 0; i < k; i++) + s = seprint(s, e, "%#8.8ux %#8.8ux %#8.8ux %#8.8ux\n", + m->cpuinfo[i][0], m->cpuinfo[i][1], + m->cpuinfo[i][2], m->cpuinfo[i][3]); + return readstr(off, a, n, buf); } void archinit(void) { addarchfile("cputype", 0444, cputyperead, nil); - addarchfile("cpuidread", 0444, cpuidread, nil); - addarchfile("linux", 0644, linuxread, linuxwrite); } void diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/fns.h --- a/sys/src/nix/k10/fns.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/fns.h Sat Dec 03 10:15:30 2011 +0000 @@ -27,8 +27,10 @@ void cgaconsputs(char*, int); void cgainit(void); void cgapost(int); +void checkpa(char*, uintmem); #define clearmmucache() /* x86 doesn't have one */ void (*coherence)(void); +int corecolor(int); u32int cpuid(u32int, u32int, u32int[4]); int dbgprint(char*, ...); int decref(Ref*); @@ -180,11 +182,14 @@ int cas32(void*, u32int, u32int); int cas64(void*, u64int, u64int); int tas32(void*); +u64int fas64(u64int*, u64int); #define CASU(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) #define CASV(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) +#define CASP(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) #define CASW(p, e, n) cas32((p), (e), (n)) #define TAS(addr) tas32((addr)) +#define FASP(p, v) ((void*)fas64((u64int*)(p), (u64int)(v))) void touser(uintptr); void syscallentry(void); @@ -223,7 +228,7 @@ * archk10.c */ extern void millidelay(int); -void k10mwait(void *); +extern void k10mwait(void*); /* * i8259.c diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/k8cpu --- a/sys/src/nix/k10/k8cpu Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/k8cpu Sat Dec 03 10:15:30 2011 +0000 @@ -17,6 +17,7 @@ acpi tube zp + ws # add to get cec in the kernel # cec @@ -135,7 +136,6 @@ i8254 i8259 kbd - 
linuxsyscall linuxarchsys main map memory @@ -160,7 +160,6 @@ image latin1 nixcall - linuxsysemu page pager parse diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/k8cpufs --- a/sys/src/nix/k10/k8cpufs Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/k8cpufs Sat Dec 03 10:15:30 2011 +0000 @@ -134,7 +134,6 @@ i8254 i8259 kbd - linuxsyscall main map memory @@ -179,7 +178,6 @@ syssem syszio nixcall - linuxsysemu # #dir diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/l64syscall.s --- a/sys/src/nix/k10/l64syscall.s Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/l64syscall.s Sat Dec 03 10:15:30 2011 +0000 @@ -25,28 +25,8 @@ */ TEXT syscallentry(SB), 1, $-4 SWAPGS - /* above all, do no harm. Basically, push enough here to test the mode, then jump to linux if we're in that mode. - * sadly, this adds a bit of overhead, but there's not much we can do. We also want to make this decision very early - * in the game, it just gets harder and harder to clean up the user stack pointer the longer we delay it. - */ - PUSHQ R15 - PUSHQ R14 - PUSHQ AX - /* - * RMACH: R15; RUSER: r14. - * First thing in here trashes them both; need to be saved for Linux. - */ BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ MOVQ 16(RMACH), RUSER /* m->proc */ - /* are we a linux proc? - * Note conservative behavior on AX here -- maybe not needed - */ - MOVL 0xdac(R14),AX - CMPL AX,$0x0 - JEQ plan9call - JMP dolinuxsyscall -plan9call: - ADDQ $(3*8), SP MOVQ SP, R13 MOVQ 16(RUSER), SP /* m->proc->kstack */ ADDQ $KSTACK, SP @@ -56,7 +36,6 @@ PUSHQ $SSEL(SiUCS, SsRPL3) /* old code segment */ PUSHQ CX /* old ip */ - /* registers saved at this point: R15, R14, R13, AX (user stack); SS, R13, R11, CS, CX (kernel stack) */ SUBQ $(18*8), SP /* unsaved registers */ MOVW $SSEL(SiUDS, SsRPL3), (15*8+0)(SP) @@ -91,109 +70,3 @@ TEXT sysrforkret(SB), 1, $-4 MOVQ $0, AX JMP _syscallreturn - -/* just push it all on the linux stack. 
It's what the linux code *should* have - * done - */ -dolinuxsyscall: - /* restore AX (system call #)*/ - POPQ AX - /* don't bother saving it, it gets trashed on returned */ - /* stack is now R15 and R14 at TOS */ - /* for linux support */ - /* arg order is di, si, dx, r10, r8, r9 */ - PUSHQ R13 - PUSHQ R12 - PUSHQ R11 - PUSHQ R10 - PUSHQ R9 - PUSHQ R8 - PUSHQ BP - PUSHQ DI - PUSHQ SI - PUSHQ DX - PUSHQ CX - PUSHQ BX - MOVQ SP, R13 - MOVQ 16(RUSER), SP /* m->proc->kstack */ - ADDQ $KSTACK, SP - PUSHQ $SSEL(SiUDS, SsRPL3) /* old stack segment */ - /* note the "user SP is not right at this point. Need to readjust it. */ - PUSHQ R13 /* old sp */ - PUSHQ R11 /* old flags */ - PUSHQ $SSEL(SiUCS, SsRPL3) /* old code segment */ - PUSHQ CX /* old ip */ - - /* USER SP is now in R13 */ - /* we need to get a few things from it ... */ - SUBQ $(18*8), SP /* unsaved registers */ - MOVQ AX, (0*8)(SP) - MOVQ DI,(5*8)(SP) - MOVQ SI, (4*8)(SP) - MOVQ DX, (3*8)(SP) - MOVQ R10, (9*8)(SP) - MOVQ R8, (7*8)(SP) - MOVQ R9, (8*8)(SP) - MOVL $FSbase, RARG - CALL rdmsr(SB) - MOVL AX, (10*8+0)(SP) // use the unused R11 slot - MOVW DS, (15*8+0)(SP) - MOVW ES, (15*8+2)(SP) - MOVW FS, (15*8+4)(SP) - MOVW GS, (15*8+6)(SP) - - PUSHQ SP /* Ureg* */ - PUSHQ AX /* system call number */ - CALL linuxsyscall(SB) - -TEXT linuxsyscallreturn(SB), 1, $-4 - /* TODO: make sure syscall return is in the right place. */ - MOVQ 16(SP), AX /* Ureg.ax */ - MOVQ (16+6*8)(SP), BP /* Ureg.bp */ -_linuxsyscallreturn: - MOVL (16+10*8)(SP), R11 /* R11 for wrmsr below */ - ADDQ $(17*8), SP /* registers + arguments */ - CLI - SWAPGS - MOVW 0(SP), DS - MOVW 2(SP), ES - MOVW 4(SP), FS - MOVW 6(SP), GS - PUSHQ AX - MOVL $FSbase, RARG - XORQ CX, CX - MOVL R11, CX - PUSHQ CX - /* dummy */ - PUSHQ RARG - CALL wrmsr(SB) - POPQ AX - POPQ AX - POPQ AX - - MOVQ 24(SP), CX /* ip */ - MOVQ 40(SP), R11 /* flags */ - - MOVQ 48(SP), SP /* sp */ - - /* now we have to pop. 
*/ - POPQ BX - POPQ CX - POPQ DX - POPQ SI - POPQ DI - POPQ BP - POPQ R8 - POPQ R9 - POPQ R10 - POPQ R11 - POPQ R12 - POPQ R13 - POPQ R14 - POPQ R15 - - BYTE $0x48; SYSRET /* SYSRETQ */ - -TEXT linuxsysrforkret(SB), 1, $-4 - MOVQ $0, AX - JMP _linuxsyscallreturn diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/l64v.s --- a/sys/src/nix/k10/l64v.s Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/l64v.s Sat Dec 03 10:15:30 2011 +0000 @@ -308,6 +308,11 @@ XCHGL AX, (RARG) /* */ RET +TEXT fas64(SB), 1, $-4 + MOVQ p+8(FP), AX + LOCK; XCHGQ AX, (RARG) /* */ + RET + TEXT cas32(SB), 1, $-4 MOVL exp+8(FP), AX MOVL new+16(FP), BX @@ -349,21 +354,6 @@ MOVL $0, AX /* return 0 */ RET -/* - * Wait for something to happen. - */ -TEXT halt(SB), 1, $-4 - CLI - CMPL nrdy(SB), $0 - JEQ _nothingready - STI - RET - -_nothingready: - STI - HLT - RET - TEXT hardhalt(SB), 1, $-4 STI HLT diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/linuxarchsys.c --- a/sys/src/nix/k10/linuxarchsys.c Thu Dec 01 13:47:08 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -#include "u.h" -#include "../port/lib.h" -#include "mem.h" -#include "dat.h" -#include "fns.h" - -#include "../port/error.h" -#include -#include "ureg.h" -#include "amd64.h" - -/* from linux */ -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 - -void -arch_prctl(Ar0*ar0, Ureg *ureg, va_list list) -{ - uintptr va; - int code; - code = va_arg(list, int); - va = va_arg(list, uintptr); - if (up->linux & 128) print("%d:arch_prctl code %x va %p: ", up->pid, code, va); - /* always make sure it's a valid address, no matter what the command */ - validaddr((void *)va, 8, code > ARCH_SET_FS); - switch(code) { - case ARCH_SET_GS: - case ARCH_GET_GS: - error("not yet"); - break; - case ARCH_SET_FS: - memmove(&ureg->r11, &va, 4); - ar0->i = 0; - break; - case ARCH_GET_FS: - memmove((void *)va, &ureg->r11, 4); - ar0->i = 0; - break; - default: - error("Bad 
code"); - break; - } -} - diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/linuxsyscall.c --- a/sys/src/nix/k10/linuxsyscall.c Thu Dec 01 13:47:08 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,251 +0,0 @@ -#include "u.h" -#include "../port/lib.h" -#include "mem.h" -#include "dat.h" -#include "fns.h" - -#include "../port/error.h" - -#include "/sys/src/libc/9syscall/sys.h" -#include "linuxsystab.h" - -#include -#include "amd64.h" -#include "ureg.h" - -/* linux calling convention is callee-save. We are caller-save. - * But that issue is covered in trap, which saves everything. - * so we only need to know the calling conventions. - * when we call a(1,2,3,4,5,6). NO on-stack params. - movl $6, %r9d - movl $5, %r8d - movl $4, %r10 - movl $3, %edx - movl $2, %esi - movl $1, %edi - * syscall is in %ax however. - * return is in %ax - */ - -void -linuxsyscall(unsigned int, Ureg* ureg) -{ - void noted(Ureg*, uintptr); - void arch_prctl(Ar0 *ar, Ureg *ureg, va_list list); - unsigned int scallnr; - void notify(Ureg *); - char *e; - uintptr sp; - int s; - Ar0 ar0; - static Ar0 zar0; - int i; - uintptr linuxargs[6]; - -//print("linuxsyscall: wrong %d\n", wrong); -//dumpstack(); - if(!userureg(ureg)) - panic("syscall: cs %#llux\n", ureg->cs); - - cycles(&up->kentry); - - m->syscall++; - up->nsyscall++; - up->nqsyscall++; - up->insyscall = 1; - up->pc = ureg->ip; - up->dbgreg = ureg; - - if(up->procctl == Proc_tracesyscall){ - up->procctl = Proc_stopme; - procctl(up); - } - scallnr = ureg->ax; -//print("# %d\n", scallnr); - up->scallnr = scallnr; - - if(scallnr == 56) - fpusysrfork(ureg); - spllo(); - - sp = ureg->sp; - up->nerrlab = 0; - ar0 = zar0; - if(!waserror()){ - int printarg; - char *name = scallnr < nlinuxsyscall ? 
linuxsystab[scallnr].n : "Unknown"; - if(scallnr >= nlinuxsyscall || linuxsystab[scallnr].f == nil){ - pprint("bad linux sys call number %d(%s) pc %#ullx max %d\n", - scallnr, name, ureg->ip, nlinuxsyscall); - postnote(up, 1, "sys: bad sys call", NDebug); - error(Ebadarg); - } - - if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE)) - validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0); - - up->psstate = linuxsystab[scallnr].n; - - /* note: arch_prctl needs ureg. Unless someone thinks of a better way. - * one way is to change the way we construct linuxargs, - * and add ureg is scallnr == 158. The current if below is a hack, - * I know. - */ - linuxargs[0] = ureg->di; - linuxargs[1] = ureg->si; - linuxargs[2] = ureg->dx; - linuxargs[3] = ureg->r10; - linuxargs[4] = ureg->r8; - linuxargs[5] = ureg->r9; - - if (up->linux & 16) {print("%d:linux: %s: pc %#p ", up->pid, linuxsystab[scallnr].n,(void *)ureg->ip); - for(printarg = 0; printarg < linuxsystab[scallnr].narg; printarg++) - print("%p ", (void *)linuxargs[printarg]); - print("\n"); - } - if (up->linux&32) dumpregs(ureg); - /* this one is special .. sigh */ - if (scallnr == 158) - arch_prctl(&ar0, ureg, (va_list)linuxargs); - else - linuxsystab[scallnr].f(&ar0, (va_list)linuxargs); - if (up->linux & 64){print("AFTER: ");dumpregs(ureg);} - poperror(); - }else{ - /* failure: save the error buffer for errstr */ - if (up->linux & 16){ - int i; - print("Error path in linuxsyscall: %#ux, %s\n", scallnr, up->syserrstr ? up->syserrstr : "no errstr"); - for(i = 0; i < nelem(linuxargs); i++) - print("%d: %#p\n", i, linuxargs[i]); - dumpregs(ureg); - } - e = up->syserrstr; - up->syserrstr = up->errstr; - up->errstr = e; - if (scallnr < nlinuxsyscall) - ar0 = linuxsystab[scallnr].r; - else - ar0.i = -1; - } - - /* normal amd64 kernel does not have this; remove? 
*/ - if(up->nerrlab){ - print("bad errstack [%d]: %d extra\n", scallnr, up->nerrlab); - for(i = 0; i < NERR; i++) - print("sp=%#ullx pc=%#ullx\n", - up->errlab[i].sp, up->errlab[i].pc); - panic("error stack"); - } - - /* - * NIX: for the execac() syscall, what follows is done within - * the system call, because it never returns. - * See acore.c:/^retfromsyscall - */ - - noerrorsleft(); - /* - * Put return value in frame. - */ - ureg->ax = ar0.p; - if (up->linux & 16)print("%d:Ret from syscall %#lx\n", up->pid, (unsigned long) ar0.p); - if(up->procctl == Proc_tracesyscall){ - up->procctl = Proc_stopme; - s = splhi(); - procctl(up); - splx(s); - }else if(up->procctl == Proc_totc || up->procctl == Proc_toac) - procctl(up); - - - up->insyscall = 0; - up->psstate = 0; - - if(scallnr == NOTED) - noted(ureg, *(uintptr*)(sp+BY2SE)); - - splhi(); - - if(scallnr != 56 && (up->procctl || up->nnote)) - notify(ureg); - - /* if we delayed sched because we held a lock, sched now */ - if(up->delaysched){ - sched(); - splhi(); - } - kexit(ureg); -} - -void* -linuxsysexecregs(uintptr entry, ulong ssize, ulong nargs) -{ - int i; - uvlong *l; - Ureg *ureg; - uintptr *sp; - - if(!up->linux) - panic("linuxsysexecregs: up->linux %d\n", up->linux); - - /* need to figure out linux exec conventions :-( */ - sp = (uintptr*)(USTKTOP - ssize); - *--sp = nargs; - - ureg = up->dbgreg; - l = &ureg->bp; - print("Starting linux proc pc %#ullx sp %p nargs %ld\n", - ureg->ip, sp+1, nargs); - - /* set up registers for linux */ - /* we are dying in getenv. */ - /* because glibc does not follow the PPC ABI. */ - /* you have to push the env, then the args. */ - /* so to do this, well, we'll push an empty env on stack, i.e. shift - * the args down one. stack grows down. We already made space - * when we pushed nargs. 
- */ - memmove(sp, sp+1, nargs * sizeof(*sp)); - sp[nargs] = 0; - *--sp = nargs; - for(i = 7; i < 16; i++) - *l++ = 0xdeadbeef + (i*0x110); - - ureg->sp = PTR2UINT(sp); - ureg->ip = entry; - print("Starting linux proc pc %#ullx\n", ureg->ip); - - /* - */ - return UINT2PTR(nargs); -} - -void -linuxsysrforkchild(Proc* child, Proc* parent) -{ - Ureg *cureg; - - /* don't clear linux any more. linux procs can now fork */ - child->linuxexec = 0; - /* - * Add 3*BY2SE to the stack to account for - * - the return PC - * - trap's arguments (syscallnr, ureg) - */ - child->sched.sp = PTR2UINT(child->kstack+KSTACK-(sizeof(Ureg)+3*BY2SE)); - child->sched.pc = PTR2UINT(sysrforkret); - - cureg = (Ureg*)(child->sched.sp+3*BY2SE); - memmove(cureg, parent->dbgreg, sizeof(Ureg)); - - /* Things from bottom of syscall which were never executed */ - child->psstate = 0; - child->insyscall = 0; - - cureg->ax = 0; - child->hang = 1; - - dumpregs(cureg); - fpusysrforkchild(child, parent); -} diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/linuxsystab.h --- a/sys/src/nix/k10/linuxsystab.h Thu Dec 01 13:47:08 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,363 +0,0 @@ -typedef void Syscall(Ar0*, va_list); - -Syscall linuxuname; -Syscall linuxbrk; -Syscall linuxopen; -Syscall linuxclose; -Syscall syssegbrk; -Syscall linuxwritev; -Syscall linuxsocketcall; -Syscall linuxgeteuid; -Syscall linuxmmap; -Syscall linuxexit; -Syscall linuxsettid; -Syscall sigaction; -Syscall rt_sigprocmask; -Syscall fstat64; -Syscall returnok; -Syscall futex; -Syscall linuxgetpersonality; -Syscall linuxr2c; -Syscall linuxc2r; -Syscall linuxprocid; -Syscall linuxranks2coords; -Syscall linuxmprotect; -Syscall linuxclone; -Syscall gasm; -Syscall linuxcga; -Syscall getrusage; -void arch_prctl(Ar0*,Ureg*,va_list); //see linux_syscall.c -extern Syscall sys_write; -extern Syscall sys_read; -extern Syscall sysopen; -extern Syscall syspread; -extern Syscall sysclose; -struct syscall { - char* n; - Syscall*f; - 
int narg; - Ar0 r; -}; - -struct syscall linuxsystab[] = { - [0] {"read", sys_read, 3, {.i = 0}}, - [1] {"write", sys_write, 3, {.i = -1}}, - [2] {"linuxopen", sysopen, 2, {.i = -1}},/* note: can just use sysopen instead of linuxopen! */ - [3] {"linuxclose", sysclose, 1, {.p = (void *)-1}}, - [102] {"getuid", linuxgeteuid, 0, {.i = -1}}, - [12] {"linuxbrk", linuxbrk, 1, {.i = -1}}, - [104] {"getgid", linuxgeteuid, 0, {.i = -1}}, - [107] {"geteuid", linuxgeteuid, 0, {.i = -1}}, - [108] {"getegid", linuxgeteuid, 0, {.i = -1}}, - [98] {"getrusage", getrusage, 1, {.i = 0}}, - [9] {"mmap", linuxmmap, 5, {.i = -1}}, - [63] {"linuxuname", linuxuname, 1, {.i = -1}}, - [13] {"sigaction", sigaction, 2, {.i = -1}}, - [14] {"rt_sigprocmask", rt_sigprocmask, 4, {.l = -1}}, - [60] {"linuxexit", linuxexit, 1, {.i = -1}}, - [17] {"pread64", syspread, 5, {.i = 0}}, -// [102] {"socketcall", linuxsocketcall, 1, {.i = 0}}, - [56] {"clone", linuxclone, 1, {.i = 0}}, - [10] {"mprotect", linuxmprotect, 1, {.i = 0}}, - [20] {"writev", linuxwritev, 1, {.i = 0}}, -// [197] {"fstat64", fstat64, 1, {.i = -1}}, -// [221] {"futex", futex, 1, {.i = 0}}, - [158] {"arch_prctl", /* fix later */futex, 2, {.p = (void *)-1}}, - -/* leave blank lines for things you move up above -- the holes make it easier to see what's been supported. 
*/ - [4] {"stat", nil, 1, {.p = (void *)-1}}, - [5] {"fstat", nil, 1, {.p = (void *)-1}}, - [6] {"lstat", nil, 1, {.p = (void *)-1}}, - [7] {"poll", nil, 1, {.p = (void *)-1}}, - [8] {"lseek", nil, 1, {.p = (void *)-1}}, - - [11] {"munmap", nil, 1, {.p = (void *)-1}}, - - [15] {"rt_sigreturn", nil, 1, {.p = (void *)-1}}, - [16] {"ioctl", nil, 1, {.p = (void *)-1}}, - - [18] {"pwrite64", nil, 1, {.p = (void *)-1}}, - [19] {"readv", nil, 1, {.p = (void *)-1}}, - - [21] {"access", nil, 1, {.p = (void *)-1}}, - [22] {"pipe", nil, 1, {.p = (void *)-1}}, - [23] {"select", nil, 1, {.p = (void *)-1}}, - [24] {"sched_yield", nil, 1, {.p = (void *)-1}}, - [25] {"mremap", nil, 1, {.p = (void *)-1}}, - [26] {"msync", nil, 1, {.p = (void *)-1}}, - [27] {"mincore", nil, 1, {.p = (void *)-1}}, - [28] {"madvise", nil, 1, {.p = (void *)-1}}, - [29] {"shmget", nil, 1, {.p = (void *)-1}}, - [30] {"shmat", nil, 1, {.p = (void *)-1}}, - [31] {"shmctl", nil, 1, {.p = (void *)-1}}, - [32] {"dup", nil, 1, {.p = (void *)-1}}, - [33] {"dup2", nil, 1, {.p = (void *)-1}}, - [34] {"pause", nil, 1, {.p = (void *)-1}}, - [35] {"nanosleep", nil, 1, {.p = (void *)-1}}, - [36] {"getitimer", nil, 1, {.p = (void *)-1}}, - [37] {"alarm", nil, 1, {.p = (void *)-1}}, - [38] {"setitimer", nil, 1, {.p = (void *)-1}}, - [39] {"getpid", nil, 1, {.p = (void *)-1}}, - [40] {"sendfile", nil, 1, {.p = (void *)-1}}, - [41] {"socket", nil, 1, {.p = (void *)-1}}, - [42] {"connect", nil, 1, {.p = (void *)-1}}, - [43] {"accept", nil, 1, {.p = (void *)-1}}, - [44] {"sendto", nil, 1, {.p = (void *)-1}}, - [45] {"recvfrom", nil, 1, {.p = (void *)-1}}, - [46] {"sendmsg", nil, 1, {.p = (void *)-1}}, - [47] {"recvmsg", nil, 1, {.p = (void *)-1}}, - [48] {"shutdown", nil, 1, {.p = (void *)-1}}, - [49] {"bind", nil, 1, {.p = (void *)-1}}, - [50] {"listen", nil, 1, {.p = (void *)-1}}, - [51] {"getsockname", nil, 1, {.p = (void *)-1}}, - [52] {"getpeername", nil, 1, {.p = (void *)-1}}, - [53] {"socketpair", nil, 1, {.p = 
(void *)-1}}, - [54] {"setsockopt", nil, 1, {.p = (void *)-1}}, - [55] {"getsockopt", nil, 1, {.p = (void *)-1}}, - - [57] {"fork", nil, 1, {.p = (void *)-1}}, - [58] {"vfork", nil, 1, {.p = (void *)-1}}, - [59] {"execve", nil, 1, {.p = (void *)-1}}, - - [61] {"wait4", nil, 1, {.p = (void *)-1}}, - [62] {"kill", nil, 1, {.p = (void *)-1}}, - - [64] {"semget", nil, 1, {.p = (void *)-1}}, - [65] {"semop", nil, 1, {.p = (void *)-1}}, - [66] {"semctl", nil, 1, {.p = (void *)-1}}, - [67] {"shmdt", nil, 1, {.p = (void *)-1}}, - [68] {"msgget", nil, 1, {.p = (void *)-1}}, - [69] {"msgsnd", nil, 1, {.p = (void *)-1}}, - [70] {"msgrcv", nil, 1, {.p = (void *)-1}}, - [71] {"msgctl", nil, 1, {.p = (void *)-1}}, - [72] {"fcntl", nil, 1, {.p = (void *)-1}}, - [73] {"flock", nil, 1, {.p = (void *)-1}}, - [74] {"fsync", nil, 1, {.p = (void *)-1}}, - [75] {"fdatasync", nil, 1, {.p = (void *)-1}}, - [76] {"truncate", nil, 1, {.p = (void *)-1}}, - [77] {"ftruncate", nil, 1, {.p = (void *)-1}}, - [78] {"getdents", nil, 1, {.p = (void *)-1}}, - [79] {"getcwd", nil, 1, {.p = (void *)-1}}, - [80] {"chdir", nil, 1, {.p = (void *)-1}}, - [81] {"fchdir", nil, 1, {.p = (void *)-1}}, - [82] {"rename", nil, 1, {.p = (void *)-1}}, - [83] {"mkdir", nil, 1, {.p = (void *)-1}}, - [84] {"rmdir", nil, 1, {.p = (void *)-1}}, - [85] {"creat", nil, 1, {.p = (void *)-1}}, - [86] {"link", nil, 1, {.p = (void *)-1}}, - [87] {"unlink", nil, 1, {.p = (void *)-1}}, - [88] {"symlink", nil, 1, {.p = (void *)-1}}, - [89] {"readlink", nil, 1, {.p = (void *)-1}}, - [90] {"chmod", nil, 1, {.p = (void *)-1}}, - [91] {"fchmod", nil, 1, {.p = (void *)-1}}, - [92] {"chown", nil, 1, {.p = (void *)-1}}, - [93] {"fchown", nil, 1, {.p = (void *)-1}}, - [94] {"lchown", nil, 1, {.p = (void *)-1}}, - [95] {"umask", nil, 1, {.p = (void *)-1}}, - [96] {"gettimeofday", nil, 1, {.p = (void *)-1}}, - [97] {"getrlimit", nil, 1, {.p = (void *)-1}}, - - [99] {"sysinfo", nil, 1, {.p = (void *)-1}}, - [100] {"times", nil, 1, {.p = 
(void *)-1}}, - [101] {"ptrace", nil, 1, {.p = (void *)-1}}, - - [103] {"syslog", nil, 1, {.p = (void *)-1}}, - - [105] {"setuid", nil, 1, {.p = (void *)-1}}, - [106] {"setgid", nil, 1, {.p = (void *)-1}}, - - [109] {"setpgid", nil, 1, {.p = (void *)-1}}, - [110] {"getppid", nil, 1, {.p = (void *)-1}}, - [111] {"getpgrp", nil, 1, {.p = (void *)-1}}, - [112] {"setsid", nil, 1, {.p = (void *)-1}}, - [113] {"setreuid", nil, 1, {.p = (void *)-1}}, - [114] {"setregid", nil, 1, {.p = (void *)-1}}, - [115] {"getgroups", nil, 1, {.p = (void *)-1}}, - [116] {"setgroups", nil, 1, {.p = (void *)-1}}, - [117] {"setresuid", nil, 1, {.p = (void *)-1}}, - [118] {"getresuid", nil, 1, {.p = (void *)-1}}, - [119] {"setresgid", nil, 1, {.p = (void *)-1}}, - [120] {"getresgid", nil, 1, {.p = (void *)-1}}, - [121] {"getpgid", nil, 1, {.p = (void *)-1}}, - [122] {"setfsuid", nil, 1, {.p = (void *)-1}}, - [123] {"setfsgid", nil, 1, {.p = (void *)-1}}, - [124] {"getsid", nil, 1, {.p = (void *)-1}}, - [125] {"capget", nil, 1, {.p = (void *)-1}}, - [126] {"capset", nil, 1, {.p = (void *)-1}}, - [127] {"rt_sigpending", nil, 1, {.p = (void *)-1}}, - [128] {"rt_sigtimedwait", nil, 1, {.p = (void *)-1}}, - [129] {"rt_sigqueueinfo", nil, 1, {.p = (void *)-1}}, - [130] {"rt_sigsuspend", nil, 1, {.p = (void *)-1}}, - [131] {"sigaltstack", nil, 1, {.p = (void *)-1}}, - [132] {"utime", nil, 1, {.p = (void *)-1}}, - [133] {"mknod", nil, 1, {.p = (void *)-1}}, - [134] {"uselib", nil, 1, {.p = (void *)-1}}, - [135] {"personality", nil, 1, {.p = (void *)-1}}, - [136] {"ustat", nil, 1, {.p = (void *)-1}}, - [137] {"statfs", nil, 1, {.p = (void *)-1}}, - [138] {"fstatfs", nil, 1, {.p = (void *)-1}}, - [139] {"sysfs", nil, 1, {.p = (void *)-1}}, - [140] {"getpriority", nil, 1, {.p = (void *)-1}}, - [141] {"setpriority", nil, 1, {.p = (void *)-1}}, - [142] {"sched_setparam", nil, 1, {.p = (void *)-1}}, - [143] {"sched_getparam", nil, 1, {.p = (void *)-1}}, - [144] {"sched_setscheduler", nil, 1, {.p = (void 
*)-1}}, - [145] {"sched_getscheduler", nil, 1, {.p = (void *)-1}}, - [146] {"sched_get_priority_max", nil, 1, {.p = (void *)-1}}, - [147] {"sched_get_priority_min", nil, 1, {.p = (void *)-1}}, - [148] {"sched_rr_get_interval", nil, 1, {.p = (void *)-1}}, - [149] {"mlock", nil, 1, {.p = (void *)-1}}, - [150] {"munlock", nil, 1, {.p = (void *)-1}}, - [151] {"mlockall", nil, 1, {.p = (void *)-1}}, - [152] {"munlockall", nil, 1, {.p = (void *)-1}}, - [153] {"vhangup", nil, 1, {.p = (void *)-1}}, - [154] {"modify_ldt", nil, 1, {.p = (void *)-1}}, - [155] {"pivot_root", nil, 1, {.p = (void *)-1}}, - [156] {"_sysctl", nil, 1, {.p = (void *)-1}}, - [157] {"prctl", nil, 1, {.p = (void *)-1}}, - - [159] {"adjtimex", nil, 1, {.p = (void *)-1}}, - [160] {"setrlimit", nil, 1, {.p = (void *)-1}}, - [161] {"chroot", nil, 1, {.p = (void *)-1}}, - [162] {"sync", nil, 1, {.p = (void *)-1}}, - [163] {"acct", nil, 1, {.p = (void *)-1}}, - [164] {"settimeofday", nil, 1, {.p = (void *)-1}}, - [165] {"mount", nil, 1, {.p = (void *)-1}}, - [166] {"umount2", nil, 1, {.p = (void *)-1}}, - [167] {"swapon", nil, 1, {.p = (void *)-1}}, - [168] {"swapoff", nil, 1, {.p = (void *)-1}}, - [169] {"reboot", nil, 1, {.p = (void *)-1}}, - [170] {"sethostname", nil, 1, {.p = (void *)-1}}, - [171] {"setdomainname", nil, 1, {.p = (void *)-1}}, - [172] {"iopl", nil, 1, {.p = (void *)-1}}, - [173] {"ioperm", nil, 1, {.p = (void *)-1}}, - [174] {"create_module", nil, 1, {.p = (void *)-1}}, - [175] {"init_module", nil, 1, {.p = (void *)-1}}, - [176] {"delete_module", nil, 1, {.p = (void *)-1}}, - [177] {"get_kernel_syms", nil, 1, {.p = (void *)-1}}, - [178] {"query_module", nil, 1, {.p = (void *)-1}}, - [179] {"quotactl", nil, 1, {.p = (void *)-1}}, - [180] {"nfsservctl", nil, 1, {.p = (void *)-1}}, - [181] {"getpmsg", nil, 1, {.p = (void *)-1}}, - [182] {"putpmsg", nil, 1, {.p = (void *)-1}}, - [183] {"afs_syscall", nil, 1, {.p = (void *)-1}}, - [184] {"tuxcall", nil, 1, {.p = (void *)-1}}, - [185] 
{"security", nil, 1, {.p = (void *)-1}}, - [186] {"gettid", nil, 1, {.p = (void *)-1}}, - [187] {"readahead", nil, 1, {.p = (void *)-1}}, - [188] {"setxattr", nil, 1, {.p = (void *)-1}}, - [189] {"lsetxattr", nil, 1, {.p = (void *)-1}}, - [190] {"fsetxattr", nil, 1, {.p = (void *)-1}}, - [191] {"getxattr", nil, 1, {.p = (void *)-1}}, - [192] {"lgetxattr", nil, 1, {.p = (void *)-1}}, - [193] {"fgetxattr", nil, 1, {.p = (void *)-1}}, - [194] {"listxattr", nil, 1, {.p = (void *)-1}}, - [195] {"llistxattr", nil, 1, {.p = (void *)-1}}, - [196] {"flistxattr", nil, 1, {.p = (void *)-1}}, - [197] {"removexattr", nil, 1, {.p = (void *)-1}}, - [198] {"lremovexattr", nil, 1, {.p = (void *)-1}}, - [199] {"fremovexattr", nil, 1, {.p = (void *)-1}}, - [200] {"tkill", nil, 1, {.p = (void *)-1}}, - [201] {"time", nil, 1, {.p = (void *)-1}}, - [202] {"futex", nil, 1, {.p = (void *)-1}}, - [203] {"sched_setaffinity", nil, 1, {.p = (void *)-1}}, - [204] {"sched_getaffinity", nil, 1, {.p = (void *)-1}}, - [205] {"set_thread_area", nil, 1, {.p = (void *)-1}}, - [206] {"io_setup", nil, 1, {.p = (void *)-1}}, - [207] {"io_destroy", nil, 1, {.p = (void *)-1}}, - [208] {"io_getevents", nil, 1, {.p = (void *)-1}}, - [209] {"io_submit", nil, 1, {.p = (void *)-1}}, - [210] {"io_cancel", nil, 1, {.p = (void *)-1}}, - [211] {"get_thread_area", nil, 1, {.p = (void *)-1}}, - [212] {"lookup_dcookie", nil, 1, {.p = (void *)-1}}, - [213] {"epoll_create", nil, 1, {.p = (void *)-1}}, - [214] {"epoll_ctl_old", nil, 1, {.p = (void *)-1}}, - [215] {"epoll_wait_old", nil, 1, {.p = (void *)-1}}, - [216] {"remap_file_pages", nil, 1, {.p = (void *)-1}}, - [217] {"getdents64", nil, 1, {.p = (void *)-1}}, - [218] {"set_tid_address", nil, 1, {.p = (void *)-1}}, - [219] {"restart_syscall", nil, 1, {.p = (void *)-1}}, - [220] {"semtimedop", nil, 1, {.p = (void *)-1}}, - [221] {"fadvise64", nil, 1, {.p = (void *)-1}}, - [222] {"timer_create", nil, 1, {.p = (void *)-1}}, - [223] {"timer_settime", nil, 1, {.p = 
(void *)-1}}, - [224] {"timer_gettime", nil, 1, {.p = (void *)-1}}, - [225] {"timer_getoverrun", nil, 1, {.p = (void *)-1}}, - [226] {"timer_delete", nil, 1, {.p = (void *)-1}}, - [227] {"clock_settime", nil, 1, {.p = (void *)-1}}, - [228] {"clock_gettime", nil, 1, {.p = (void *)-1}}, - [229] {"clock_getres", nil, 1, {.p = (void *)-1}}, - [230] {"clock_nanosleep", nil, 1, {.p = (void *)-1}}, - [231] {"exit_group", nil, 1, {.p = (void *)-1}}, - [232] {"epoll_wait", nil, 1, {.p = (void *)-1}}, - [233] {"epoll_ctl", nil, 1, {.p = (void *)-1}}, - [234] {"tgkill", nil, 1, {.p = (void *)-1}}, - [235] {"utimes", nil, 1, {.p = (void *)-1}}, - [236] {"vserver", nil, 1, {.p = (void *)-1}}, - [237] {"mbind", nil, 1, {.p = (void *)-1}}, - [238] {"set_mempolicy", nil, 1, {.p = (void *)-1}}, - [239] {"get_mempolicy", nil, 1, {.p = (void *)-1}}, - [240] {"mq_open", nil, 1, {.p = (void *)-1}}, - [241] {"mq_unlink", nil, 1, {.p = (void *)-1}}, - [242] {"mq_timedsend", nil, 1, {.p = (void *)-1}}, - [243] {"mq_timedreceive", nil, 1, {.p = (void *)-1}}, - [244] {"mq_notify", nil, 1, {.p = (void *)-1}}, - [245] {"mq_getsetattr", nil, 1, {.p = (void *)-1}}, - [246] {"kexec_load", nil, 1, {.p = (void *)-1}}, - [247] {"waitid", nil, 1, {.p = (void *)-1}}, - [248] {"add_key", nil, 1, {.p = (void *)-1}}, - [249] {"request_key", nil, 1, {.p = (void *)-1}}, - [250] {"keyctl", nil, 1, {.p = (void *)-1}}, - [251] {"ioprio_set", nil, 1, {.p = (void *)-1}}, - [252] {"ioprio_get", nil, 1, {.p = (void *)-1}}, - [253] {"inotify_init", nil, 1, {.p = (void *)-1}}, - [254] {"inotify_add_watch", nil, 1, {.p = (void *)-1}}, - [255] {"inotify_rm_watch", nil, 1, {.p = (void *)-1}}, - [256] {"migrate_pages", nil, 1, {.p = (void *)-1}}, - [257] {"openat", nil, 1, {.p = (void *)-1}}, - [258] {"mkdirat", nil, 1, {.p = (void *)-1}}, - [259] {"mknodat", nil, 1, {.p = (void *)-1}}, - [260] {"fchownat", nil, 1, {.p = (void *)-1}}, - [261] {"futimesat", nil, 1, {.p = (void *)-1}}, - [262] {"newfstatat", nil, 1, {.p 
= (void *)-1}}, - [263] {"unlinkat", nil, 1, {.p = (void *)-1}}, - [264] {"renameat", nil, 1, {.p = (void *)-1}}, - [265] {"linkat", nil, 1, {.p = (void *)-1}}, - [266] {"symlinkat", nil, 1, {.p = (void *)-1}}, - [267] {"readlinkat", nil, 1, {.p = (void *)-1}}, - [268] {"fchmodat", nil, 1, {.p = (void *)-1}}, - [269] {"faccessat", nil, 1, {.p = (void *)-1}}, - [270] {"pselect6", nil, 1, {.p = (void *)-1}}, - [271] {"ppoll", nil, 1, {.p = (void *)-1}}, - [272] {"unshare", nil, 1, {.p = (void *)-1}}, - [273] {"set_robust_list", nil, 1, {.p = (void *)-1}}, - [274] {"get_robust_list", nil, 1, {.p = (void *)-1}}, - [275] {"splice", nil, 1, {.p = (void *)-1}}, - [276] {"tee", nil, 1, {.p = (void *)-1}}, - [277] {"sync_file_range", nil, 1, {.p = (void *)-1}}, - [278] {"vmsplice", nil, 1, {.p = (void *)-1}}, - [279] {"move_pages", nil, 1, {.p = (void *)-1}}, - [280] {"utimensat", nil, 1, {.p = (void *)-1}}, - [281] {"epoll_pwait", nil, 1, {.p = (void *)-1}}, - [282] {"signalfd", nil, 1, {.p = (void *)-1}}, - [283] {"timerfd_create", nil, 1, {.p = (void *)-1}}, - [284] {"eventfd", nil, 1, {.p = (void *)-1}}, - [285] {"fallocate", nil, 1, {.p = (void *)-1}}, - [286] {"timerfd_settime", nil, 1, {.p = (void *)-1}}, - [287] {"timerfd_gettime", nil, 1, {.p = (void *)-1}}, - [288] {"accept4", nil, 1, {.p = (void *)-1}}, - [289] {"signalfd4", nil, 1, {.p = (void *)-1}}, - [290] {"eventfd2", nil, 1, {.p = (void *)-1}}, - [291] {"epoll_create1", nil, 1, {.p = (void *)-1}}, - [292] {"dup3", nil, 1, {.p = (void *)-1}}, - [293] {"pipe2", nil, 1, {.p = (void *)-1}}, - [294] {"inotify_init1", nil, 1, {.p = (void *)-1}}, - [295] {"preadv", nil, 1, {.p = (void *)-1}}, - [296] {"pwritev", nil, 1, {.p = (void *)-1}}, - [297] {"rt_tgsigqueueinfo", nil, 1, {.p = (void *)-1}}, - [298] {"perf_event_open", nil, 1, {.p = (void *)-1}}, - [299] {"recvmmsg", nil, 1, {.p = (void *)-1}}, - -}; - -int nlinuxsyscall = nelem(linuxsystab); \ No newline at end of file diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 
sys/src/nix/k10/main.c --- a/sys/src/nix/k10/main.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/main.c Sat Dec 03 10:15:30 2011 +0000 @@ -7,7 +7,10 @@ #include "init.h" #include "io.h" - +enum +{ + InitialTCs = 32 /* default # of TCs */ +}; Conf conf; /* XXX - must go - gag */ @@ -76,13 +79,20 @@ } } +extern void setmachsched(Mach*); + void squidboy(int apicno) { + char *n[] = { + [NIXAC] "AC", + [NIXTC] "TC", + [NIXKC] "KC" + }; vlong hz; sys->machptr[m->machno] = m; - + setmachsched(m); /* * Need something for initial delays * until a timebase is worked out. @@ -110,7 +120,6 @@ mmuinit(); if(!apiconline()) ndnr(); - fpuinit(); acmodeset(m->nixtype); @@ -123,14 +132,17 @@ DBG("Wait for the thunderbirds!\n"); while(!active.thunderbirdsarego) ; + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); - DBG("mach %d is go\n", m->machno); + print("cpu%d color %d role %s tsc %lld\n", + m->machno, corecolor(m->machno), n[m->nixtype], m->rdtsc); switch(m->nixtype){ case NIXAC: acmmuswitch(); acinit(); adec(&active.nbooting); -// ainc(&active.nonline); + ainc(&active.nonline); /* this was commented out */ acsched(); panic("squidboy"); break; @@ -149,9 +161,9 @@ timersinit(); adec(&active.nbooting); - ainc(&active.nonline); + ainc(&active.nonline); /* this was commented out */ - ndnr(); schedinit(); + schedinit(); break; } panic("squidboy returns (type %d)", m->nixtype); @@ -163,26 +175,19 @@ int i; Mach *mp; extern void testicc(int); - char *n[] = { - [NIXAC] "AC", - [NIXTC] "TC", - [NIXKC] "KC" - }; /* setup arguments for all */ for(i = 1; i < MACHMAX; i++) - if((mp = sys->machptr[i]) != nil && mp->online != 0){ - print("cpu%d machno %d role %s\n", - i, mp->machno, n[mp->nixtype]); - if(mp->nixtype == NIXAC) - testicc(i); - } + if((mp = sys->machptr[i]) != nil && mp->online != 0) + if(mp->nixtype == NIXAC) + testicc(i); print("bootcore: all cores done\n"); } /* * Rendezvous with other cores. Set roles for those that came * up online, and wait until they are initialized. 
+ * Sync TSC with them. * We assume other processors that could boot had time to * set online to 1 by now. */ @@ -201,10 +206,16 @@ */ mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); mp->icc->fn = nil; - if(i < 4) + if(i < InitialTCs){ + conf.nmach++; mp->nixtype = NIXTC; + } ainc(&active.nbooting); } + sys->epoch = rdtsc(); + mfence(); + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); active.thunderbirdsarego = 1; start = fastticks2us(fastticks(nil)); do{ @@ -278,7 +289,7 @@ conf.nmach = 1; fmtinit(); - print("\nNIX with 2M pages\n"); + print("\nNIX\n"); if(vflag){ print("&ax = %#p, ax = %#ux, bx = %#ux\n", &ax, ax, bx); multiboot(ax, bx, vflag); @@ -314,6 +325,7 @@ * things like that completely broken). */ acpiinit(); + umeminit(); trapinit(); printinit(); @@ -326,6 +338,7 @@ */ i8259init(32); + procinit0(); mpsinit(maxcores); apiconline(); sipi(); @@ -462,7 +475,7 @@ s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ); p->seg[SSEG] = s; - pg = newpage(1, 0, USTKTOP-BIGPGSZ, BIGPGSZ); + pg = newpage(1, 0, USTKTOP-BIGPGSZ, BIGPGSZ, -1); segpage(s, pg); k = kmap(pg); bootargs(VA(k)); @@ -474,7 +487,7 @@ s = newseg(SG_TEXT, UTZERO, 1); s->flushme++; p->seg[TSEG] = s; - pg = newpage(1, 0, UTZERO, BIGPGSZ); + pg = newpage(1, 0, UTZERO, BIGPGSZ, -1); memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); segpage(s, pg); k = kmap(s->map[0]->pages[0]); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/mem.h --- a/sys/src/nix/k10/mem.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/mem.h Sat Dec 03 10:15:30 2011 +0000 @@ -86,7 +86,6 @@ - /* * Hierarchical Page Tables. 
* For example, traditional IA-32 paging structures have 2 levels, @@ -97,3 +96,4 @@ */ #define PTLX(v, l) (((v)>>(((l)*PTSHFT)+PGSHFT)) & ((1<1 processor; * replace vmap with newer version (no PDMAP); - * mmuinit machno != fix; * mmuptcopy (PteSHARED trick?); * calculate and map up to TMFM (conf crap); */ @@ -113,11 +112,12 @@ for(i = 3; i > 0; i--){ print("mmuptp[%d]:\n", i); for(pg = p->mmuptp[i]; pg != nil; pg = pg->next) - print("\tva %#ullx ppn %#ullx d %#ulx\n", - pg->va, pg->pa, pg->daddr); + print("\tpg %#p = va %#ullx pa %#ullx" + " daddr %#ulx next %#p prev %#p\n", + pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev); } print("pml4 %#ullx\n", m->pml4->pa); - dumpptepg(4, m->pml4->pa); + if(0)dumpptepg(4, m->pml4->pa); } void @@ -162,6 +162,8 @@ page->prev = page->next = nil; memset(UINT2PTR(page->va), 0, PTSZ); + if(page->pa == 0) + panic("mmuptpalloc: free page with pa == 0"); return page; } unlock(&mmuptpfreelist); @@ -182,6 +184,8 @@ page->pa = PADDR(va); page->ref = 1; + if(page->pa == 0) + panic("mmuptpalloc: no pa"); return page; } @@ -213,6 +217,7 @@ pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP; if(page->daddr >= m->pml4->daddr) m->pml4->daddr = page->daddr+1; + page->prev = m->pml4; } tssrsp0(STACKALIGN(PTR2UINT(proc->kstack+KSTACK))); @@ -252,23 +257,90 @@ int l; PTE *pte, *pml4; u64int addr; + char buf[240], *s; addr = PTR2UINT(a); pml4 = UINT2PTR(m->pml4->va); pte = 0; + s = buf; + *s = 0; if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0) goto Panic; - else if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0) + s = seprint(buf, buf+sizeof buf, + "check3: l%d pte %#p = %llux\n", + l, pte, pte?*pte:~0); + if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0) goto Panic; - else if(*pte&PtePS) + s = seprint(s, buf+sizeof buf, + "check2: l%d pte %#p = %llux\n", + l, pte, pte?*pte:~0); + if(*pte&PtePS) return; - else if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0) + if((l = 
mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0) goto Panic; + seprint(s, buf+sizeof buf, + "check1: l%d pte %#p = %llux\n", + l, pte, pte?*pte:~0); return; Panic: - panic("cpu%d: checkpte l%d ppn %#ullx kadr %#ullx pte %#p = %llux\n", - m->machno, l, ppn, KADDR(ppn), pte, *pte); + + seprint(s, buf+sizeof buf, + "checkpte: l%d addr %#p ppn %#ullx kaddr %#p pte %#p = %llux", + l, a, ppn, KADDR(ppn), pte, pte?*pte:~0); + print("%s\n", buf); + seprint(buf, buf+sizeof buf, "start %#ullx unused %#ullx" + " unmap %#ullx end %#ullx\n", + sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend); + panic("%s", buf); +} + +static void +mmuptpcheck(Proc *proc) +{ + int lvl, npgs, i; + Page *lp, *p, *pgs[16], *fp; + uint idx[16]; + + if(proc == nil) + return; + lp = m->pml4; + for(lvl = 3; lvl >= 2; lvl--){ + npgs = 0; + for(p = proc->mmuptp[lvl]; p != nil; p = p->next){ + for(fp = proc->mmuptp[0]; fp != nil; fp = fp->next) + if(fp == p){ + dumpmmu(proc); + panic("ptpcheck: using free page"); + } + for(i = 0; i < npgs; i++){ + if(pgs[i] == p){ + dumpmmu(proc); + panic("ptpcheck: dup page"); + } + if(idx[i] == p->daddr){ + dumpmmu(proc); + panic("ptcheck: dup daddr"); + } + } + if(npgs >= nelem(pgs)) + panic("ptpcheck: pgs is too small"); + idx[npgs] = p->daddr; + pgs[npgs++] = p; + if(lvl == 3 && p->prev != lp){ + dumpmmu(proc); + panic("ptpcheck: wrong prev"); + } + } + + } + npgs = 0; + for(fp = proc->mmuptp[0]; fp != nil; fp = fp->next){ + for(i = 0; i < npgs; i++) + if(pgs[i] == fp) + panic("ptpcheck: dup free page"); + pgs[npgs++] = fp; + } } /* @@ -284,12 +356,19 @@ PTE *pte; Page *page, *prev; Mpl pl; - uintmem pa; + uintmem pa, ppn; + char buf[80]; -uintmem ppn; + ppn = 0; + pa = pg->pa; + if(pa == 0) + panic("mmuput: zero pa"); - pa = pg->pa; - DBG("up %#p mmuput %#p %#Px %#ux\n", up, va, pa, attr); + if(DBGFLG){ + snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#ux\n", + m->machno, up, va, pa, attr); + print("%s", buf); + } assert(pg->pgszi >= 0); 
pgsz = m->pgsz[pg->pgszi]; if(pa & (pgsz-1)) @@ -297,12 +376,17 @@ if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED)) panic("mmuput: wrong attr bits: %#ux\n", attr); pa |= attr; + pl = splhi(); + if(DBGFLG) + mmuptpcheck(up); user = (va < KZERO); x = PTLX(va, 3); + pte = UINT2PTR(m->pml4->va); pte += x; prev = m->pml4; + for(lvl = 3; lvl >= 0; lvl--){ if(user){ if(pgsz == 2*MiB && lvl == 1) /* use 2M */ @@ -310,10 +394,15 @@ if(pgsz == 1ull*GiB && lvl == 2) /* use 1G */ break; } - for(page = up->mmuptp[lvl]; page != nil; page = page->next){ - if(page->prev == prev && page->daddr == x) + for(page = up->mmuptp[lvl]; page != nil; page = page->next) + if(page->prev == prev && page->daddr == x){ + if(*pte == 0){ + print("mmu: jmk and nemo had fun\n"); + *pte = PPN(page->pa)|PteU|PteRW|PteP; + } break; - } + } + if(page == nil){ if(up->mmuptp[0] == nil) page = mmuptpalloc(); @@ -330,16 +419,20 @@ m->pml4->daddr = x+1; } x = PTLX(va, lvl-1); -ppn = PPN(*pte); + + ppn = PPN(*pte); + if(ppn == 0) + panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte); pte = UINT2PTR(KADDR(ppn)); pte += x; -ppn += x; prev = page; } -checkpte(ppn, pte); + if(DBGFLG) + checkpte(ppn, pte); *pte = pa|PteU; + if(user) switch(pgsz){ case 2*MiB: @@ -350,7 +443,12 @@ panic("mmuput: user pages must be 2M or 1G"); } splx(pl); - DBG("up %#p new pte %#p = %#llux\n", up, pte, *pte); + + if(DBGFLG){ + snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llux\n", + m->machno, up, pte, pte?*pte:~0); + print("%s", buf); + } invlpg(va); /* only if old entry valid? 
*/ } @@ -666,6 +764,7 @@ */ p = UINT2PTR(m->stack); p += MACHSTKSZ; + memmove(p, UINT2PTR(mach0pml4.va), PTSZ); m->pml4 = &m->pml4kludge; m->pml4->va = PTR2UINT(p); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/physalloc.c --- a/sys/src/nix/k10/physalloc.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/physalloc.c Sat Dec 03 10:15:30 2011 +0000 @@ -189,16 +189,14 @@ xphystag(Bal *b, uintmem data) { uint i; - Buddy *l, *p; - Buddy *blocks, *avail; + Buddy *blocks; DBG("phystag\n"); blocks = b->blocks; - avail = b->avail; if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/) - return; + return nil; i = INDEX(b,data); return blocks[BLOCK(b,i)].p; } @@ -338,6 +336,7 @@ m = b->memory + b->bminsz*BLOCK(b,i); assert(m >= b->base && m < b->base + b->size); blocks[BLOCK(b,i)].p = tag; + return m; } @@ -483,7 +482,8 @@ * This code assumes that a domain may be extended later and * that there is no interleaving of domains. Ok by now. */ - DBG("physmem block dom %d addr %#ullx size %#ullx\n", dom, addr, len); + DBG("physmem block dom %d addr %#ullx size %#ullx\n", + dom, addr, len); if(dom < 0 || dom >= Ndoms){ print("physinit: invalid dom %d\n", dom); dom = 0; @@ -523,6 +523,7 @@ panic("physinit: doms overlap"); } assert(addr >= b->base && addr+len <= b->base + b->size); + iimbchunk(b, addr, addr+len, 0); } diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/syscall.c --- a/sys/src/nix/k10/syscall.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/syscall.c Sat Dec 03 10:15:30 2011 +0000 @@ -219,18 +219,11 @@ } } -int -linuxproc(void) -{ - return (up->linux != 0); -} - /* it should be unsigned. FIXME */ void -syscall( int badscallnr, Ureg* ureg) +syscall(int badscallnr, Ureg* ureg) { unsigned int scallnr = (unsigned int) badscallnr; - void linuxsyscall(int, Ureg*); char *e; uintptr sp; int s; @@ -238,18 +231,6 @@ Ar0 ar0; static Ar0 zar0; - /* make this merge with linuxsyscall much tighter. - * lots of overlap here. 
- */ - if(linuxproc()){ - linuxsyscall(scallnr, ureg); - /* there are just enough differences to make it - * hard to merge the two paths. If you start to do it, - * make sure you know what you are doing! - */ - return; - } - if(!userureg(ureg)) panic("syscall: cs %#llux\n", ureg->cs); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/k10/trap.c --- a/sys/src/nix/k10/trap.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/k10/trap.c Sat Dec 03 10:15:30 2011 +0000 @@ -18,6 +18,7 @@ static void faultamd64(Ureg*, void*); static void doublefault(Ureg*, void*); static void unexpected(Ureg*, void*); +static void expected(Ureg*, void*); static void dumpstackwithureg(Ureg*); static Lock vctllock; @@ -185,6 +186,7 @@ trapenable(VectorBPT, debugbpt, 0, "#BP"); trapenable(VectorPF, faultamd64, 0, "#PF"); trapenable(Vector2F, doublefault, 0, "#DF"); + trapenable(IdtIPI, expected, 0, "#IPI"); trapenable(Vector15, unexpected, 0, "#15"); nmienable(); @@ -565,6 +567,11 @@ } static void +expected(Ureg*, void*) +{ +} + +static void faultamd64(Ureg* ureg, void*) { u64int addr; @@ -602,11 +609,8 @@ * (up->nerrlab != 0) if this is a system call, if not then * the game's a bogey. */ - if(!user && (!insyscall || up->nerrlab == 0)){ - dumpregs(ureg); - dumpmmuwalk(m->cr2); + if(!user && (!insyscall || up->nerrlab == 0)) panic("fault: %#llux\n", addr); - } sprint(buf, "sys: trap: fault %s addr=%#llux", read? 
"read": "write", addr); postnote(up, 1, buf, NDebug); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/devcons.c --- a/sys/src/nix/port/devcons.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/devcons.c Sat Dec 03 10:15:30 2011 +0000 @@ -667,6 +667,7 @@ Qtime, Quser, Qzero, + Qdebug, }; enum @@ -698,6 +699,7 @@ "time", {Qtime}, NUMSIZE+3*VLNUMSIZE, 0664, "user", {Quser}, 0, 0666, "zero", {Qzero}, 0, 0444, + "debug", {Qdebug}, 0, 0666, }; int @@ -822,6 +824,8 @@ char tmp[512]; /* Qswap is 381 bytes at clu */ int i, k, id, send; long offset; + extern int schedsteals, scheddonates; + if(n <= 0) return n; @@ -940,7 +944,7 @@ return 0; case Qsysstat: - b = smalloc(MACHMAX*(NUMSIZE*10+2+1) + 1); /* +1 for NUL */ + b = smalloc(MACHMAX*(NUMSIZE*11+2+1) + 1); /* +1 for NUL */ bp = b; for(id = 0; id < MACHMAX; id++) { mp = sys->machptr[id]; @@ -969,6 +973,8 @@ (mp->perf.avg_inintr*100)/mp->perf.period, NUMSIZE); bp += NUMSIZE; + readnum(0, bp, NUMSIZE, (mp->sch - run), NUMSIZE); + bp += NUMSIZE; switch(mp->nixtype){ case NIXAC: strcpy(bp, "AC"); @@ -1028,6 +1034,16 @@ n = readstr(offset, buf, n, tmp); return n; + case Qdebug: + s = seprint(tmp, tmp + sizeof tmp, "steal %d\n", schedsteals); + s = seprint(s, tmp + sizeof tmp, "donate %d\n", scheddonates); + s = seprint(s, tmp + sizeof tmp, "locks %uld\n", lockstats.locks); + s = seprint(s, tmp + sizeof tmp, "glare %uld\n", lockstats.glare); + s = seprint(s, tmp + sizeof tmp, "inglare %uld\n", lockstats.inglare); + s = seprint(s, tmp + sizeof tmp, "qlock %uld\n", qlockstats.qlock); + seprint(s, tmp + sizeof tmp, "qlockq %uld\n", qlockstats.qlockq); + return readstr(offset, buf, n, tmp); + break; default: print("consread %#llux\n", c->qid.path); error(Egreg); @@ -1046,7 +1062,7 @@ ulong offset; Cmdbuf *cb; Cmdtab *ct; - + extern int schedsteals, scheddonates; a = va; offset = off; @@ -1177,6 +1193,24 @@ kstrdup(&sysname, buf); break; + case Qdebug: + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + strncpy(buf, a, n); + 
buf[n] = 0; + if(n > 0 && buf[n-1] == '\n') + buf[n-1] = 0; + if(strcmp(buf, "steal") == 0) + schedsteals = 1; + else if(strcmp(buf, "nosteal") == 0) + schedsteals = 0; + else if(strcmp(buf, "donate") == 0) + scheddonates = 1; + else if(strcmp(buf, "nodonate") == 0) + scheddonates = 0; + else + error(Ebadctl); + break; default: print("conswrite: %#llux\n", c->qid.path); error(Egreg); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/devkprof.c --- a/sys/src/nix/port/devkprof.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/devkprof.c Sat Dec 03 10:15:30 2011 +0000 @@ -16,6 +16,7 @@ int nbuf; int time; ulong *buf; + Lock; }kprof; enum{ @@ -29,35 +30,6 @@ "kpctl", {Kprofctlqid}, 0, 0600, }; -static void -_kproftimer(uintptr pc) -{ - if(kprof.time == 0) - return; - /* - * if the pc is coming out of spllo or splx, - * use the pc saved when we went splhi. - */ - if(pc>=PTR2UINT(spllo) && pc<=PTR2UINT(spldone)) - pc = m->splpc; - - kprof.buf[0] += TK2MS(1); - if(kprof.minpc<=pc && pc>= LRES; - kprof.buf[pc] += TK2MS(1); - }else - kprof.buf[1] += TK2MS(1); -} - -static void -kprofinit(void) -{ - if(SZ != sizeof kprof.buf[0]) - panic("kprof size"); - kproftimer = _kproftimer; -} - static Chan* kprofattach(char *spec) { @@ -77,6 +49,43 @@ return devattach('K', spec); } +static void +_kproftimer(uintptr pc) +{ + if(kprof.time == 0) + return; + + /* + * if the pc corresponds to the idle loop, don't consider it. + */ + if(m->inidle) + return; + /* + * if the pc is coming out of spllo or splx, + * use the pc saved when we went splhi. 
+ */ + if(pc>=PTR2UINT(spllo) && pc<=PTR2UINT(spldone)) + pc = m->splpc; + +// ilock(&kprof); + kprof.buf[0] += TK2MS(1); + if(kprof.minpc<=pc && pc>= LRES; + kprof.buf[pc] += TK2MS(1); + }else + kprof.buf[1] += TK2MS(1); +// iunlock(&kprof); +} + +static void +kprofinit(void) +{ + if(SZ != sizeof kprof.buf[0]) + panic("kprof size"); + kproftimer = _kproftimer; +} + static Walkqid* kprofwalk(Chan *c, Chan *nc, char **name, int nname) { diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/devpipe.c --- a/sys/src/nix/port/devpipe.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/devpipe.c Sat Dec 03 10:15:30 2011 +0000 @@ -41,15 +41,16 @@ #define PIPEID(x) ((((unsigned)x))>>5) #define PIPEQID(i, t) ((((unsigned)i)<<5)|(t)) + +enum +{ + /* Plan 9 default for conf.nmach > 1 */ + Pipeqsize = 256*1024 +}; + static void pipeinit(void) { - if(conf.pipeqsize == 0){ - if(conf.nmach > 1) - conf.pipeqsize = 256*1024; - else - conf.pipeqsize = 32*1024; - } } /* @@ -67,12 +68,12 @@ exhausted("memory"); p->ref = 1; - p->q[0] = qopen(conf.pipeqsize, 0, 0, 0); + p->q[0] = qopen(Pipeqsize, 0, 0, 0); if(p->q[0] == 0){ free(p); exhausted("memory"); } - p->q[1] = qopen(conf.pipeqsize, 0, 0, 0); + p->q[1] = qopen(Pipeqsize, 0, 0, 0); if(p->q[1] == 0){ free(p->q[0]); free(p); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/devproc.c --- a/sys/src/nix/port/devproc.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/devproc.c Sat Dec 03 10:15:30 2011 +0000 @@ -163,7 +163,9 @@ static Lock tlock; static int topens; static int tproduced, tconsumed; -void (*proctrace)(Proc*, int, vlong); +static void notrace(Proc*, int, vlong); + +void (*proctrace)(Proc*, int, vlong) = notrace; static void profclock(Ureg *ur, Timer *) @@ -274,6 +276,11 @@ } static void +notrace(Proc*, Tevent, vlong) +{ +} + +static void _proctrace(Proc* p, Tevent etype, vlong ts) { Traceevent *te; @@ -289,6 +296,7 @@ te->time = todget(nil); else te->time = ts; + te->core = m->machno; tproduced++; } @@ -629,7 
+637,7 @@ if(topens > 0) topens--; if(topens == 0) - proctrace = nil; + proctrace = notrace; unlock(&tlock); } if(QID(c->qid) == Qns && c->aux != 0) @@ -702,6 +710,7 @@ int i, j, navail, ne, pid, rsize; char flag[10], *sps, *srv, statbuf[NSEG*64]; uintptr offset; + int tesz; if(c->qid.type & QTDIR) return devdirread(c, va, n, 0, 0, procgen); @@ -713,20 +722,21 @@ return 0; rptr = va; + tesz = BIT32SZ + BIT32SZ + BIT64SZ + BIT32SZ; navail = tproduced - tconsumed; - if(navail > n / sizeof(Traceevent)) - navail = n / sizeof(Traceevent); + if(navail > n / tesz) + navail = n / tesz; while(navail > 0) { - if((tconsumed & Emask) + navail > Nevents) - ne = Nevents - (tconsumed & Emask); - else - ne = navail; - i = ne * sizeof(Traceevent); - memmove(rptr, &tevents[tconsumed & Emask], i); - - tconsumed += ne; - rptr += i; - navail -= ne; + PBIT32(rptr, tevents[tconsumed & Emask].pid); + rptr += BIT32SZ; + PBIT32(rptr, tevents[tconsumed & Emask].etype); + rptr += BIT32SZ; + PBIT64(rptr, tevents[tconsumed & Emask].time); + rptr += BIT64SZ; + PBIT32(rptr, tevents[tconsumed & Emask].core); + rptr += BIT32SZ; + tconsumed++; + navail--; } return rptr - (uchar*)va; } @@ -1383,7 +1393,6 @@ Cmdtab *ct; vlong time; char *e; - void (*pt)(Proc*, int, vlong); if(p->kp) /* no ctl requests to kprocs */ error(Eperm); @@ -1480,7 +1489,9 @@ procstopwait(p, 0); break; case CMwired: - procwired(p, atoi(cb->f[1])); + core = atoi(cb->f[1]); + procwired(p, core); + sched(); break; case CMtrace: switch(cb->nf){ @@ -1545,9 +1556,8 @@ edfstop(p); break; case CMevent: - pt = proctrace; - if(up->trace && pt) - pt(up, SUser, 0); + if(up->trace) + proctrace(up, SUser, 0); break; case CMcore: core = atoi(cb->f[1]); @@ -1612,7 +1622,7 @@ s->steal--; nexterror(); } - if(fixfault(s, offset, read, 0) == 0) + if(fixfault(s, offset, read, 0, s->color) == 0) break; poperror(); s->steal--; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/devtube.c --- a/sys/src/nix/port/devtube.c Thu Dec 01 13:47:08 2011 
+0100 +++ b/sys/src/nix/port/devtube.c Sat Dec 03 10:15:30 2011 +0000 @@ -44,7 +44,7 @@ { Tio *ft; - if(dbgflg[_DBGC_] == 0) + if(DBGFLG == 0) return; qlock(&ftiolck); for(ft = ftio; ft != nil; ft = ft->next) diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/devws.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/port/devws.c Sat Dec 03 10:15:30 2011 +0000 @@ -0,0 +1,173 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + + +/* + * reported times can be translated to a more readable format by + * using something like: + * awk '{printf("print(\"%s: %s times; %s us worst; %s ws total\");\nsrc(%s)\n", + * $1, $3, $4, $5, $2); }' | acid ../k10/9k8cpu + * on the wsdata file, after doing a sort +2nr on it. + */ + +enum{ + WSdirqid, + WSdataqid, + WSctlqid, +}; + +Dirtab Wstab[]={ + ".", {WSdirqid, 0, QTDIR},0, DMDIR|0550, + "wsdata", {WSdataqid}, 0, 0600, + "wsctl", {WSctlqid}, 0, 0600, +}; + + +/* + * waitstats functions are in taslock.c, because they use Locks but + * callers in taslock.c must not call them to avoid + * a loop. + * This is only the user interface. 
+ */ + +static char* +collect(void) +{ + extern Lock waitstatslk; + char *buf, *s; + int i, n; + static char *wname[] = { + [WSlock] "lock", + [WSqlock] "qlock", + [WSslock] "slock", + }; + + n = waitstats.npcs * (strlen("slock") + 1 + 19 * 3 + 1) + 1; + buf = smalloc(n); + s = buf; + lock(&waitstatslk); + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] != 0) + s = seprint(s, buf+n, "%s %#llux %d %#llud %#llud\n", + wname[waitstats.type[i]], + waitstats.pcs[i], waitstats.ns[i], waitstats.wait[i], + waitstats.total[i]); + unlock(&waitstatslk); + if(s == buf + n) + print("collect: fix devws.c, buffer was too short"); + return buf; +} + +static Chan* +wsattach(char *spec) +{ + return devattach('W', spec); +} + +static Walkqid* +wswalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, Wstab, nelem(Wstab), devgen); +} + +static long +wsstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, Wstab, nelem(Wstab), devgen); +} + +static Chan* +wsopen(Chan *c, int omode) +{ + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Eperm); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->aux = nil; + if(c->qid.path == WSdataqid) + c->aux = collect(); + return c; +} + +static void +wsclose(Chan *c) +{ + free(c->aux); +} + +static long +wsread(Chan *c, void *va, long n, vlong off) +{ + + switch((int)c->qid.path){ + case WSdirqid: + n = devdirread(c, va, n, Wstab, nelem(Wstab), devgen); + break; + case WSdataqid: + n = readstr(off, va, n, c->aux); + break; + default: + n = 0; + } + return n; +} + +static long +wswrite(Chan *c, void *a, long n, vlong) +{ + char *buf; + + switch((int)(c->qid.path)){ + case WSctlqid: + buf = smalloc(n + 1); + memmove(buf, a, n); + buf[n] = 0; + if(n > 0 && buf[n-1] == '\n') + buf[n-1] = 0; + if(strcmp(buf, "clear") == 0){ + lockstats.locks = lockstats.glare = lockstats.inglare = 0; + qlockstats.qlock = qlockstats.qlockq = 0; + clearwaitstats(); + }else if(strcmp(buf, "start") == 0) 
+ startwaitstats(1); + else if(strcmp(buf, "stop") == 0) + startwaitstats(0); + else{ + free(buf); + error(Ebadctl); + } + free(buf); + break; + default: + error(Ebadusefd); + } + return n; +} + +Dev wsdevtab = { + 'W', + "waitstats", + + devreset, + devinit, + devshutdown, + wsattach, + wswalk, + wsstat, + wsopen, + devcreate, + wsclose, + wsread, + devbread, + wswrite, + devbwrite, + devremove, + devwstat, +}; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/edf.c --- a/sys/src/nix/port/edf.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/edf.c Sat Dec 03 10:15:30 2011 +0000 @@ -17,10 +17,6 @@ #define DPRINT if(Dontprint){}else print static long now; /* Low order 32 bits of time in µs */ -extern ulong delayedscheds; -extern Schedq runq[Nrq]; -extern int nrdy; -extern ulong runvec; /* Statistics stuff */ ulong nilcount; @@ -137,7 +133,6 @@ /* Proc reached deadline */ extern int panicking; Proc *p; - void (*pt)(Proc*, int, vlong); if(panicking || active.exiting) return; @@ -153,10 +148,11 @@ * returns to user space */ if(p == up){ - if(up->trace && (pt = proctrace)) - pt(up, SInts, 0); + if(up->trace) + proctrace(up, SInts, 0); up->delaysched++; - delayedscheds++; + assert(m->sch); + m->sch->delayedscheds++; } } @@ -165,7 +161,6 @@ { /* Called with edflock held */ Edf *e; - void (*pt)(Proc*, int, vlong); long n; vlong nowns; @@ -196,10 +191,10 @@ e->S = e->C; DPRINT("%lud release %d[%s], r=%lud, d=%lud, t=%lud, S=%lud\n", now, p->pid, statename[p->state], e->r, e->d, e->t, e->S); - if(pt = proctrace){ + if(p->trace){ nowns = todget(nil); - pt(p, SRelease, nowns); - pt(p, SDeadline, nowns + 1000LL*e->D); + proctrace(p, SRelease, nowns); + proctrace(p, SDeadline, nowns + 1000LL*e->D); } }else{ DPRINT("%lud release %d[%s], too late t=%lud, called from %#p\n", @@ -212,6 +207,7 @@ { Proc *p; extern int panicking; + Sched *sch; Schedq *rq; if(panicking || active.exiting) @@ -220,6 +216,7 @@ p = t->ta; if((edflock(p)) == nil) return; + sch = procsched(p); 
DPRINT("%lud releaseintr %d[%s]\n", now, p->pid, statename[p->state]); switch(p->state){ default: @@ -227,8 +224,8 @@ return; case Ready: /* remove proc from current runq */ - rq = &runq[p->priority]; - if(dequeueproc(rq, p) != p){ + rq = &sch->runq[p->priority]; + if(dequeueproc(sch, rq, p) != p){ DPRINT("releaseintr: can't find proc or lock race\n"); release(p); /* It'll start best effort */ edfunlock(); @@ -245,7 +242,7 @@ ready(p); if(up){ up->delaysched++; - delayedscheds++; + sch->delayedscheds++; } return; case Running: @@ -260,7 +257,7 @@ p->trend = nil; if(up){ up->delaysched++; - delayedscheds++; + sch->delayedscheds++; } return; } @@ -272,7 +269,6 @@ { long used; Edf *e; - void (*pt)(Proc*, int, vlong); if((e = edflock(p)) == nil) return; @@ -283,8 +279,8 @@ e->extraused += used; if(e->S > 0){ if(e->S <= used){ - if(pt = proctrace) - pt(p, SSlice, 0); + if(p->trace) + proctrace(p, SSlice, 0); DPRINT("%lud edfrecord slice used up\n", now); e->d = now; e->S = 0; @@ -299,7 +295,6 @@ edfrun(Proc *p, int edfpri) { Edf *e; - void (*pt)(Proc*, int, vlong); long tns; e = p->edf; @@ -311,7 +306,8 @@ * deschedule forthwith */ p->delaysched++; - delayedscheds++; + assert(m->sch); + m->sch->delayedscheds++; e->s = now; return; } @@ -325,8 +321,8 @@ }else{ DPRINT("v"); } - if(p->trace && (pt = proctrace)) - pt(p, SInte, todget(nil) + e->tns); + if(p->trace) + proctrace(p, SInte, todget(nil) + e->tns); e->tmode = Trelative; e->tf = deadlineintr; e->ta = p; @@ -344,7 +340,6 @@ Edf *e; int i; Proc *r; - void (*pt)(Proc*, int, vlong); long tns; e = p->edf; @@ -372,8 +367,8 @@ edflock(p); - if(p->trace && (pt = proctrace)) - pt(p, SAdmit, 0); + if(p->trace) + proctrace(p, SAdmit, 0); /* Look for another proc with the same period to synchronize to */ for(i=0; (r = psincref(i)) != nil; i++) { @@ -439,12 +434,11 @@ edfstop(Proc *p) { Edf *e; - void (*pt)(Proc*, int, vlong); if(e = edflock(p)){ DPRINT("%lud edfstop %d[%s]\n", now, p->pid, statename[p->state]); - if(p->trace 
&& (pt = proctrace)) - pt(p, SExpel, 0); + if(p->trace) + proctrace(p, SExpel, 0); e->flags &= ~Admitted; if(e->tt) timerdel(e); @@ -464,13 +458,12 @@ { /* sleep until next release */ Edf *e; - void (*pt)(Proc*, int, vlong); long n; if((e = edflock(up)) == nil) return; - if(up->trace && (pt = proctrace)) - pt(up, SYield, 0); + if(up->trace) + proctrace(up, SYield, 0); if((n = now - e->t) > 0){ if(n < e->T) e->t += e->T; @@ -500,9 +493,9 @@ edfready(Proc *p) { Edf *e; + Sched *sch; Schedq *rq; Proc *l, *pp; - void (*pt)(Proc*, int, vlong); long n; if((e = edflock(p)) == nil) @@ -563,9 +556,10 @@ } edfunlock(); DPRINT("^"); - rq = &runq[PriEdf]; + sch = procsched(p); + rq = &sch->runq[PriEdf]; /* insert in queue in earliest deadline order */ - lock(runq); + lock(sch); l = nil; for(pp = rq->head; pp; pp = pp->rnext){ if(pp->edf->d > e->d) @@ -580,14 +574,14 @@ if(pp == nil) rq->tail = p; rq->n++; - nrdy++; - runvec |= 1 << PriEdf; + sch->nrdy++; + sch->runvec |= 1 << PriEdf; p->priority = PriEdf; p->readytime = m->ticks; p->state = Ready; - unlock(runq); - if(p->trace && (pt = proctrace)) - pt(p, SReady, 0); + unlock(sch); + if(p->trace) + proctrace(p, SReady, 0); return 1; } diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/edf.h --- a/sys/src/nix/port/edf.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/edf.h Sat Dec 03 10:15:30 2011 +0000 @@ -52,3 +52,7 @@ /* Interface: */ Edf* edflock(Proc*); void edfunlock(void); + + +/* sched interface, used only by edf */ +Sched* procsched(Proc*); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/fault.c --- a/sys/src/nix/port/fault.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/fault.c Sat Dec 03 10:15:30 2011 +0000 @@ -5,11 +5,19 @@ #include "fns.h" #include "../port/error.h" +/* + * Fault calls fixfault which ends up calling newpage, which + * might fail to allocate a page for the right color. So, we + * might enter a loop and retry forever. 
+ * We first try with the desired color, and then with any + * other one, if we failed for some time. + */ int fault(uintptr addr, int read) { Segment *s; char *sps; + int i, color; if(up->nlocks) print("fault nlocks %d\n", up->nlocks); @@ -18,8 +26,8 @@ spllo(); m->pfault++; - for(;;) { - s = seg(up, addr, 1); /* leaves s->lk qlocked if seg != nil */ + for(i = 0;; i++) { + s = seg(up, addr, 1); /* leaves s->lk qlocked if seg != nil */ if(s == 0) { up->psstate = sps; return -1; @@ -31,13 +39,19 @@ return -1; } - if(fixfault(s, addr, read, 1) == 0) + color = s->color; + if(i > 3) + color = -1; + if(fixfault(s, addr, read, 1, color) == 0) break; /* * See the comment in newpage that describes * how to get here. */ + + if(i > 0 && (i%1000) == 0) + print("fault: tried %d times\n", i); } up->psstate = sps; @@ -62,7 +76,7 @@ int -fixfault(Segment *s, uintptr addr, int read, int dommuput) +fixfault(Segment *s, uintptr addr, int read, int dommuput, int color) { int type; int ref; @@ -74,7 +88,6 @@ Page *(*fn)(Segment*, uintptr); pgsz = m->pgsz[s->pgszi]; - addr &= ~(pgsz-1); soff = addr-s->base; p = &s->map[soff/PTEMAPMEM]; @@ -98,7 +111,7 @@ case SG_TEXT: /* Demand load */ if(pagedout(*pg)) - pio(s, addr, soff, pg); + pio(s, addr, soff, pg, color); mmuattr = PTERONLY|PTEVALID; (*pg)->modref = PG_REF; @@ -108,7 +121,7 @@ case SG_SHARED: /* Zero fill on demand */ case SG_STACK: if(*pg == 0) { - new = newpage(1, &s, addr, pgsz); + new = newpage(1, &s, addr, pgsz, color); if(s == 0) return -1; @@ -119,7 +132,7 @@ case SG_DATA: common: /* Demand load/pagein/copy on write */ if(pagedout(*pg)) - pio(s, addr, soff, pg); + pio(s, addr, soff, pg, color); /* * It's only possible to copy on write if @@ -138,7 +151,7 @@ if(ref > 1) { unlock(lkp); - new = newpage(0, &s, addr, pgsz); + new = newpage(0, &s, addr, pgsz, color); if(s == 0) return -1; *pg = new; @@ -189,7 +202,7 @@ } void -pio(Segment *s, uintptr addr, ulong soff, Page **p) +pio(Segment *s, uintptr addr, ulong soff, Page 
**p, int color) { Page *new; KMap *k; @@ -222,7 +235,7 @@ qunlock(&s->lk); - new = newpage(0, 0, addr, pgsz); + new = newpage(0, 0, addr, pgsz, color); k = kmap(new); kaddr = (char*)VA(k); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/image.c --- a/sys/src/nix/port/image.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/image.c Sat Dec 03 10:15:30 2011 +0000 @@ -202,7 +202,7 @@ } Image* -attachimage(int type, Chan *c, uintptr base, usize len) +attachimage(int type, Chan *c, int color, uintptr base, usize len) { Image *i, **l; @@ -249,6 +249,7 @@ i->qid = c->qid; i->mqid = c->mqid; i->mchan = c->mchan; + i->color = color; l = &ihash(c->qid.path); i->hash = *l; *l = i; @@ -264,6 +265,7 @@ } i->s = newseg(type, base, len); i->s->image = i; + i->s->color = color; i->ref++; poperror(); } diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/linuxsysemu.c --- a/sys/src/nix/port/linuxsysemu.c Thu Dec 01 13:47:08 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,764 +0,0 @@ -#include "u.h" -#include "../port/lib.h" -#include "mem.h" -#include "dat.h" -#include "fns.h" - -#include "../port/error.h" -#include -#include "ureg.h" - -/* from linux */ - -struct iovec { - void *base; - int len; -}; - - -struct utsname { - char sysname[65]; - char nodename[65]; - char release[65]; - char version[65]; - char machine[65]; - char gnu[65]; -}; - - -struct timeval - { - u32int tv_sec; /* Seconds. */ - u32int tv_usec; /* Microseconds. 
*/ - }; - -struct rusage { - struct timeval ru_utime; /* user time used */ - struct timeval ru_stime; /* system time used */ - long ru_maxrss; /* maximum resident set size */ - long ru_ixrss; /* integral shared memory size */ - long ru_idrss; /* integral unshared data size */ - long ru_isrss; /* integral unshared stack size */ - long ru_minflt; /* page reclaims */ - long ru_majflt; /* page faults */ - long ru_nswap; /* swaps */ - long ru_inblock; /* block input operations */ - long ru_oublock; /* block output operations */ - long ru_msgsnd; /* messages sent */ - long ru_msgrcv; /* messages received */ - long ru_nsignals; /* signals received */ - long ru_nvcsw; /* voluntary context switches */ - long ru_nivcsw; /* involuntary " */ -}; - -struct utsname linuxutsname = { - /* from a real system */ - "Linux", - "nix", - "2.6.35-30-generic", - "#54-Ubuntu SMP Tue Jun 7 18:41:54 UTC 2011", - "x86_64", -}; - -void -linuxexit(Ar0*, va_list list) -{ - int val; - char exitstr[32] = ""; - - val = va_arg(list, int); - if (val) - snprint(exitstr, sizeof(exitstr), "%d", val); - if (up->linux & 128) print("%d:linuxexit %d\n", up->pid, val); - up->linux = 0; - pexit(exitstr, 1); -} - -void -linuxuname(Ar0*ar, va_list list) -{ - void *va; - va = va_arg(list, void *); - if (up->linux & 128) print("%d:linuxuname va %p\n", up->pid, va); - validaddr(va, 1, 1); - memmove(va, &linuxutsname, sizeof(linuxutsname)); - if (up->linux&128) print("Returns %s\n", linuxutsname.release); - ar->i = 0; -} - -/* this was in port/sysseg.c and was copied here. */ -/* There are a few special bits for CNK needs. */ -void -linuxsbrk(Ar0* ar0, va_list list) -{ - uintptr addr; - uintptr ibrk(uintptr addr, int seg); - extern Segment *heapseg; - int i; - - addr = PTR2UINT(va_arg(list, void*)); - - if (! 
heapseg){ - print("linuxsbrk: no heap set up yet\n"); - error("No heap set up yet"); - } - - if(addr == 0){ - ar0->p = heapseg->top; - return; - } - - if (addr < heapseg->top){ - print("linuxsbrk: can't shrink heap\n"); - error("can't shrink heap"); - } - - /* now this is a hack ... but we're going to assume this thing is not - * only mapped but the TLB is set up for it. - * - heapseg->top = addr; - ar0->p = heapseg->top; - return; - */ - - /* find the index of the heap segment; call ibrk with that segment. */ - /* consider flagging heapseg by base address or p==v, but it's too soon to know - * if that is a universal test and I hate to do a strcmp on each linuxsbrk - */ - for(i = 0; i < NSEG; i++) { - if (heapseg == up->seg[i]) - break; - } - /* isn't life grand? The heap is already mapped. So just grow the end of heap pointer but no need to - * allocate a page. - */ - if (i < NSEG) - ar0->p = ibrk(addr, i); - if (up->linux & 128) print("%d:linuxsbrk for %p returns %p\n", up->pid, addr, ar0->p); -} - -/* special case: interpretation of '0' is done in USER MODE on Plan 9 */ -/* but old deprecated sysbrk_ does what we need */ -void -linuxbrk(Ar0* ar0, va_list list) -{ -// void linuxsbrk(Ar0* ar0, va_list list); - uintptr ibrk(uintptr addr, int seg); - void sysbrk_(Ar0*, va_list); - uintptr va; - //void *arg[1]; - va = va_arg(list, uintptr); - if (up->linux & 128) print("%d:linuxbrk va %#p: ", up->pid, (void *)va); - //arg[0] = va; - //sysbrk_(ar0, (va_list) arg); - va = ibrk(va, BSEG); - /* it is possible, though unlikely, that libc wants exactly the value it asked for. Plan 9 is returning rounded-up-to-next-page values. 
*/ - if (va) - ar0->v = (void *)va; - if (up->linux & 128) print("returns %#p\n", va); - -} - -void -linuxopen(Ar0 *ar0, va_list list) -{ - char *aname; - int omode; - void sysopen(Ar0 *, va_list); - aname = va_arg(list, char*); - omode = va_arg(list, int); - if (up->linux & 128){ - validaddr(aname, 1, 0); - print("%d:linuxopen (%s,%o):", up->pid, aname, omode); - } - sysopen(ar0, list); - if (up->linux & 128) print("=%d\n", ar0->i); -} - -void -linuxclose(Ar0 *ar0, va_list list) -{ - int fd; - void sysclose(Ar0 *, va_list); - - fd = va_arg(list, int); - if (up->linux & 128) - print("%d:linuxclose (%d):", up->pid, fd); - - sysclose(ar0, list); - if (up->linux & 128) print("=%d\n", ar0->i); -} - -void -linuxwritev(Ar0 *ar0, va_list list) -{ - void sys_write(Ar0* ar0, va_list list); - int fd; - struct iovec *iov; - int iovcnt; - int i; - fd = va_arg(list, int); - iov = va_arg(list, struct iovec *); - iovcnt = va_arg(list, int); - if (up->linux & 128) print("%d:linuxwritev (%d, %p, %d):", up->pid, fd, iov, iovcnt); - validaddr(iov, iovcnt * sizeof(*iov), 0); - /* don't validate all the pointers in the iov; sys_write does this */ - for(i = 0; i < iovcnt; i++) { - Ar0 war0; - uintptr arg[3]; - if (up->linux & 128) print("[%p,%d],", iov[i].base, iov[i].len); - arg[0] = fd; - arg[1] = (uintptr) iov[i].base; - arg[2] = iov[i].len; - sys_write(&war0, (va_list) arg); - if (war0.l < 0) - break; - /* if the first one fails, we get 0 */ - ar0->l += war0.l; - } - if (up->linux & 128) print("\n"); -} - - -void -linuxsocketcall(Ar0 *ar0, va_list list) -{ - int fd; - uintptr *args; - - USED(ar0); - - fd = va_arg(list, int); - args = va_arg(list, uintptr *); - if (up->linux & 128) print("%d:linuxsocketcall (%d, %p):", up->pid, fd, args); - validaddr(args,sizeof(*args), 0); - if (up->linux & 128) print("\n"); -} - - -void -linuxgeteuid(Ar0 *ar0, va_list) -{ - ar0->i = 0; -} - -/* ow this hurts. 
*/ -typedef unsigned long int __ino_t; -typedef long long int __quad_t; -typedef unsigned int __mode_t; -typedef unsigned int __nlink_t; -typedef long int __off_t; -typedef unsigned int __uid_t; -typedef unsigned int __gid_t; -typedef long int __blksize_t; -typedef long int __time_t; -typedef long int __blkcnt_t; - -typedef unsigned long long int __u_quad_t; - -typedef __u_quad_t __dev_t; - -struct timespec - { - __time_t tv_sec; - long int tv_nsec; - }; -/* -# 103 "/bgsys/drivers/V1R2M0_200_2008-080513P/ppc/gnu-linux/lib/gcc/powerpc-bgp-linux/4.1.2/../../../../powerpc-bgp-linux/sys-include/sys/stat.h" 3 4 -*/ -/* how many stat structs does linux have? too many. */ -struct stat { - __dev_t st_dev; - unsigned short int __pad1; - __ino_t st_ino; - __mode_t st_mode; - __nlink_t st_nlink; - __uid_t st_uid; - __gid_t st_gid; - __dev_t st_rdev; - unsigned short int __pad2; - __off_t st_size; - __blksize_t st_blksize; - __blkcnt_t st_blocks; - struct timespec st_atim; - struct timespec st_mtim; - struct timespec st_ctim; - unsigned long int __unused4; - unsigned long int __unused5; -} stupid = { - .st_blksize = 4096, - .st_dev = 1, - .st_gid = 0, - .st_ino = 0x12345, - .st_mode = 0664 | 020000, - .st_nlink = 1, - .st_rdev = 501 -}; - -void -fstat64(Ar0 *ar0, va_list list) -{ - void *v; - int fd; - fd = va_arg(list, int); - v = va_arg(list, void *); - validaddr(v, 1, 0); - switch(fd) { - case 0: - case 1: - case 2: - ar0->i = 0; - memmove(v, &stupid, sizeof(stupid)); - break; - } - -} - - -/* do nothing, succesfully */ -void -returnok(Ar0*, va_list) -{ - - return; -} - -/* void * mmap(void *start, size_t length, int prot , int flags, int fd, - off_t offset); */ -/* They are using this as a poor man's malloc. 
*/ - -void linuxmmap(Ar0 *ar0, va_list list) -{ - void *v; - int length, prot, flags, fd; - ulong offset; - void linuxsbrk(Ar0* ar0, va_list list); - v = va_arg(list, void *); - length = va_arg(list, int); - prot = va_arg(list, int); - flags = va_arg(list, int); - fd = va_arg(list, int); - offset = va_arg(list, ulong); - if (up->linux & 128) print("%d:CNK: mmap %p %#x %#x %#x %d %#ulx\n", up->pid, v, length, prot, flags, fd, offset); - if (fd == -1){ - unsigned char *newv, *oldv; - uintptr args[1]; - args[0] = 0; - linuxsbrk(ar0, (va_list) args); - if (up->linux & 128) print("%d:mmap anon: current is %p\n", up->pid, ar0->v); - oldv =ar0->v; - newv = ((unsigned char *)oldv) + length; - if (up->linux & 128) print("%d:mmap anon: ask for %p\n", up->pid, newv); - args[0] = (uintptr) newv; - linuxsbrk(ar0, (va_list) args); - if (up->linux & 128) print("%d:mmap anon: new is %p\n", up->pid, ar0->v); - /* success means "return the old pointer" ... */ - ar0->v = oldv; - return; - } - - ar0->i = -1; - -} - - -void linuxprocid(Ar0 *ar0, va_list) -{ - ar0->i = 0; -} - -/*Kernel_Ranks2Coords((kernel_coords_t *)_mapcache, _fullSize);*/ - - -/* int sigaction(int sig, const struct sigaction *restrict act, - struct sigaction *restrict oact); */ - -void sigaction(Ar0 *ar0, va_list list) -{ - void *act, *oact; - act = va_arg(list, void *); - oact = va_arg(list, void *); - if (up->linux & 128) print("%d:sigaction, %p %p\n", up->pid, act, oact); - ar0->i = 0; -} - -/*long rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize); */ - -void rt_sigprocmask(Ar0 *ar0, va_list list) -{ - int how; - void *set, *oset; - int size; - how = va_arg(list,int); - set = va_arg(list, void *); - oset = va_arg(list, void *); - size = va_arg(list, int); - if (up->linux & 128) print("%d:sigaction, %d %p %p %d\n", up->pid, how, set, oset, size); - ar0->l = 0; -} - -/* damn. 
Did not want to do futtocks */ -#define FUTEX_WAIT 0 -#define FUTEX_WAKE 1 -#define FUTEX_FD 2 -#define FUTEX_REQUEUE 3 -#define FUTEX_CMP_REQUEUE 4 -#define FUTEX_WAKE_OP 5 -#define FUTEX_LOCK_PI 6 -#define FUTEX_UNLOCK_PI 7 -#define FUTEX_TRYLOCK_PI 8 -#define FUTEX_WAIT_BITSET 9 -#define FUTEX_WAKE_BITSET 10 - - -void futex(Ar0 *ar0, va_list list) -{ - int *uaddr, op, val; - int *uaddr2, val3; - struct timespec *timeout; - uaddr = va_arg(list,int *); - op = va_arg(list, int); - val = va_arg(list, int); - timeout = va_arg(list, struct timespec *); - uaddr2 = va_arg(list, int *); - val3 = va_arg(list, int); - USED(uaddr); - USED(op); - USED(val); - USED(timeout); - USED(uaddr2); - USED(val3); - if (up->linux & 128) print("%d:futex, uaddr %p op %x val %x uaddr2 %p timeout %p val3 %x\n", up->pid, - uaddr, op, val, uaddr2, timeout, val); - switch(op) { - default: - ar0->i = -1; - break; - case FUTEX_WAIT: - /* - * This operation atomically verifies that the futex address uaddr - * still contains the value val, and sleeps awaiting FUTEX_WAKE on - * this futex address. If the timeout argument is non-NULL, its - * contents describe the maximum duration of the wait, which is - * infinite otherwise. The arguments uaddr2 and val3 are ignored. - */ - validaddr(uaddr, sizeof(*uaddr), 0); - if (up->linux & 128) print("%d:futex: value at %p is %#x, val is %#x\n", up->pid, uaddr, *uaddr, val); - if (*uaddr != val) { - ar0->i = -11; - return; - } - if (timeout) { - validaddr(timeout, sizeof(*timeout), 0); - if (timeout->tv_sec == timeout->tv_nsec == 0) - return; - } - if (up->linux & 128) print("%d:Not going to sleep\n", up->pid); - break; - } -} - -/* mprotect is used to set a stack red zone. We may want to use - * segattach for anon page alloc and then use segdetach for the same purpose. 
- */ -void linuxmprotect(Ar0 *ar0, va_list list) -{ - u32int addr, len, prot; - addr = va_arg(list, u32int); - len = va_arg(list, u32int); - prot = va_arg(list, u32int); - if (up->linux & 128) print("%d:mprotect: %#x %#x %#x\n", up->pid, addr, len, prot); - ar0->i = 0; -} - -/* this is a hack. */ -Segment* -linuxdupseg(Segment **seg, int segno, int share) -{ - int i, size; - Pte *pte; - Segment *n, *s; - - SET(n); - s = seg[segno]; - - qlock(&s->lk); - if(waserror()){ - qunlock(&s->lk); - nexterror(); - } - switch(s->type&SG_TYPE) { - case SG_TEXT: /* New segment shares pte set */ - case SG_SHARED: - case SG_PHYSICAL: - goto sameseg; - - case SG_STACK: - /* linux wants to share the stack. */ - if(share){ if (up->linux & 128) print("CLONE STACK IS SHARE\n"); - goto sameseg; - } - /* that is all the change */ -if (up->linux & 128) print("CLONE STACK IS NEW\n"); - n = newseg(s->type, s->base, s->size); - break; - - case SG_BSS: /* Just copy on write */ - if(share) - goto sameseg; -if (up->linux & 128) print("CLONE NEW BSS\n"); - n = newseg(s->type, s->base, s->size); - break; - - case SG_DATA: /* Copy on write plus demand load info */ - if(segno == TSEG){ - poperror(); - qunlock(&s->lk); - return data2txt(s); - } - - if(share) - goto sameseg; - n = newseg(s->type, s->base, s->size); - - incref(s->image); - n->image = s->image; - n->fstart = s->fstart; - n->flen = s->flen; - break; - } - size = s->mapsize; - for(i = 0; i < size; i++) - if(pte = s->map[i]) - n->map[i] = ptecpy(n, pte); - - n->flushme = s->flushme; - if(s->ref > 1) - procflushseg(s); - poperror(); - qunlock(&s->lk); - return n; - -sameseg: - incref(s); - poperror(); - qunlock(&s->lk); - return s; -} - -/* the big problem here is that linux clone wants to allow the user to set the - * stack. It's stupid but it's what they do. Linux NPTL pretty much requires it. - * we are going to be dumb here for now and assume we only use - * RFPROC|RFMEM. - * What do we do about stack longer term? It gets a bit weird. 
- * the child process stack is in the data segment. Should we make the - * child process stack segment a DATA segment, share the data segment, - * and make it a STACK for the child? If we did things right in linuxclone - * we could remove our private dupseg. - */ -void linuxclone(Ar0 *ar0, va_list list) -{ - void linuxsysrforkchild(Proc* child, Proc* parent, uintptr newsp); - u32int flags, stack; - Proc *p; - int flag, i, n, pid; - Mach *wm; - flags = va_arg(list, u32int); - stack = va_arg(list, u32int); - if (up->linux & 128) print("%d:CLONE: %#x %#x\n", up->pid, flags, stack); - if (flags != 0x7d0f00) { - print("%d:CLONE: don't know what to do with flags %#x\n", up->pid, flags); - ar0->i = -1; - return; - } - flag = RFPROC | RFMEM; - - p = newproc(); - - p->trace = up->trace; - p->scallnr = up->scallnr; - memmove(p->arg, up->arg, sizeof(up->arg)); - p->nerrlab = 0; - p->slash = up->slash; - p->dot = up->dot; - incref(p->dot); - - memmove(p->note, up->note, sizeof(p->note)); - p->privatemem = up->privatemem; - p->noswap = up->noswap; - p->nnote = up->nnote; - p->notified = 0; - p->lastnote = up->lastnote; - p->notify = up->notify; - p->ureg = up->ureg; - p->dbgreg = 0; - p->linux = 1; - - /* Make a new set of memory segments */ - n = flag & RFMEM; - qlock(&p->seglock); - if(waserror()){ - qunlock(&p->seglock); - nexterror(); - } - for(i = 0; i < NSEG; i++) - if(up->seg[i]) - p->seg[i] = linuxdupseg(up->seg, i, n); - qunlock(&p->seglock); - poperror(); - - /* File descriptors */ - if(flag & (RFFDG|RFCFDG)) { - if(flag & RFFDG) - p->fgrp = dupfgrp(up->fgrp); - else - p->fgrp = dupfgrp(nil); - } - else { - p->fgrp = up->fgrp; - incref(p->fgrp); - } - - /* Process groups */ - if(flag & (RFNAMEG|RFCNAMEG)) { - p->pgrp = newpgrp(); - if(flag & RFNAMEG) - pgrpcpy(p->pgrp, up->pgrp); - /* inherit noattach */ - p->pgrp->noattach = up->pgrp->noattach; - } - else { - p->pgrp = up->pgrp; - incref(p->pgrp); - } - if(flag & RFNOMNT) - up->pgrp->noattach = 1; - - if(flag & 
RFREND) - p->rgrp = newrgrp(); - else { - incref(up->rgrp); - p->rgrp = up->rgrp; - } - - /* Environment group */ - if(flag & (RFENVG|RFCENVG)) { - p->egrp = smalloc(sizeof(Egrp)); - p->egrp->ref = 1; - if(flag & RFENVG) - envcpy(p->egrp, up->egrp); - } - else { - p->egrp = up->egrp; - incref(p->egrp); - } - p->hang = up->hang; - p->procmode = up->procmode; - - /* Craft a return frame which will cause the child to pop out of - * the scheduler in user mode with the return register zero - */ - /* fix the stack for linux semantics */ - linuxsysrforkchild(p, up, stack); - - p->parent = up; - p->parentpid = up->pid; - if(flag&RFNOWAIT) - p->parentpid = 0; - else { - lock(&up->exl); - up->nchild++; - unlock(&up->exl); - } - if((flag&RFNOTEG) == 0) - p->noteid = up->noteid; - - pid = p->pid; - memset(p->time, 0, sizeof(p->time)); - p->time[TReal] = MACHP(0)->ticks; - - kstrdup(&p->text, up->text); - kstrdup(&p->user, up->user); - /* - * since the bss/data segments are now shareable, - * any mmu info about this process is now stale - * (i.e. has bad properties) and has to be discarded. - */ - mmuflush(); - p->basepri = up->basepri; - p->priority = up->basepri; - p->fixedpri = up->fixedpri; - p->mp = up->mp; - wm = up->wired; - if(wm) - procwired(p, wm->machno); - ready(p); - sched(); - - ar0->i = pid; -} - -/* get app segment mapping. Not the gasm you think, you dirty-minded person. - */ -/* we are going to deprecate this call. It was only there for libraries (dcmf, MPI) that needed - * the huge physical segment. I think nowadays that is a bad idea. 
- */ -void gasm(Ar0 *, va_list) -{ -#ifdef NOMORE - void seginfo(int seg, u32int *va, u64int *pa, u32int *len); - u64int *pa; - int whichseg; - int corenum; - u32int *va; - u32int *slen; - - whichseg = va_arg(list, int); - corenum = va_arg(list, int); - va = va_arg(list, u32int *); - pa = va_arg(list, u64int *); - slen = va_arg(list, u32int *); - validaddr(va, sizeof(*va), 1); - validaddr(pa, sizeof(*pa), 1); - validaddr(slen, sizeof(*slen), 1); - - if (up->linux & 128) print("%d:gasm: %#x %#x %p %p %p\n", up->pid, whichseg, corenum, va, pa, slen); - - /* we can not run any more without devsegment. Sorry. */ - seginfo(whichseg, va, pa, slen); - if (up->linux & 128) print("%d:gasm: %#x %#llx %#x\n", up->pid, *va, *pa, *slen); - ar0->i = 0; -#endif -} - -void timeconv(ulong l, struct timeval *t) -{ - u32int ms; - ms = TK2MS(l); - t->tv_sec += ms / 1000; - ms %= 1000; - t->tv_usec += ms * 1000; -} -void getrusage(Ar0 *ar0, va_list list) -{ - int what; - struct rusage *r; - - what = va_arg(list, int); - r = va_arg(list, struct rusage *); - validaddr(r, sizeof(*r), 1); - - if (up->linux & 128) print("%d:getrusage: %s %p:",up->pid, !what? 
"self" : "kids", r); - memset(r, 0, sizeof(*r)); - if (what) { - timeconv(up->time[3], &r->ru_utime); - timeconv(up->time[4], &r->ru_stime); - } else { - timeconv(up->time[0], &r->ru_utime); - timeconv(up->time[1], &r->ru_stime); - if (up->linux & 128) print("%#lx:%#lx, ", up->time[0], up->time[1]); - } - if (up->linux & 128) print("%#x:%#x\n", r->ru_utime.tv_sec, r->ru_stime.tv_sec); - - ar0->i = 0; -} diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/page.c --- a/sys/src/nix/port/page.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/page.c Sat Dec 03 10:15:30 2011 +0000 @@ -107,11 +107,10 @@ } memset(pg, 0, sizeof *pg); if((pg->pa = physalloc(size, &color, pg)) == 0){ - DBG("pgalloc: physalloc failed for size %#ulx color %d\n", size, color); + DBG("pgalloc: physalloc failed: size %#ulx color %d\n", size, color); free(pg); return nil; } -assert(phystag(pg->pa) == pg); pg->pgszi = si; /* size index */ incref(&pga.pgsza[si].npages); pg->color = color; @@ -188,32 +187,62 @@ pa->freecount++; } +static Page* +findpg(Page *pl, int color) +{ + Page *p; + + for(p = pl; p != nil; p = p->next) + if(color == NOCOLOR || p->color == color) + return p; + return nil; +} /* - * XXX: newpage could receive a hit regarding the color we prefer. - * fault calls newpage to do pio and install new pages. - * Also, processes could keep track of a preferred color, so - * that they try to allocate all their segments of the same color. + * can be called with up == nil during boot. 
*/ Page* -newpage(int clear, Segment **s, uintptr va, usize size) +newpage(int clear, Segment **s, uintptr va, usize size, int color) { Page *p; KMap *k; uchar ct; Pgsza *pa; - int i, color, dontalloc, si; + int i, dontalloc, si; static int once; si = getpgszi(size); pa = &pga.pgsza[si]; - color = -1; - if(s && (*s)->color != NOCOLOR) - color = (*s)->color; lock(&pga); - for(;;){ - if(pa->freecount > 1) + /* + * Beware, new page may enter a loop even if this loop does not + * loop more than once, if the segment is lost and fault calls us + * again. Either way, we accept any color if we failed a couple of times. + */ + for(i = 0;; i++){ + if(i > 3) + color = NOCOLOR; + + /* + * 1. try to reuse a free one. + */ + p = findpg(pa->head, color); + if(p != nil) break; + + /* + * 2. try to allocate a new one from physical memory + */ + p = pgalloc(size, color); + if(p != nil){ + pagechainhead(p); + break; + } + + /* + * 3. out of memory, try with the pager. + * but release the segment (if any) while in the pager. + */ unlock(&pga); dontalloc = 0; @@ -224,9 +253,8 @@ } /* - * Tries 3) flusing images if size is <= 2M, - * 4) releasing bigger pages, and 5) releasing smaller pages. - * in that order. + * Try to get any page of the desired color + * or any color for NOCOLOR. 
*/ kickpager(si, color); @@ -242,17 +270,8 @@ lock(&pga); } - /* First try for our colour */ - for(p = pa->head; p; p = p->next) - if(p->color == color) - break; - - ct = PG_NOFLUSH; - if(p == 0) { - p = pa->head; - p->color = color; - ct = PG_NEWCOL; - } + assert(p != nil); + ct = PG_NEWCOL; pageunchain(p); @@ -274,8 +293,8 @@ memset((void*)VA(k), 0, m->pgsz[p->pgszi]); kunmap(k); } - DBG("newpage: va %#p pa %#ullx pgsz %#ux\n", - p->va, p->pa, m->pgsz[p->pgszi]); + DBG("newpage: va %#p pa %#ullx pgsz %#ux color %d\n", + p->va, p->pa, m->pgsz[p->pgszi], p->color); return p; } diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/pager.c --- a/sys/src/nix/port/pager.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/pager.c Sat Dec 03 10:15:30 2011 +0000 @@ -226,12 +226,27 @@ return -1; } +static int +hascolor(Page *pl, int color) +{ + Page *p; + + lock(&pga); + for(p = pl; p != nil; p = p->next) + if(color == NOCOLOR || p->color == color){ + unlock(&pga); + return 1; + } + unlock(&pga); + return 0; +} + /* - * Someone thinks pages of size m->pgsz[pgszi] are needed - * and is trying to make them available. + * Someone couldn't find pages of the given size index and color. + * (color may be NOCOLOR if the caller is trying to get any page + * and is desperate). * Many processes may be calling this at the same time, - * in which case they will enter one by one. Only when more than - * Minpages are available they will simply return. + * The first one operates as a pager and does what it can. */ void kickpager(int pgszi, int color) @@ -246,51 +261,64 @@ pa = &pga.pgsza[pgszi]; /* - * First try allocating from physical memory. + * 1. did anyone else release one for us in the mean time? */ - tryalloc(pgszi, color); - if(pa->freecount > Minpages) + if(hascolor(pa->head, color)) goto Done; /* - * If pgszi is <= page size for text (assumed to be 2M) - * try to release text pages. + * 2. 
try allocating from physical memory + */ + tryalloc(pgszi, color); + if(hascolor(pa->head, color)) + goto Done; + + /* + * If pgszi is <= text page size, try releasing text pages. */ if(m->pgsz[pgszi] <= 2*MiB){ pstats.ntext++; DBG("kickpager() up %#p: reclaiming text pages\n", up); pageouttext(pgszi, color); tryalloc(pgszi, color); - if(pa->freecount > Minpages){ + if(hascolor(pa->head, color)){ DBG("kickpager() found %uld free\n", pa->freecount); goto Done; } } /* - * Try releasing memory from one bigger page, perhaps from text - * pages released in the previous step. + * Try releasing memory from bigger pages. */ pstats.nbig++; freepages(pgszi+1, 1); - while(tryalloc(pgszi, color) != -1 && pa->freecount < Minpages) - ; - if(pa->freecount > 1){ + tryalloc(pgszi, color); + if(hascolor(pa->head, color)){ DBG("kickpager() found %uld free\n", pa->freecount); goto Done; } + /* - * Try releasing memory from all pages. + * Really the last resort. Try releasing memory from all pages. */ pstats.nall++; DBG("kickpager() up %#p: releasing all pages\n", up); freepages(0, 0); tryalloc(pgszi, color); - if(pa->freecount > 1){ + if(pa->freecount > 0){ DBG("kickpager() found %uld free\n", pa->freecount); goto Done; } - panic("kickpager(): no physical memory"); + + /* + * What else can we do? + * But don't panic if we are still trying to get memory of + * a particular color and there's none. We'll retry asking + * for any color. 
+ */ + if(color == NOCOLOR) + panic("kickpager(): no physical memory"); + Done: poperror(); qunlock(&pagerlck); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/portdat.h --- a/sys/src/nix/port/portdat.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/portdat.h Sat Dec 03 10:15:30 2011 +0000 @@ -16,6 +16,7 @@ typedef struct Kzio Kzio; typedef struct Log Log; typedef struct Logflag Logflag; +typedef struct Lockstats Lockstats; typedef struct Mhead Mhead; typedef struct Mnt Mnt; typedef struct Mntcache Mntcache; @@ -36,11 +37,13 @@ typedef struct Procalloc Procalloc; typedef struct Pte Pte; typedef struct QLock QLock; +typedef struct QLockstats QLockstats; typedef struct Queue Queue; typedef struct Ref Ref; typedef struct Rendez Rendez; typedef struct Rgrp Rgrp; typedef struct RWlock RWlock; +typedef struct Sched Sched; typedef struct Schedq Schedq; typedef struct Segment Segment; typedef struct Sem Sem; @@ -50,6 +53,7 @@ typedef struct Timers Timers; typedef struct Uart Uart; typedef struct Waitq Waitq; +typedef struct Waitstats Waitstats; typedef struct Walkqid Walkqid; typedef struct Watchdog Watchdog; typedef struct Zseg Zseg; @@ -76,12 +80,55 @@ Proc *p; }; +enum{ + NWstats = 500, + WSlock = 0, + WSqlock, + WSslock, +}; + +/* + * different arrays with stat info, so we can memset any of them + * to 0 to clear stats. 
+ */ +struct Waitstats +{ + int on; + int npcs; + int* type; + uintptr* pcs; + int* ns; + uvlong* wait; + uvlong* total; +}; +extern Waitstats waitstats; + +struct Lockstats +{ + ulong locks; + ulong glare; + ulong inglare; +}; +extern Lockstats lockstats; + +struct QLockstats +{ + ulong rlock; + ulong rlockq; + ulong wlock; + ulong wlockq; + ulong qlock; + ulong qlockq; +}; +extern QLockstats qlockstats; + struct QLock { Lock use; /* to access Qlock structure */ Proc *head; /* next process waiting for object */ Proc *tail; /* last process waiting for object */ int locked; /* flag */ + uintptr pc; }; struct RWlock @@ -319,7 +366,7 @@ ulong daddr; /* Disc address on swap */ int ref; /* Reference count */ uchar modref; /* Simulated modify/reference bits */ - uchar color; /* Cache coloring */ + int color; /* Cache coloring */ char cachectl[MACHMAX]; /* Cache flushing control for mmuput */ Image *image; /* Associated text or swap image */ Page *next; /* Lru free list */ @@ -355,6 +402,7 @@ Image *next; /* Free list or lru list */ Image *prev; /* lru list */ int notext; /* no file associated */ + int color; }; /* @@ -453,7 +501,7 @@ Rendez rr; /* process waiting to read free addresses */ }; -#define NOCOLOR ((uchar)~0) +#define NOCOLOR -1 struct Segment { @@ -463,7 +511,7 @@ ushort type; /* segment type */ int pgszi; /* page size index in Mach MMMU */ uint ptepertab; - uchar color; + int color; uintptr base; /* virtual base */ uintptr top; /* virtual top */ usize size; /* size in pages */ @@ -685,12 +733,28 @@ struct Schedq { - Lock; Proc* head; Proc* tail; int n; }; +struct Sched +{ + Lock; /* runq */ + int nrdy; + ulong delayedscheds; /* statistics */ + long skipscheds; + long preempts; + int schedgain; + ulong balancetime; + Schedq runq[Nrq]; + ulong runvec; + int nmach; /* # of cores with this color */ + ulong nrun; /* to compute load */ +}; + +extern Sched run[]; + typedef union Ar0 Ar0; union Ar0 { int i; @@ -829,6 +893,7 @@ int setargs; void *ureg; /* User 
registers for notes */ + int color; Fastcall* fc; int fcount; @@ -854,13 +919,6 @@ int nqsyscall; /* # of syscalls in the last quantum */ int nfullq; - /* might want a struct someday but this is good for now. - * if that day comes, better use a pointer to a Linux struct, so - * we don't pay the price for all processes. - */ - int linux; /* bit 0 is "linux emulation". Others debug */ - int linuxexec; /* Plan 9 process starting a Linux process */ - /* * machine specific fpu, mmu and notify */ diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/portfns.h --- a/sys/src/nix/port/portfns.h Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/portfns.h Sat Dec 03 10:15:30 2011 +0000 @@ -6,6 +6,7 @@ int addconsdev(Queue*, void (*fn)(char*,int), int, int); int addkbdq(Queue*, int); int addphysseg(Physseg*); +void addwaitstat(uintptr pc, uvlong t0, int type); void addwatchdog(Watchdog*); int adec(int*); Block* adjustblock(Block*, int); @@ -15,7 +16,7 @@ void* alloczio(Segment*, long); int anyhigher(void); int anyready(void); -Image* attachimage(int, Chan*, uintptr, usize); +Image* attachimage(int, Chan*, int, uintptr, usize); Page* auxpage(usize); Block* bl2mem(uchar*, Block*, int); int blocklen(Block*); @@ -34,6 +35,7 @@ char* chanpath(Chan*); void checkalarms(void); void checkb(Block*, char*); +void clearwaitstats(void); void closeegrp(Egrp*); void closefgrp(Fgrp*); void closepgrp(Pgrp*); @@ -53,7 +55,7 @@ int decrypt(void*, void*, int); void delay(int); void delconsdevs(void); -Proc* dequeueproc(Schedq*, Proc*); +Proc* dequeueproc(Sched*, Schedq*, Proc*); Chan* devattach(int, char*); Block* devbread(Chan*, long, vlong); long devbwrite(Chan*, Block*, vlong); @@ -112,7 +114,7 @@ void fdclose(int, int); Chan* fdtochan(int, int, int, int); int findmount(Chan**, Mhead**, int, uint, Qid); -int fixfault(Segment*, uintptr, int, int); +int fixfault(Segment*, uintptr, int, int, int); void fmtinit(void); void forceclosefgrp(void); void free(void*); @@ -198,7 +200,7 @@ int 
newfd(Chan*); Mhead* newmhead(Chan*); Mount* newmount(Mhead*, Chan*, int, char*); -Page* newpage(int, Segment **, uintptr, usize); +Page* newpage(int, Segment **, uintptr, usize, int); Path* newpath(char*); Pgrp* newpgrp(void); Proc* newproc(void); @@ -236,9 +238,9 @@ void physfree(uintmem, u64int); void physinit(uintmem, u64int); void* phystag(uintmem); -void pio(Segment*, uintptr, ulong, Page**); +void pio(Segment*, uintptr, ulong, Page**, int); #define poperror() up->nerrlab-- -void portmwait(void *); +void portmwait(void*); int postnote(Proc*, int, char*, int); int pprint(char*, ...); int preempted(void); @@ -249,6 +251,7 @@ void procdump(void); int procfdprint(Chan*, int, int, char*, int); void procflushseg(Segment*); +void procinit0(void); void procpriority(Proc*, int, int); void procrestore(Proc*); void procsave(Proc*); @@ -342,6 +345,7 @@ void sleep(Rendez*, int (*)(void*), void*); void* smalloc(ulong); char* srvname(Chan*); +void startwaitstats(int); void stopnixproc(void); int swapcount(ulong); void swapinit(void); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/proc.c --- a/sys/src/nix/port/proc.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/proc.c Sat Dec 03 10:15:30 2011 +0000 @@ -9,14 +9,26 @@ #include "errstr.h" #include -int nrdy; +enum +{ + Scaling=2, + + /* + * number of schedulers used. + * 1 uses just one, which is the behavior of Plan 9. 
+ */ + Nsched = 16, +}; + Ref noteidalloc; -ulong delayedscheds; /* statistics */ -long skipscheds; -long preempts; +static Ref pidalloc; -static Ref pidalloc; +/* + * Because machines with many cores are NUMA, we try to use + * a different scheduler per color + */ +Sched run[Nsched]; struct Procalloc procalloc; @@ -25,20 +37,13 @@ extern void psrelease(Proc*); extern void psunhash(Proc*); -enum -{ - Scaling=2, -}; - static int reprioritize(Proc*); static void updatecpu(Proc*); -static int schedgain = 30; /* units in seconds */ static void rebalance(void); -static ulong balancetime; -Schedq runq[Nrq]; -ulong runvec; +int schedsteals = 1; +int scheddonates = 0; char *statename[] = { /* BUG: generate automatically */ @@ -59,6 +64,45 @@ "Down", }; +void +setmachsched(Mach *mp) +{ + int color; + + color = corecolor(mp->machno); + if(color < 0){ + print("unknown color for cpu%d\n", mp->machno); + color = 0; + } + mp->sch = &run[color%Nsched]; +} + +Sched* +procsched(Proc *p) +{ + Mach *pm; + + pm = p->mp; + if(pm == nil) + pm = m; + if(pm->sch == nil) + setmachsched(pm); + return pm->sch; +} + +/* + * bad planning, once more. + */ +void +procinit0(void) +{ + int i; + + for(i = 0; i < Nsched; i++) + run[i].schedgain = 30; + +} + /* * Always splhi()'ed. 
*/ @@ -67,6 +111,13 @@ { Edf *e; + m->inidle = 1; + if(m->sch == nil){ + print("schedinit: no sch for cpu%d\n", m->machno); + setmachsched(m); + } + ainc(&m->sch->nmach); + setlabel(&m->sched); if(up) { if((e = up->edf) && (e->flags & Admitted)) @@ -126,7 +177,9 @@ sched(void) { Proc *p; + Sched *sch; + sch = m->sch; if(m->ilockdepth) panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p", m->machno, @@ -156,7 +209,7 @@ || pga.Lock.p == up || procalloc.Lock.p == up){ up->delaysched++; - delayedscheds++; + sch->delayedscheds++; return; } up->delaysched = 0; @@ -171,6 +224,7 @@ stackok(); procsave(up); + mmuflushtlb(m->pml4->pa); if(setlabel(&up->sched)){ procrestore(up); spllo(); @@ -178,7 +232,9 @@ } gotolabel(&m->sched); } + m->inidle = 1; p = runproc(); + m->inidle = 0; if(!p->edf){ updatecpu(p); p->priority = reprioritize(p); @@ -193,19 +249,21 @@ up->mach = MACHP(m->machno); m->proc = up; mmuswitch(up); + + assert(!up->wired || up->wired == m); gotolabel(&up->sched); } int anyready(void) { - return runvec; + return m->sch->runvec; } int anyhigher(void) { - return runvec & ~((1<<(up->priority+1))-1); + return m->sch->runvec & ~((1<<(up->priority+1))-1); } /* @@ -301,7 +359,9 @@ if(n == 0) return; - D = schedgain*HZ*Scaling; + if(m->sch == nil) /* may happen during boot */ + return; + D = m->sch->schedgain*HZ*Scaling; if(n > D) n = D; @@ -356,13 +416,16 @@ /* * add a process to a scheduling queue */ -void -queueproc(Schedq *rq, Proc *p) +static void +queueproc(Sched *sch, Schedq *rq, Proc *p, int locked) { int pri; - pri = rq - runq; - lock(runq); + pri = rq - sch->runq; + if(!locked) + lock(sch); + else if(canlock(sch)) + panic("queueproc: locked and can lock"); p->priority = pri; p->rnext = 0; if(rq->tail) @@ -371,20 +434,21 @@ rq->head = p; rq->tail = p; rq->n++; - nrdy++; - runvec |= 1<nrdy++; + sch->runvec |= 1<mach==0 only when process state is saved */ if(p == 0 || p->mach){ - unlock(runq); + unlock(sch); return nil; } if(p->rnext == 0) 
@@ -412,16 +476,50 @@ else rq->head = p->rnext; if(rq->head == nil) - runvec &= ~(1<<(rq-runq)); + sch->runvec &= ~(1<<(rq-sch->runq)); rq->n--; - nrdy--; + sch->nrdy--; if(p->state != Ready) print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]); - unlock(runq); + unlock(sch); return p; } +static void +schedready(Sched *sch, Proc *p, int locked) +{ + Mpl pl; + int pri; + Schedq *rq; + + pl = splhi(); + if(edfready(p)){ + splx(pl); + return; + } + + if(m->nixtype == NIXAC) + MACHP(0)->readied = p; + + /* + * BUG: if schedready is called to rebalance the scheduler, + * for another core, then this is wrong. + */ + if(up != p) + m->readied = p; /* group scheduling */ + + updatecpu(p); + pri = reprioritize(p); + p->priority = pri; + rq = &sch->runq[pri]; + p->state = Ready; + queueproc(sch, rq, p, locked); + if(p->trace) + proctrace(p, SReady, 0); + splx(pl); +} + /* * ready(p) picks a new priority for a process and sticks it in the * runq for that priority. @@ -429,32 +527,7 @@ void ready(Proc *p) { - Mpl pl; - int pri; - Schedq *rq; - void (*pt)(Proc*, int, vlong); - - pl = splhi(); - if(edfready(p)){ - splx(pl); - return; - } - - if(m->nixtype == NIXAC) - MACHP(0)->readied = p; - if(up != p) - m->readied = p; /* group scheduling */ - - updatecpu(p); - pri = reprioritize(p); - p->priority = pri; - rq = &runq[pri]; - p->state = Ready; - queueproc(rq, p); - pt = proctrace; - if(pt) - pt(p, SReady, 0); - splx(pl); + schedready(procsched(p), p, 0); } /* @@ -480,15 +553,17 @@ { Mpl pl; int pri, npri, t; + Sched *sch; Schedq *rq; Proc *p; + sch = m->sch; t = m->ticks; - if(t - balancetime < HZ) + if(t - sch->balancetime < HZ) return; - balancetime = t; + sch->balancetime = t; - for(pri=0, rq=runq; prirunq; prihead; if(p == nil) @@ -501,9 +576,9 @@ npri = reprioritize(p); if(npri != pri){ pl = splhi(); - p = dequeueproc(rq, p); + p = dequeueproc(sch, rq, p); if(p) - queueproc(&runq[npri], p); + queueproc(sch, &sch->runq[npri], p, 0); splx(pl); goto another; } 
@@ -512,28 +587,99 @@ /* + * Is this scheduler overloaded? + * should it pass processes to any other underloaded scheduler? + */ +static int +overloaded(Sched *sch) +{ + return sch->nmach != 0 && sch->nrdy > sch->nmach; +} + +/* + * Is it reasonable to give processes to this scheduler? + */ +static int +underloaded(Sched *sch) +{ + return sch->nrdy < sch->nmach; +} + +static void +ipisched(Sched *sch) +{ + Mach* mp; + int i; + + for(i = 0; i < MACHMAX; i++){ + mp = sys->machptr[i]; + if(mp != nil && mp != m && mp->online && mp->sch == sch) + apicipi(mp->apicno); + } +} + +/* + * If we are idle, check if another scheduler is overloaded and + * steal a new process from it. But steal low priority processes to + * avoid disturbing high priority ones. + */ +static Proc* +steal(void) +{ + static int last; /* donate in round robin */ + int start, i; + Schedq *rq; + Sched *sch; + Proc *p; + + /* + * measures show that stealing is expensive, we are donating + * by now but only when calling exec(). See maydonate(). 
+ */ + if(!schedsteals) + return nil; + + start = last; + for(i = 0; i < Nsched; i++){ + last = (start+i)%Nsched; + sch = &run[last]; + if(sch == m->sch || sch->nmach == 0 || !overloaded(sch)) + continue; + for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){ + for(p = rq->head; p != nil; p = p->rnext) + if(!p->wired && p->priority < PriKproc) + break; + if(p != nil && dequeueproc(sch, rq, p) != nil) + return p; + } + } + return nil; +} + +/* * pick a process to run */ Proc* runproc(void) { Schedq *rq; + Sched *sch; Proc *p; ulong start, now; int i; - void (*pt)(Proc*, int, vlong); start = perfticks(); - + sch = m->sch; /* cooperative scheduling until the clock ticks */ if((p=m->readied) && p->mach==0 && p->state==Ready - && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){ - skipscheds++; - rq = &runq[p->priority]; + && sch->runq[Nrq-1].head == nil && sch->runq[Nrq-2].head == nil + && (!p->wired || p->wired == m)){ + sch->skipscheds++; + rq = &sch->runq[p->priority]; goto found; } - preempts++; + sch->preempts++; loop: /* @@ -548,7 +694,7 @@ * processor can run given affinity constraints. * */ - for(rq = &runq[Nrq-1]; rq >= runq; rq--){ + for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){ for(p = rq->head; p; p = p->rnext){ if(p->mp == nil || p->mp == MACHP(m->machno) || (!p->wired && i > 0)) @@ -556,13 +702,13 @@ } } + p = steal(); + if(p != nil){ + splhi(); + goto stolen; + } /* waste time or halt the CPU */ - /* But not on NIX. We need the TC to be alert in - * case the AC issues a syscall and makes its - * handler process ready. 
idlehands(); - */ - /* remember how much time we're here */ now = perfticks(); m->perf.inidle += now-start; @@ -571,20 +717,19 @@ found: splhi(); - p = dequeueproc(rq, p); + p = dequeueproc(sch, rq, p); if(p == nil) goto loop; - +stolen: p->state = Scheding; p->mp = MACHP(m->machno); if(edflock(p)){ - edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */ + edfrun(p, rq == &sch->runq[PriEdf]); /* start deadline timer and do admin */ edfunlock(); } - pt = proctrace; - if(pt) - pt(p, SRun, 0); + if(p->trace) + proctrace(p, SRun, 0); return p; } @@ -592,9 +737,11 @@ canpage(Proc *p) { int ok; + Sched *sch; splhi(); - lock(runq); + sch = procsched(p); + lock(sch); /* Only reliable way to see if we are Running */ if(p->mach == 0) { p->newtlb = 1; @@ -602,7 +749,7 @@ } else ok = 0; - unlock(runq); + unlock(sch); spllo(); return ok; @@ -677,6 +824,7 @@ p->tctime = 0ULL; p->ac = nil; p->nfullq = 0; + memset(&p->PMMU, 0, sizeof p->PMMU); return p; } @@ -712,6 +860,18 @@ p->wired = MACHP(bm); p->mp = p->wired; + + /* + * adjust our color to the new domain. 
+ */ + if(up == nil || p != up) + return; + up->color = corecolor(up->mp->machno); + qlock(&up->seglock); + for(i = 0; i < NSEG; i++) + if(up->seg[i]) + up->seg[i]->color = up->color; + qunlock(&up->seglock); } void @@ -742,7 +902,6 @@ sleep(Rendez *r, int (*f)(void*), void *arg) { Mpl pl; - void (*pt)(Proc*, int, vlong); pl = splhi(); @@ -778,9 +937,8 @@ * now we are committed to * change state and call scheduler */ - pt = proctrace; - if(pt) - pt(up, SSleep, 0); + if(up->trace) + proctrace(up, SSleep, 0); up->state = Wakeme; up->r = r; @@ -788,6 +946,7 @@ m->cs++; procsave(up); + mmuflushtlb(m->pml4->pa); if(setlabel(&up->sched)) { /* * here when the process is awakened @@ -1063,7 +1222,6 @@ Rgrp *rgrp; Pgrp *pgrp; Chan *dot; - void (*pt)(Proc*, int, vlong); if(0 && up->nfullq > 0) iprint(" %s=%d", up->text, up->nfullq); @@ -1077,9 +1235,8 @@ if (up->tt) timerdel(up); - pt = proctrace; - if(pt) - pt(up, SDead, 0); + if(up->trace) + proctrace(up, SDead, 0); /* nil out all the resources under lock (free later) */ qlock(&up->debug); @@ -1354,18 +1511,21 @@ scheddump(void) { Proc *p; + Sched *sch; Schedq *rq; - for(rq = &runq[Nrq-1]; rq >= runq; rq--){ - if(rq->head == 0) - continue; - print("rq%ld:", rq-runq); - for(p = rq->head; p; p = p->rnext) - print(" %d(%lud)", p->pid, m->ticks - p->readytime); - print("\n"); - delay(150); + for(sch = run; sch < &run[Nsched]; sch++){ + for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){ + if(rq->head == 0) + continue; + print("sch%ld rq%ld:", sch - run, rq-sch->runq); + for(p = rq->head; p; p = p->rnext) + print(" %d(%lud)", p->pid, m->ticks - p->readytime); + print("\n"); + delay(150); + } + print("sch%ld: nrdy %d\n", sch - run, sch->nrdy); } - print("nrdy %d\n", nrdy); } void @@ -1584,17 +1744,20 @@ /* * time accounting called by clock() splhi'd + * only cpu0 computes system load average */ void accounttime(void) { + Sched *sch; Proc *p; ulong n, per; - static ulong nrun; + sch = m->sch; p = m->proc; if(p) { - nrun++; + 
if(m->machno == 0) + sch->nrun++; p->time[p->insyscall]++; } @@ -1625,9 +1788,16 @@ * approximately the load over the last second, * with a tail lasting about 5 seconds. */ - n = nrun; - nrun = 0; - n = (nrdy+n)*1000; + n = sch->nrun; + sch->nrun = 0; + n = (sch->nrdy+n)*1000; m->load = (m->load*(HZ-1)+n)/HZ; } +void +halt(void) +{ + if(m->sch->nrdy != 0) + return; + hardhalt(); +} diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/qlock.c --- a/sys/src/nix/port/qlock.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/qlock.c Sat Dec 03 10:15:30 2011 +0000 @@ -3,36 +3,48 @@ #include "mem.h" #include "dat.h" #include "fns.h" +#include -struct { - ulong rlock; - ulong rlockq; - ulong wlock; - ulong wlockq; - ulong qlock; - ulong qlockq; -} rwstats; +QLockstats qlockstats; + +static void +lockstat(uintptr pc, uvlong w) +{ + addwaitstat(pc, w, WSqlock); +} + +static void +slockstat(uintptr pc, uvlong w) +{ + addwaitstat(pc, w, WSslock); +} void qlock(QLock *q) { Proc *p; + uvlong t0; + cycles(&t0); if(m->ilockdepth != 0) print("qlock: %#p: ilockdepth %d", getcallerpc(&q), m->ilockdepth); if(up != nil && up->nlocks) print("qlock: %#p: nlocks %d", getcallerpc(&q), up->nlocks); - lock(&q->use); - rwstats.qlock++; + if(!canlock(&q->use)){ + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.qlock++; if(!q->locked) { q->locked = 1; + q->pc = getcallerpc(&q); unlock(&q->use); return; } if(up == nil) panic("qlock"); - rwstats.qlockq++; + qlockstats.qlockq++; p = q->tail; if(p == 0) q->head = up; @@ -42,8 +54,11 @@ up->qnext = 0; up->state = Queueing; up->qpc = getcallerpc(&q); + if(up->trace) + proctrace(up, SLock, 0); unlock(&q->use); sched(); + lockstat(getcallerpc(&q), t0); } int @@ -56,6 +71,7 @@ return 0; } q->locked = 1; + q->pc = getcallerpc(&q); unlock(&q->use); return 1; @@ -65,8 +81,13 @@ qunlock(QLock *q) { Proc *p; + uvlong t0; - lock(&q->use); + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } if 
(q->locked == 0) print("qunlock called with qlock not held, from %#p\n", getcallerpc(&q)); @@ -76,10 +97,12 @@ if(q->head == 0) q->tail = 0; unlock(&q->use); + q->pc = p->qpc; ready(p); return; } q->locked = 0; + q->pc = 0; unlock(&q->use); } @@ -87,9 +110,14 @@ rlock(RWlock *q) { Proc *p; + uvlong t0; - lock(&q->use); - rwstats.rlock++; + cycles(&t0); + if(!canlock(&q->use)){ + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.rlock++; if(q->writer == 0 && q->head == nil){ /* no writer, go for it */ q->readers++; @@ -97,7 +125,7 @@ return; } - rwstats.rlockq++; + qlockstats.rlockq++; p = q->tail; if(up == nil) panic("rlock"); @@ -108,16 +136,24 @@ q->tail = up; up->qnext = 0; up->state = QueueingR; + if(up->trace) + proctrace(up, SLock, 0); unlock(&q->use); sched(); + lockstat(getcallerpc(&q), t0); } void runlock(RWlock *q) { Proc *p; + uvlong t0; - lock(&q->use); + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } p = q->head; if(--(q->readers) > 0 || p == nil){ unlock(&q->use); @@ -139,9 +175,14 @@ wlock(RWlock *q) { Proc *p; + uvlong t0; - lock(&q->use); - rwstats.wlock++; + cycles(&t0); + if(!canlock(&q->use)){ + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.wlock++; if(q->readers == 0 && q->writer == 0){ /* noone waiting, go for it */ q->wpc = getcallerpc(&q); @@ -152,7 +193,7 @@ } /* wait */ - rwstats.wlockq++; + qlockstats.wlockq++; p = q->tail; if(up == nil) panic("wlock"); @@ -163,16 +204,24 @@ q->tail = up; up->qnext = 0; up->state = QueueingW; + if(up->trace) + proctrace(up, SLock, 0); unlock(&q->use); sched(); + lockstat(getcallerpc(&q), t0); } void wunlock(RWlock *q) { Proc *p; + uvlong t0; - lock(&q->use); + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } p = q->head; if(p == nil){ q->writer = 0; @@ -209,8 +258,14 @@ int canrlock(RWlock *q) { - lock(&q->use); - rwstats.rlock++; + uvlong t0; + + if(!canlock(&q->use)){ + 
cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.rlock++; if(q->writer == 0 && q->head == nil){ /* no writer, go for it */ q->readers++; diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/sysproc.c --- a/sys/src/nix/port/sysproc.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/sysproc.c Sat Dec 03 10:15:30 2011 +0000 @@ -8,6 +8,7 @@ #include "../port/edf.h" #include +#include void @@ -233,6 +234,7 @@ wm = up->wired; if(wm) procwired(p, wm->machno); + p->color = up->color; ready(p); sched(); @@ -264,6 +266,38 @@ } Hdr; static void +donate(Proc *p) +{ + static int coreno; + int core, i; + Mach *mp; + extern int scheddonates; + + if(!scheddonates || p->wired) + return; + + for(i = 0; i < MACHMAX; i++){ + core = i; + mp = MACHP(core); + if(mp == m || mp == nil || mp->online == 0 || mp->sch == nil) + continue; + if(mp->nixtype != NIXTC || mp->sch == m->sch) + continue; + if(mp->sch->nrdy > m->sch->nrdy)/* more loaded than us, ignore */ + continue; + p->mp = mp; + p->color = corecolor(mp->machno); + if(p->color < 0) + p->color = 0; + coreno = core + 1; +iprint("donate %d -> %d\n", m->machno, mp->machno); + sched(); + return; + } + /* no core preferred, don't change the process color */ +} + +static void execac(Ar0* ar0, int core, char *ufile, char **argv) { Hdr hdr; @@ -278,6 +312,7 @@ long hdrsz, magic, textsz, datasz, bsssz; uintptr textlim, datalim, bsslim, entry, stack; Mach *mp; + static int colorgen; /* * Open the file, remembering the final element and the full name. @@ -385,6 +420,11 @@ || datalim < textlim || bsslim < datalim) error(Ebadexec); + if(core != 0) + up->color = corecolor(core); + else + donate(up); + /* * The new stack is created in ESEG, temporarily mapped elsewhere. * The stack contains, in descending address order: @@ -407,13 +447,7 @@ nexterror(); } up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ); - /* - * The color for the new stack determines the colors for the new segments. 
- * Even a cached text image changes its color to that of the stack. - * This will make new pages allocated for these segments prefer the color - * for the core where the program will run. - */ - // up->seg[ESEG]->color = acpicorecolor(core); + up->seg[ESEG]->color = up->color; /* * Stack is a pointer into the temporary stack @@ -561,10 +595,9 @@ /* Text. Shared. Attaches to cache image if possible * but prepaged if core > 0. */ - img = attachimage(SG_TEXT|SG_RONLY, chan, UTZERO, (textlim-UTZERO)/BIGPGSZ); + img = attachimage(SG_TEXT|SG_RONLY, chan, up->color, UTZERO, (textlim-UTZERO)/BIGPGSZ); s = img->s; up->seg[TSEG] = s; - s->color = up->seg[ESEG]->color; s->flushme = 1; s->fstart = 0; s->flen = hdrsz+textsz; @@ -573,7 +606,7 @@ /* Data. Shared. */ s = newseg(SG_DATA, textlim, (datalim-textlim)/BIGPGSZ); up->seg[DSEG] = s; - s->color = up->seg[ESEG]->color; + s->color = up->color; /* Attached by hand */ incref(img); @@ -583,7 +616,7 @@ /* BSS. Zero fill on demand for TS */ up->seg[BSEG] = newseg(SG_BSS, datalim, (bsslim-datalim)/BIGPGSZ); - up->seg[BSEG]->color= up->seg[ESEG]->color; + up->seg[BSEG]->color= up->color; /* * Move the stack @@ -954,6 +987,8 @@ up->rendhash = *l; *l = up; up->state = Rendezvous; + if(up->trace) + proctrace(up, SLock, 0); unlock(up->rgrp); sched(); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/syssem.c --- a/sys/src/nix/port/syssem.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/syssem.c Sat Dec 03 10:15:30 2011 +0000 @@ -98,9 +98,9 @@ panic("semsleep: no memory"); s->q[s->nq++] = up; up->waitsem = nil; + up->state = Semdown; unlock(s); DBG("semsleep up %#p blocked\n", up); - up->state = Semdown; sched(); Done: DBG("semsleep up %#p awaken\n", up); diff -r 3dd2bfb35f81 -r c3b70ce3c9a5 sys/src/nix/port/taslock.c --- a/sys/src/nix/port/taslock.c Thu Dec 01 13:47:08 2011 +0100 +++ b/sys/src/nix/port/taslock.c Sat Dec 03 10:15:30 2011 +0000 @@ -6,17 +6,97 @@ #include "../port/edf.h" +/* + * measure max lock cycles and max 
lock waiting time. + */ +#define LOCKCYCLES 0 + uvlong maxlockcycles; uvlong maxilockcycles; -ulong maxlockpc; -ulong maxilockpc; +uintptr maxlockpc; +uintptr maxilockpc; -struct +Lockstats lockstats; +Waitstats waitstats; +Lock waitstatslk; + +static void +newwaitstats(void) { - ulong locks; - ulong glare; - ulong inglare; -} lockstats; + if(waitstats.pcs != nil) + return; + waitstats.pcs = malloc(NWstats * sizeof waitstats.pcs[0]); + waitstats.ns = malloc(NWstats * sizeof waitstats.ns[0]); + waitstats.wait = malloc(NWstats * sizeof waitstats.wait[0]); + waitstats.total = malloc(NWstats * sizeof waitstats.total[0]); + waitstats.type = malloc(NWstats * sizeof waitstats.type[0]); +} + +void +startwaitstats(int on) +{ + newwaitstats(); + mfence(); + waitstats.on = on; + print("lockstats %s\n", on?"on":"off"); +} + +void +clearwaitstats(void) +{ + newwaitstats(); + memset(waitstats.ns, 0, NWstats * sizeof(int)); + memset(waitstats.wait, 0, NWstats * sizeof(uvlong)); + memset(waitstats.total, 0, NWstats * sizeof(uvlong)); +} + +void +addwaitstat(uintptr pc, uvlong t0, int type) +{ + uint i; + uvlong w; + + if(waitstats.on == 0) + return; + + cycles(&w); + w -= t0; + mfence(); + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == pc){ + ainc(&waitstats.ns[i]); + if(w > waitstats.wait[i]) + waitstats.wait[i] = w; /* race but ok */ + waitstats.total[i] += w; /* race but ok */ + return; + } + if(!canlock(&waitstatslk)) + return; + + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == pc){ + ainc(&waitstats.ns[i]); + if(w > waitstats.wait[i]) + waitstats.wait[i] = w; /* race but ok */ + waitstats.total[i] += w; + unlock(&waitstatslk); + return; + } + + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == 0){ + waitstats.ns[i] = 1; + waitstats.type[i] = type; + waitstats.wait[i] = w; + waitstats.total[i] = w; + mfence(); + waitstats.pcs[i] = pc; + waitstats.npcs++; + break; + } + + unlock(&waitstatslk); +} static void dumplockmem(char *tag, Lock *l) @@ -49,6 +129,7 @@ { 
int i; uintptr pc; + uvlong t0; pc = getcallerpc(&l); @@ -61,14 +142,15 @@ l->pc = pc; l->p = up; l->isilock = 0; -#ifdef LOCKCYCLES - cycles(&l->lockcycles); -#endif + if(LOCKCYCLES) + cycles(&l->lockcycles); + return 0; } if(up) adec(&up->nlocks); + cycles(&t0); lockstats.glare++; for(;;){ lockstats.inglare++; @@ -96,9 +178,10 @@ l->pc = pc; l->p = up; l->isilock = 0; -#ifdef LOCKCYCLES - cycles(&l->lockcycles); -#endif + if(LOCKCYCLES) + cycles(&l->lockcycles); + if(l != &waitstatslk) + addwaitstat(pc, t0, WSlock); return 1; } if(up) @@ -111,12 +194,14 @@ { Mpl pl; uintptr pc; + uvlong t0; pc = getcallerpc(&l); lockstats.locks++; pl = splhi(); if(TAS(&l->key) != 0){ + cycles(&t0); lockstats.glare++; /* * Cannot also check l->pc, l->m, or l->isilock here @@ -129,8 +214,11 @@ while(l->key) ; pl = splhi(); - if(TAS(&l->key) == 0) + if(TAS(&l->key) == 0){ + if(l != &waitstatslk) + addwaitstat(pc, t0, WSlock); goto acquire; + } } } acquire: @@ -142,9 +230,8 @@ l->p = up; l->isilock = 1; l->m = MACHP(m->machno); -#ifdef LOCKCYCLES - cycles(&l->lockcycles); -#endif + if(LOCKCYCLES) + cycles(&l->lockcycles); } int @@ -164,24 +251,25 @@ l->p = up; l->m = MACHP(m->machno); l->isilock = 0; -#ifdef LOCKCYCLES - cycles(&l->lockcycles); -#endif + if(LOCKCYCLES) + cycles(&l->lockcycles); + return 1; } void unlock(Lock *l) { -#ifdef LOCKCYCLES uvlong x; - cycles(&x); - l->lockcycles = x - l->lockcycles; - if(l->lockcycles > maxlockcycles){ - maxlockcycles = l->lockcycles; - maxlockpc = l->pc; + + if(LOCKCYCLES){ + cycles(&x); + l->lockcycles = x - l->lockcycles; + if(l->lockcycles > maxlockcycles){ + maxlockcycles = l->lockcycles; + maxlockpc = l->pc; + } } -#endif if(l->key == 0) print("unlock: not locked: pc %#p\n", getcallerpc(&l)); @@ -206,16 +294,16 @@ iunlock(Lock *l) { Mpl pl; + uvlong x; -#ifdef LOCKCYCLES - uvlong x; - cycles(&x); - l->lockcycles = x - l->lockcycles; - if(l->lockcycles > maxilockcycles){ - maxilockcycles = l->lockcycles; - maxilockpc = l->pc; + 
if(LOCKCYCLES){ + cycles(&x); + l->lockcycles = x - l->lockcycles; + if(l->lockcycles > maxilockcycles){ + maxilockcycles = l->lockcycles; + maxilockpc = l->pc; + } } -#endif if(l->key == 0) print("iunlock: not locked: pc %#p\n", getcallerpc(&l));