# HG changeset patch # User Noah Evans # Date 1315846987 -7200 # Node ID de961ca7da2bb408615c1e9056e3d40532fa5806 # Parent fb7d4fad4e57ae6e68544e73bfac1bd62638fd3f nix: latest lsub changes R=nemo.mbox, rminnich CC=john, nix-dev http://codereview.appspot.com/4988055 diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/9k8cpu Binary file sys/src/nix/k10/9k8cpu has changed diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/9k8cpu.elf Binary file sys/src/nix/k10/9k8cpu.elf has changed diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/9k8cpu.gz Binary file sys/src/nix/k10/9k8cpu.gz has changed diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/acore.c --- a/sys/src/nix/k10/acore.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/acore.c Mon Sep 12 19:03:07 2011 +0200 @@ -57,6 +57,10 @@ Mach *mp; if((mp = sys->machptr[i]) != nil && mp->online != 0){ + if(mp->nixtype != NIXAC){ + print("testicc: core %d is not an AC\n", i); + return; + } print("calling core %d... ", i); mp->icc->flushtlb = 0; snprint((char*)mp->icc->data, ICCLNSZ, "<%d>", i); diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/l64idt.s --- a/sys/src/nix/k10/l64idt.s Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/l64idt.s Mon Sep 12 19:03:07 2011 +0200 @@ -22,10 +22,10 @@ MOVQ RUSER, 0(SP) MOVQ RMACH, 8(SP) - MOVW DS, 16(SP) - MOVW ES, 18(SP) - MOVW FS, 20(SP) - MOVW GS, 22(SP) +// MOVW DS, 16(SP) +// MOVW ES, 18(SP) +// MOVW FS, 20(SP) +// MOVW GS, 22(SP) SWAPGS BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ @@ -71,10 +71,11 @@ JEQ _iretnested SWAPGS - MOVW 22(SP), GS - MOVW 20(SP), FS - MOVW 18(SP), ES - MOVW 16(SP), DS + /* per the architecture manual, moving 16 bits to FS can zero it. Bad ... 
*/ +// MOVW 22(SP), GS +// MOVW 20(SP), FS +// MOVW 18(SP), ES +// MOVW 16(SP), DS MOVQ 8(SP), RMACH MOVQ 0(SP), RUSER diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/l64syscall.s --- a/sys/src/nix/k10/l64syscall.s Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/l64syscall.s Mon Sep 12 19:03:07 2011 +0200 @@ -134,8 +134,10 @@ MOVQ R10, (9*8)(SP) MOVQ R8, (7*8)(SP) MOVQ R9, (8*8)(SP) - - MOVW $SSEL(SiUDS, SsRPL3), (15*8+0)(SP) + MOVL $FSbase, RARG + CALL rdmsr(SB) + MOVL AX, (10*8+0)(SP) // use the unused R11 slot + MOVW DS, (15*8+0)(SP) MOVW ES, (15*8+2)(SP) MOVW FS, (15*8+4)(SP) MOVW GS, (15*8+6)(SP) @@ -149,6 +151,7 @@ MOVQ 16(SP), AX /* Ureg.ax */ MOVQ (16+6*8)(SP), BP /* Ureg.bp */ _linuxsyscallreturn: + MOVL (16+10*8)(SP), R11 /* R11 for wrmsr below */ ADDQ $(17*8), SP /* registers + arguments */ CLI SWAPGS @@ -156,6 +159,17 @@ MOVW 2(SP), ES MOVW 4(SP), FS MOVW 6(SP), GS + PUSHQ AX + MOVL $FSbase, RARG + XORQ CX, CX + MOVL R11, CX + PUSHQ CX + /* dummy */ + PUSHQ RARG + CALL wrmsr(SB) + POPQ AX + POPQ AX + POPQ AX MOVQ 24(SP), CX /* ip */ MOVQ 40(SP), R11 /* flags */ diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/linuxarchsys.c --- a/sys/src/nix/k10/linuxarchsys.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/linuxarchsys.c Mon Sep 12 19:03:07 2011 +0200 @@ -7,6 +7,7 @@ #include "../port/error.h" #include #include "ureg.h" +#include "amd64.h" /* from linux */ #define ARCH_SET_GS 0x1001 @@ -15,30 +16,31 @@ #define ARCH_GET_GS 0x1004 void -arch_prctl(Ar0*ar, va_list list) +arch_prctl(Ar0*ar0, Ureg *ureg, va_list list) { uintptr va; int code; code = va_arg(list, int); va = va_arg(list, uintptr); if (up->linux & 128) print("%d:arch_prctl code %x va %p: ", up->pid, code, va); - if (code < ARCH_SET_GS || code > ARCH_GET_GS) - error("Bad code!"); /* always make sure it's a valid address, no matter what the command */ validaddr((void *)va, 8, code > ARCH_SET_FS); - - if (code > ARCH_SET_FS) { - uvlong val; - val = rdmsr(code == ARCH_GET_FS ? 
0xC0000100 : 0xC0000101); - memmove((void *)va, &val, sizeof(uvlong)); - if (up->linux & 128) print("get %#p\n", (void *)val); - } else { - if (code == ARCH_SET_GS) - error("Can't set GS yet"); - wrmsr(code == ARCH_SET_FS ? 0xC0000100 : 0xC0000101, va); - if (up->linux & 128) print("set\n"); + switch(code) { + case ARCH_SET_GS: + case ARCH_GET_GS: + error("not yet"); + break; + case ARCH_SET_FS: + memmove(&ureg->r11, &va, 4); + ar0->i = 0; + break; + case ARCH_GET_FS: + memmove((void *)va, &ureg->r11, 4); + ar0->i = 0; + break; + default: + error("Bad code"); + break; } - - ar->i = 0; } diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/linuxsyscall.c --- a/sys/src/nix/k10/linuxsyscall.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/linuxsyscall.c Mon Sep 12 19:03:07 2011 +0200 @@ -31,6 +31,7 @@ linuxsyscall(unsigned int, Ureg* ureg) { void noted(Ureg*, uintptr); + void arch_prctl(Ar0 *ar, Ureg *ureg, va_list list); unsigned int scallnr; void notify(Ureg *); char *e; @@ -85,6 +86,11 @@ up->psstate = linuxsystab[scallnr].n; + /* note: arch_prctl needs ureg. Unless someone thinks of a better way. + * one way is to change the way we construct linuxargs, + * and add ureg is scallnr == 158. The current if below is a hack, + * I know. + */ linuxargs[0] = ureg->di; linuxargs[1] = ureg->si; linuxargs[2] = ureg->dx; @@ -98,7 +104,11 @@ print("\n"); } if (up->linux&32) dumpregs(ureg); - linuxsystab[scallnr].f(&ar0, (va_list)linuxargs); + /* this one is special .. 
sigh */ + if (scallnr == 158) + arch_prctl(&ar0, ureg, (va_list)linuxargs); + else + linuxsystab[scallnr].f(&ar0, (va_list)linuxargs); if (up->linux & 64){print("AFTER: ");dumpregs(ureg);} poperror(); }else{ diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/linuxsystab.h --- a/sys/src/nix/k10/linuxsystab.h Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/linuxsystab.h Mon Sep 12 19:03:07 2011 +0200 @@ -3,6 +3,7 @@ Syscall linuxuname; Syscall linuxbrk; Syscall linuxopen; +Syscall linuxclose; Syscall syssegbrk; Syscall linuxwritev; Syscall linuxsocketcall; @@ -25,12 +26,12 @@ Syscall gasm; Syscall linuxcga; Syscall getrusage; -Syscall arch_prctl; +void arch_prctl(Ar0*,Ureg*,va_list); //see linux_syscall.c extern Syscall sys_write; extern Syscall sys_read; extern Syscall sysopen; extern Syscall syspread; - +extern Syscall sysclose; struct syscall { char* n; Syscall*f; @@ -41,7 +42,8 @@ struct syscall linuxsystab[] = { [0] {"read", sys_read, 3, {.i = 0}}, [1] {"write", sys_write, 3, {.i = -1}}, - [2] {"linuxopen", sysopen, 2, {.i = -1}}, + [2] {"linuxopen", sysopen, 2, {.i = -1}},/* note: can just use sysopen instead of linuxopen! */ + [3] {"linuxclose", sysclose, 1, {.p = (void *)-1}}, [102] {"getuid", linuxgeteuid, 0, {.i = -1}}, [12] {"linuxbrk", linuxbrk, 1, {.i = -1}}, [104] {"getgid", linuxgeteuid, 0, {.i = -1}}, @@ -60,9 +62,9 @@ [20] {"writev", linuxwritev, 1, {.i = 0}}, // [197] {"fstat64", fstat64, 1, {.i = -1}}, // [221] {"futex", futex, 1, {.i = 0}}, - [158] {"arch_prctl", arch_prctl, 2, {.p = (void *)-1}}, + [158] {"arch_prctl", /* fix later */futex, 2, {.p = (void *)-1}}, - [3] {"close", nil, 1, {.p = (void *)-1}}, +/* leave blank lines for things you move up above -- the holes make it easier to see what's been supported. 
*/ [4] {"stat", nil, 1, {.p = (void *)-1}}, [5] {"fstat", nil, 1, {.p = (void *)-1}}, [6] {"lstat", nil, 1, {.p = (void *)-1}}, diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/main.c --- a/sys/src/nix/k10/main.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/main.c Mon Sep 12 19:03:07 2011 +0200 @@ -118,21 +118,57 @@ ; DBG("mach %d is go\n", m->machno); - acmmuswitch(); - decref(&squids); - acinit(); - acsched(); + switch(m->nixtype){ + case NIXAC: + acmmuswitch(); + decref(&squids); + acinit(); + acsched(); + panic("squidboy"); + break; + case NIXTC: + /* + * We only need the idt and syscall entry point actually. + * At boot time the boot processor might set our role after + * we have decided to become an AC. + */ + vsvminit(MACHSTKSZ, NIXTC); + + decref(&squids); + /* + * Caution: no clock sync. + */ + timersinit(); + lock(&active); + active.machs |= 1<<m->machno; + unlock(&active); +ndnr(); + schedinit(); + break; + } + panic("squidboy returns (type %d)", m->nixtype); } static void testiccs(void) { int i; + Mach *mp; extern void testicc(int); + char *n[] = { + [NIXAC] "AC", + [NIXTC] "TC", + [NIXKC] "KC" + }; /* setup arguments for all */ for(i = 1; i < MACHMAX; i++) - testicc(i); + if((mp = sys->machptr[i]) != nil && mp->online != 0){ + print("cpu%d machno %d role %s\n", + i, mp->machno, n[mp->nixtype]); + if(mp->nixtype == NIXAC) + testicc(i); + } print("bootcore: all cores done\n"); } @@ -156,6 +192,8 @@ */ mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); mp->icc->fn = nil; + if(i < 4) + mp->nixtype = NIXTC; incref(&squids); } active.thunderbirdsarego = 1; @@ -250,19 +288,19 @@ confinit(); archinit(); mallocinit(); - acpiinit(); - umeminit(); - trapinit(); /* - * Printinit will cause the first malloc - * call to happen (printinit->qopen->malloc). + * Acpiinit will cause the first malloc + * call to happen. 
* If the system dies here it's probably due * to malloc not being initialised * correctly, or the data segment is misaligned * (it's amazing how far you can get with * things like that completely broken). */ + acpiinit(); + umeminit(); + trapinit(); printinit(); /* @@ -273,7 +311,7 @@ */ i8259init(32); - mpsinit(32); /* Use at most 6 cores; BUG: use an argument */ + mpsinit(32); /* Use at most 32 cores */ apiconline(); sipi(); diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/mkfile --- a/sys/src/nix/k10/mkfile Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/mkfile Mon Sep 12 19:03:07 2011 +0200 @@ -80,6 +80,7 @@ etherm10g.$O: ../386/etherm10g2k.i ../386/etherm10g4k.i i8259.$O: io.h kbd.$O: ../port/error.h io.h +linuxsyscall.$O: linuxsystab.h pci.$O: io.h sdaoe.$O: ../port/error.h ../port/netif.h ../port/sd.h sdaoe.$O: ../386/aoe.h etherif.h io.h diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/mmu.c --- a/sys/src/nix/k10/mmu.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/mmu.c Mon Sep 12 19:03:07 2011 +0200 @@ -18,26 +18,6 @@ #define PPN(x) ((x)&~(PGSZ-1)) -/* - * Nemo: NB: - * m->pml4 is always the same. - * sched->procsave->flushtbl zeroes the entries in pml4 up to - * pml4->daddr (which is the number of entries used by the user, or - * the index in the upper page table for this entry) - * the new process will fault and we populate again the page table - * as needed. - * - * mmuptp[0] is used to keep a free list of pages. - * mmuptp[1-3] are used to keep PT pages for each level - * 4K pages: pml4 -> lvl3 -> lvl2 -> lvl1 ->pg - * 2M pages: pml4 -> lvl3 -> lvl2 -> pg - * - * Therefore, we can't use pml4 in other processors. Each one - * has to play the same trick at least, using its own pml4. - * For NIX, we have to fill up the pml4 of the application core - * so it wont fault. 
- */ - void mmuflushtlb(u64int) { @@ -211,7 +191,6 @@ } if(m->pml4->daddr){ -if(proc->ac)print("mmuswitch: clear u for pml4\n"); memset(UINT2PTR(m->pml4->va), 0, m->pml4->daddr*sizeof(PTE)); m->pml4->daddr = 0; } @@ -264,15 +243,17 @@ * For 1*GiB pages, we use two levels. */ void -mmuput(uintptr va, uintmem pa, uint attr, Page *pg) +mmuput(uintptr va, Page *pg, uint attr) { int lvl, user, x, pgsz; PTE *pte; Page *page, *prev; Mpl pl; + uintmem pa; DBG("up %#p mmuput %#p %#Px %#ux\n", up, va, pa, attr); + pa = pg->pa; assert(pg->pgszi >= 0); pgsz = m->pgsz[pg->pgszi]; if(pa & (pgsz-1)) @@ -506,7 +487,7 @@ uintptr va; usize o, sz; - print("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa)); + DBG("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa)); if(m->machno != 0) panic("vmap"); @@ -574,7 +555,7 @@ * resources used for the allocation (e.g. page table * pages). */ - print("vunmap(%#p, %lud)\n", v, size); + DBG("vunmap(%#p, %lud)\n", v, size); } int diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/k10/trap.c --- a/sys/src/nix/k10/trap.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/k10/trap.c Mon Sep 12 19:03:07 2011 +0200 @@ -10,6 +10,7 @@ #include "../port/pmc.h" #include "io.h" +#include "amd64.h" extern int notify(Ureg*); @@ -316,6 +317,11 @@ char buf[ERRMAX]; Vctl *ctl, *v; + vno = ureg->type; + +if(m->machno != 0 && m->nixtype != NIXAC) +print("cpu%d trap %ulld\n", m->machno, ureg->type); + m->perf.intrts = perfticks(); user = userureg(ureg); if(user && (m->nixtype == NIXTC)){ @@ -327,7 +333,6 @@ _pmcupdate(m); - vno = ureg->type; if(ctl = vctl[vno]){ if(ctl->isintr){ m->intr++; @@ -437,6 +442,7 @@ iprint("r15\t%#16.16llux\n", ureg->r15); iprint("ds %#4.4ux es %#4.4ux fs %#4.4ux gs %#4.4ux\n", ureg->ds, ureg->es, ureg->fs, ureg->gs); + iprint("ureg fs\t%#ux\n", *(unsigned int *)&ureg->ds); iprint("type\t%#llux\n", ureg->type); iprint("error\t%#llux\n", ureg->error); iprint("pc\t%#llux\n", ureg->ip); @@ -445,6 +451,8 @@ iprint("sp\t%#llux\n", 
ureg->sp); iprint("ss\t%#llux\n", ureg->ss); iprint("type\t%#llux\n", ureg->type); + iprint("FS\t%#llux\n", rdmsr(FSbase)); + iprint("GS\t%#llux\n", rdmsr(GSbase)); iprint("m\t%#16.16p\nup\t%#16.16p\n", m, up); } @@ -623,6 +631,7 @@ /* This routine must save the values of registers the user is not permitted * to write from devproc and then restore the saved values before returning. + * TODO: fix this because the segment registers are wrong for 64-bit mode. */ void setregisters(Ureg* ureg, char* pureg, char* uva, int n) diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/port/fault.c --- a/sys/src/nix/port/fault.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/port/fault.c Mon Sep 12 19:03:07 2011 +0200 @@ -68,7 +68,7 @@ int ref; Pte **p, *etp; uintptr soff; - uintmem mmuphys, pgsz; + uintmem pgsz; uint mmuattr; Page **pg, *lkp, *new; Page *(*fn)(Segment*, uintptr); @@ -90,7 +90,6 @@ if(pg > etp->last) etp->last = pg; - mmuphys = 0; mmuattr = 0; switch(type) { default: @@ -101,7 +100,6 @@ if(pagedout(*pg)) pio(s, addr, soff, pg); - mmuphys = segppn(s, (*pg)->pa); mmuattr = PTERONLY|PTEVALID; (*pg)->modref = PG_REF; break; @@ -128,7 +126,6 @@ * we're the only user of the segment. 
*/ if(read && conf.copymode == 0 && s->ref == 1) { - mmuphys = segppn(s, (*pg)->pa); mmuattr = PTERONLY|PTEVALID; (*pg)->modref |= PG_REF; break; @@ -155,7 +152,6 @@ unlock(lkp); } - mmuphys = segppn(s, (*pg)->pa); mmuattr = PTEWRITE|PTEVALID; (*pg)->modref = PG_MOD|PG_REF; break; @@ -175,7 +171,6 @@ } } - mmuphys = segppn(s, (*pg)->pa); mmuattr = PTEVALID; if((s->pseg->attr & SG_RONLY) == 0) mmuattr |= PTEWRITE; @@ -186,9 +181,10 @@ } qunlock(&s->lk); - if(dommuput) - mmuput(addr, mmuphys, mmuattr, *pg); - + if(dommuput){ + assert(segppn(s, (*pg)->pa) == (*pg)->pa); + mmuput(addr, *pg, mmuattr); + } return 0; } diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/port/linuxsysemu.c --- a/sys/src/nix/port/linuxsysemu.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/port/linuxsysemu.c Mon Sep 12 19:03:07 2011 +0200 @@ -51,9 +51,13 @@ long ru_nivcsw; /* involuntary " */ }; -/*Linux kerne.org 2.6.35-30-generic #54-Ubuntu SMP Tue Jun 7 18:41:54 UTC 2011 x86_64 GNU/Linux\n*/ struct utsname linuxutsname = { - "Linux", "mynode", "2.6.35", "NIX", "x86_64", "GNUsucks" + /* from a real system */ + "Linux", + "nix", + "2.6.35-30-generic", + "#54-Ubuntu SMP Tue Jun 7 18:41:54 UTC 2011", + "x86_64", }; void @@ -78,11 +82,6 @@ if (up->linux & 128) print("%d:linuxuname va %p\n", up->pid, va); validaddr(va, 1, 1); memmove(va, &linuxutsname, sizeof(linuxutsname)); - // if this does not work we will need a /proc for bgl - // uname is just such a piece of shit. Some systems want things of the size in the struct, - // others don't. Idiots. 
-//#define BULLSHIT "Linux\0 NIX\0 2.6.19\0NIX\0x86_64\0GNUsucks" -// memmove(va, BULLSHIT, strlen(BULLSHIT)+1); if (up->linux&128) print("Returns %s\n", linuxutsname.release); ar->i = 0; } @@ -168,8 +167,26 @@ void sysopen(Ar0 *, va_list); aname = va_arg(list, char*); omode = va_arg(list, int); - USED(aname,omode); + if (up->linux & 128){ + validaddr(aname, 1, 0); + print("%d:linuxopen (%s,%o):", up->pid, aname, omode); + } sysopen(ar0, list); + if (up->linux & 128) print("=%d\n", ar0->i); +} + +void +linuxclose(Ar0 *ar0, va_list list) +{ + int fd; + void sysclose(Ar0 *, va_list); + + fd = va_arg(list, int); + if (up->linux & 128) + print("%d:linuxclose (%d):", up->pid, fd); + + sysclose(ar0, list); + if (up->linux & 128) print("=%d\n", ar0->i); } void diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/port/page.c --- a/sys/src/nix/port/page.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/port/page.c Mon Sep 12 19:03:07 2011 +0200 @@ -279,13 +279,6 @@ return p; } -/* - * Caching/free policy imlemented in putpage. - * Make sure elsewhere that pg->pa low bits are not - * set e.g. with attribute bits. - * - * TODO: change mmuput to take pa from page argument. - */ void putpage(Page *p) { diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/port/portfns.h --- a/sys/src/nix/port/portfns.h Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/port/portfns.h Mon Sep 12 19:03:07 2011 +0200 @@ -180,7 +180,7 @@ uvlong mk64fract(uvlong, uvlong); void mkqid(Qid*, vlong, ulong, int); void mmuflush(void); -void mmuput(uintptr, uintmem, uint, Page*); +void mmuput(uintptr, Page*, uint); void mmurelease(Proc*); void mmuswitch(Proc*); Chan* mntauth(Chan*, char*); diff -r fb7d4fad4e57 -r de961ca7da2b sys/src/nix/port/proc.c --- a/sys/src/nix/port/proc.c Fri Sep 09 16:49:47 2011 +0200 +++ b/sys/src/nix/port/proc.c Mon Sep 12 19:03:07 2011 +0200 @@ -1339,7 +1339,10 @@ /* * wait for all processors to take a clock interrupt - * and flush their mmu's + * and flush their mmu's. 
+ * NIX BUG: this won't work if another core is in AC mode. + * In that case we must IPI it, but only if that core is + * using this segment. */ for(nm = 0; nm < conf.nmach; nm++) if(MACHP(nm) != m)