support PAT (page attribute table) for x86. these are similar to mtrrs but are part of the page table. an architecture restriction is that all entries for the same physical memory must match. thus i believe this patch corrects the fact that the frame buffer was mapped into user space as normal (write-back) memory but mapped in the kernel as uncacheable. reference: ia32 arch. vol. 3a §10.12 "pat". Notes: Tue Jun 9 10:43:55 EDT 2009 geoff mtrr support seems to be enough for now. Reference: /n/sources/patch/saved/pat Date: Tue Aug 19 17:37:13 CES 2008 Signed-off-by: quanstro@quanstro.net Reviewed-by: geoff --- /sys/src/9/pc/fns.h Tue Aug 19 17:36:34 2008 +++ /sys/src/9/pc/fns.h Tue Aug 19 17:36:33 2008 @@ -165,6 +165,7 @@ #define userureg(ur) (((ur)->cs & 0xFFFF) == UESEL) void vectortable(void); void* vmap(ulong, int); +void* vmappat(ulong, int, int); int vmapsync(ulong); void vunmap(void*, int); void wrmsr(int, vlong); --- /sys/src/9/pc/mem.h Tue Aug 19 17:36:38 2008 +++ /sys/src/9/pc/mem.h Tue Aug 19 17:36:35 2008 @@ -138,12 +138,25 @@ #define PTEVALID (1<<0) #define PTEWT (1<<3) #define PTEUNCACHED (1<<4) +#define PTEPCD (1<<4) /* PAT name for PTEUNCACHED */ #define PTEWRITE (1<<1) #define PTERONLY (0<<1) #define PTEKERNEL (0<<2) #define PTEUSER (1<<2) -#define PTESIZE (1<<7) +#define PTESIZE (1<<7) /* only in page directory entry */ +#define PTEPAT (1<<7) /* PAT, in page table entry */ #define PTEGLOBAL (1<<8) +#define PTEDPAT (1<<12) /* PAT, in page directory entry */ + +/* + * pat memory types + */ +#define PATUC (0) +#define PATWC (1) +#define PATWT (4) +#define PATWP (5) +#define PATWB (6) +#define PATUCMINUS (7) /* * Macros for calculating offsets within the page directory base --- /sys/src/9/pc/dat.h Tue Aug 19 17:36:42 2008 +++ /sys/src/9/pc/dat.h Tue Aug 19 17:36:40 2008 @@ -284,6 +284,7 @@ Cpuapic = 1<<9, Mtrr = 1<<12, /* memory-type range regs.
*/ Pge = 1<<13, /* page global extension */ + Pat = 1<<16, // Pse2 = 1<<17, /* more page size extensions */ Clflush = 1<<19, Mmx = 1<<23, --- /sys/src/9/pc/mmu.c Tue Aug 19 17:36:52 2008 +++ /sys/src/9/pc/mmu.c Tue Aug 19 17:36:46 2008 @@ -73,6 +73,63 @@ memmove(m->gdt, gdt, sizeof gdt); } +/* + * set up a pat mappings. the system depends + * on the first 4 mappings not changing. + */ +enum{ + Patmsr = 0x277, +}; + +static uchar pattab[8] = { + PATWB, + PATWT, + PATUCMINUS, + PATUC, + + PATWB, + PATWT, + PATUCMINUS, + PATUC, +}; + +static ulong patflags[8] = { + 0, + PTEWT, + PTEPCD, + PTEPCD | PTEWT, + PTEPAT, + PTEPAT | PTEWT, + PTEPAT | PTEPCD, + PTEPAT | PTEPCD | PTEWT, +}; + +static void +setpatreg(int rno, int type) +{ + int i; + ulong s; + vlong pat; + + s = splhi(); + rdmsr(Patmsr, &pat); + + pat &= ~(0xffull<>3] = pat>>i; +} + +static void +patinit(void) +{ + setpatreg(7, PATWC); +} + void mmuinit(void) { @@ -127,6 +184,9 @@ taskswitch(PADDR(m->pdb), (ulong)m + BY2PG); ltr(TSSSEL); + + if(m->cpuiddx & Pat) + patinit(); } /* @@ -402,6 +462,57 @@ } /* + * Special PAT flags for certain memory ranges. + */ +typedef struct Memflags Memflags; +struct Memflags +{ + ulong pa; + ulong len; + ulong flags; +}; +static Memflags mftab[64]; +static int nmftab; + +static ulong +memflags(ulong pa) +{ + Memflags *tab, *m; + int n, i; + + tab = mftab; + n = nmftab; + while(n > 0){ + i = n/2; + m = tab+i; + if(m->pa < pa){ + if(pa - m->pa < m->len) + return m->flags; + tab += i+1; + n -= i+1; + }else + n = i; + } + return 0; +} + +void +addmemflags(ulong pa, ulong len, ulong flags) +{ + Memflags *m; + + if(nmftab >= nelem(mftab)) + panic("addmemflags"); + + for(m=mftab+nmftab; m > mftab && (m-1)->pa > pa; m--) + *m = *(m-1); + m->pa = pa; + m->len = len; + m->flags = flags; + nmftab++; +} + +/* * Update the mmu in response to a user fault. pa may have PTEWRITE set. 
*/ void @@ -446,7 +557,7 @@ up->mmuused = page; } old = vpt[VPTX(va)]; - vpt[VPTX(va)] = pa|PTEUSER|PTEVALID; + vpt[VPTX(va)] = pa|memflags(pa)|PTEUSER|PTEVALID; if(old&PTEVALID) flushpg(va); if(getcr3() != up->mmupdb->pa) @@ -535,9 +646,11 @@ /* * Add a device mapping to the vmap range. + * remember the flags so putmmu can maintain + * consistent mappings. */ void* -vmap(ulong pa, int size) +vmapflags(ulong pa, int size, ulong flags) { int osize; ulong o, va; @@ -552,12 +665,12 @@ size = ROUND(size, BY2PG); if(pa == 0){ - print("vmap pa=0 pc=%#p\n", getcallerpc(&pa)); + print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa)); return nil; } ilock(&vmaplock); if((va = vmapalloc(size)) == 0 - || pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){ + || pdbmap(MACHP(0)->pdb, pa|flags, va, size) < 0){ iunlock(&vmaplock); return 0; } @@ -568,9 +681,28 @@ */ USED(osize); // print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o); + addmemflags(pa, size, flags); return (void*)(va + o); } +void* +vmap(ulong pa, int size) +{ + return vmapflags(pa, size, PTEUNCACHED|PTEWRITE); +} + +void* +vmappat(ulong pa, int size, int pattype) +{ + int i; + + if(m->cpuiddx & Pat) + for(i = 0; i < nelem(pattab); i++) + if(pattab[i] == pattype) + return vmapflags(pa, size, patflags[i]|PTEWRITE); + return vmap(pa, size); +} + static int findhole(ulong *a, int n, int count) { @@ -712,6 +844,10 @@ * va, pa aligned and size >= 4MB and processor can do it. 
*/ if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){ + if(flag & PTESIZE){ + flag &= ~PTEPAT; + flag |= PTEDPAT; + } *table = (pa+off)|flag|PTESIZE|PTEVALID; pgsz = 4*MB; }else{ --- /sys/src/9/pc/screen.c Tue Aug 19 17:36:57 2008 +++ /sys/src/9/pc/screen.c Tue Aug 19 17:36:54 2008 @@ -122,7 +122,7 @@ scr->paddr = upaalloc(size, align); if(scr->paddr == 0) return -1; - scr->vaddr = vmap(scr->paddr, size); + scr->vaddr = vmappat(scr->paddr, size, PATWB); if(scr->vaddr == nil) return -1; scr->apsize = size; @@ -517,7 +517,7 @@ */ if(nsize > 64*MB) nsize = 64*MB; - scr->vaddr = vmap(npaddr, nsize); + scr->vaddr = vmappat(npaddr, nsize, PATWT); if(scr->vaddr == 0) error("cannot allocate vga frame buffer"); scr->vaddr = (char*)scr->vaddr+x; --- /sys/src/9/pc/vganvidia.c Tue Aug 19 17:37:02 2008 +++ /sys/src/9/pc/vganvidia.c Tue Aug 19 17:37:00 2008 @@ -345,14 +345,14 @@ while((readget(scr) != nv.dmaput) && x++ < 1000000) ; if(x >= 1000000) - iprint("idle stat %lud put %d scr %#p pc %#p\n", readget(scr), nv.dmaput, scr, getcallerpc(&scr)); + iprint("idle stat %lud put %d scr %p pc %luX\n", readget(scr), nv.dmaput, scr, getcallerpc(&scr)); x = 0; while(pgraph[0x00000700/4] & 0x01 && x++ < 1000000) ; if(x >= 1000000) - iprint("idle stat %lud scrio %#p scr %#p pc %#p\n", *pgraph, scr->mmio, scr, getcallerpc(&scr)); + iprint("idle stat %lud scrio %.8lux scr %p pc %luX\n", *pgraph, scr->mmio, scr, getcallerpc(&scr)); } static void @@ -371,7 +371,7 @@ if(scr->storage <= scr->apsize) nv.dmabase = (ulong*)((uchar*)scr->vaddr + scr->storage - 128*1024); else{ - nv.dmabase = (void*)vmap(scr->paddr + scr->storage - 128*1024, 128*1024); + nv.dmabase = (void*)vmappat(scr->paddr + scr->storage - 128*1024, 128*1024, PATWT); if(nv.dmabase == 0){ hwaccel = 0; hwblank = 0;