In the current pc/mmu.c, vmapalloc() contains this comment: /* * could span page directory entries, but not worth the trouble. * not going to be very much contention. */ Traditionally not many callers use vmap(), and vunmap() is even rarer: at boot time, a few drivers vmap() some space which will be used for the lifetime of the kernel; some callers invoke vunmap() in error cases, and others occasionally unmap temporarily-mapped small areas. Some students here are working on a driver that puts more pressure on vmap()/vunmap(). In particular, this driver uses vmap() to get multiple megabytes (4, 8, ...), uses the space for some minutes, and then releases it. In some situations this runs into trouble: for "large" allocations, vmapalloc() looks for empty page-directory entries and fails if there aren't any. This can fail even if there is enough space in the vmap, if previously most of the vmap page-directory entries were set to point to page tables: even if all the PTEs in those page tables are blank, the tables aren't removed, so the PDEs aren't empty, so vmapalloc() thinks there is no large space. This problem hasn't shown up before because nobody (that I know of) frequently calls vunmap() on large areas. This patch modifies vmapalloc() as follows: 1. It still tries to satisfy "large" requests by looking for blank PDEs. This is the same code as the old mmu.c, but if a "large" request cannot be satisfied this way, instead of returning failure we press on. 2. The middle part of vmapalloc() has been rewritten so it can satisfy requests with regions that span page tables. This is tried for both "large" and non-"large" requests. 3. Finally, if a request is non-"large" and wasn't satisfied by a region mapped by an existing page table (step 2), a new page table is begun (this is the same as the code in the original mmu.c). In addition to the patch I am including: 1. devtest.c - a driver that, when poked, will vmap() and then immediately vunmap() 32M. 2. 
old_result, new_result - transcripts of using the devtest.c driver to exercise the old and new vmapalloc() versions. I realize messing with mmu.c merits some care. Hopefully the risk is acceptable here because most existing callers of vmap()/vunmap() won't run any of the new code, e.g., callers who allocate a large range and never free it. The patch and the test driver were prepared by my students (Ashish Kaila, Rohan Patil, Pratik Shah, and Maneet Singh) but edited for clarity and then tested by me before submission. Reference: /n/sources/patch/mmu.c-span-pdes Date: Sat Jun 14 01:25:47 CES 2014 Signed-off-by: davide+p9@cs.cmu.edu --- /sys/src/9/pc/mmu.c Sat Jun 14 01:25:43 2014 +++ /sys/src/9/pc/mmu.c Sat Jun 14 01:25:41 2014 @@ -88,8 +88,6 @@ m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID; m->tss = malloc(sizeof(Tss)); - if(m->tss == nil) - panic("mmuinit: no memory"); memset(m->tss, 0, sizeof(Tss)); m->tss->iomap = 0xDFFF<<16; @@ -597,30 +595,58 @@ vmapalloc(ulong size) { int i, n, o; - ulong *vpdb; - int vpdbsize; - - vpdb = &MACHP(0)->pdb[PDX(VMAP)]; + ulong *vpdb, have, want, count; + int vpdbsize; + + vpdb = &MACHP(0)->pdb[PDX(VMAP)]; vpdbsize = VMAPSIZE/(4*MB); if(size >= 4*MB){ + /* Large request: try to satisfy with 4M-aligned 4M hole(s) */ n = (size+4*MB-1) / (4*MB); if((o = findhole(vpdb, vpdbsize, n)) != -1) return VMAP + o*4*MB; - return 0; } + n = (size+BY2PG-1) / BY2PG; - for(i=0; i4MB, we are doing this because the code above failed + * to find enough free directory entries. This can happen even + * if a range is free: the PDEs might point to page tables that + * previously mapped something but no longer do. + * + * If this is a small request, we are hoping to fit into an existing + * page table rather than starting a new one. */ + for(i=0; i WD2PG ? 
WD2PG : want; + want -= count; + if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, count)) != -1) + have += count; + else { + have = 0; + want = n; + } + if(have >= n) + return VMAP + i*4*MB + (o+count)*BY2PG - have*BY2PG; + } else { + if(have > 0){ + have = 0; + want = n; + } + } + } + + /* Last chance (for a small request): start using a new page table. */ + if((size < 4*MB) && ((o = findhole(vpdb, vpdbsize, 1)) != -1)) + return VMAP + o*4*MB; + return 0; } @@ -698,7 +724,7 @@ flag = pa&0xFFF; pa &= ~0xFFF; - if((MACHP(0)->cpuiddx & Pse) && (getcr4() & 0x10)) + if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10)) pse = 1; else pse = 0; --- /sys/src/9/pc/devtest.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9/pc/devtest.c Sat Jun 14 01:25:43 2014 @@ -0,0 +1,143 @@ +/* + * devtest + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum { + Qdir = 0, + Qtest, + Qmax, +}; + +typedef long Rdwrfn(Chan*, void*, long, vlong); + +static Rdwrfn *readfn[Qmax]; +static Rdwrfn *writefn[Qmax]; + +static Dirtab testdir[Qmax] = { + ".", { Qdir, 0, QTDIR }, 0, DMDIR | 0555, +}; + +int ntestdir = Qtest; + +static Chan* +testattach(char* spec) +{ + return devattach('o', spec); +} + +Walkqid* +testwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, testdir, ntestdir, devgen); +} + +static int +teststat(Chan* c, uchar* dp, int n) +{ + return devstat(c, dp, n, testdir, ntestdir, devgen); +} + +static Chan* +testopen(Chan* c, int omode) +{ + return devopen(c, omode, testdir, ntestdir, devgen); +} + +static void +testclose(Chan*) +{ +} + +Dirtab* +addtestfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn) +{ + int i; + Dirtab d; + Dirtab *dp; + + memset(&d, 0, sizeof d); + strcpy(d.name, name); + d.perm = perm; + + if(ntestdir >= Qmax) + return nil; + + for(i=0; iqid.path){ + case Qdir: + return devdirread(c, a, n, testdir, ntestdir, devgen); + default: + return 0; + } +} + +static 
long +testwrite(Chan *c, void *a, long n, vlong offset) +{ + ulong va; + ulong fakephysaddr = 8192; + char err[128]; + + USED(c); USED(a); USED(n); USED(offset); + + va = (ulong)vmap(fakephysaddr, 32 * 1024 * 1024); + + if (va != 0) + vunmap((void *)va, 32 * 1024 * 1024); + else + error(Enomem); + + snprint(err, sizeof(err), "testwrite: va 0x%lux", va); + error(err); + + return n; +} + +static void +testinit(void) { + addtestfile("test", 0660, testread, testwrite); +} + +Dev testdevtab = { + 'o', + "test", + + devreset, + testinit, + devshutdown, + testattach, + testwalk, + teststat, + testopen, + devcreate, + testclose, + testread, + devbread, + testwrite, + devbwrite, + devremove, + devwstat, +}; --- /sys/src/9/pc/old_result Thu Jan 1 00:00:00 1970 +++ /sys/src/9/pc/old_result Sat Jun 14 01:25:44 2014 @@ -0,0 +1,16 @@ +term% echo > '#o/test' +echo: write error: testwrite: va 0xe0c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe2c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe4c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe6c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe8c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xeac00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xecc00000 +term% echo > '#o/test' +echo: write error: kernel allocate failed --- /sys/src/9/pc/new_result Thu Jan 1 00:00:00 1970 +++ /sys/src/9/pc/new_result Sat Jun 14 01:25:45 2014 @@ -0,0 +1,22 @@ +term% echo > '#o/test' +echo: write error: testwrite: va 0xe0c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe2c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe4c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe6c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe8c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xeac00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xecc00000 +term% 
echo > '#o/test' +echo: write error: testwrite: va 0xe0c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe0c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe0c00000 +term% echo > '#o/test' +echo: write error: testwrite: va 0xe0c00000