the objective of this patch is to improve the behaviour of the kernel when it reaches a situation where all physical memory is in use. removed the swapfull() checks from fault.c and segment.c, as the pager's oom handler will deal with the situation. the killbig way of recovering is more deterministic, as the user can prevent it from killing, say, the local fileserver or the window system by changing the permissions in /proc/n. with swapfull, any process that runs will randomly fail, and if it is your local fileserver, the machine is dead. also, the system will now behave the same with or without a swap file enabled. changed pio() in port/fault.c: only access s->image while we hold s->lk; simplified the code so that demand load and swap-in become the same code path. added a ulong gen field to struct Page in port/portdat.h. the pager uses a generation counter (genclock) that it increases after each walk through the process table. pages that got referenced/accessed get the current genclock value assigned. the pager can now determine the age of a page by calculating the difference between genclock and page->gen. on each genclock increment, all unreferenced pages get older by one generation. on each walk, the pager calculates the average age of the pages, which is used in the next generation to decide whether a page should be swapped out or left alone: if a page is younger than or equal to the average age of the pages from the last generation walk, it is not swapped out. the pager now sorts its io transaction list by swap disk address before processing it. when the pager detects that swap is full, it invokes the oom path and calls killbig().
Reference: /n/sources/patch/applied/kernel-pager-swapping Date: Fri Apr 16 17:35:41 CES 2010 Signed-off-by: cinap_lenrek@gmx.de --- /sys/src/9/port/fault.c Fri Apr 16 17:35:06 2010 +++ /sys/src/9/port/fault.c Wed Jun 2 00:53:08 2010 @@ -136,12 +136,6 @@ if(ref > 1) { unlock(lkp); - if(swapfull()){ - qunlock(&s->lk); - pprint("swap space full\n"); - faulterror(Enoswap, nil, 1); - } - new = newpage(0, &s, addr); if(s == 0) return -1; @@ -151,7 +145,7 @@ } else { /* save a copy of the original for the image cache */ - if(lkp->image && !swapfull()) + if(lkp->image) duppage(lkp); unlock(lkp); @@ -208,6 +202,11 @@ *p = new; return; } + + c = s->image->c; + ask = s->flen-soff; + if(ask > BY2PG) + ask = BY2PG; } else { /* from a swap image */ daddr = swapaddr(loadrec); @@ -217,39 +216,38 @@ *p = new; return; } - } - + c = swapimage.c; + ask = BY2PG; + } qunlock(&s->lk); new = newpage(0, 0, addr); k = kmap(new); kaddr = (char*)VA(k); - if(loadrec == 0) { /* This is demand load */ - c = s->image->c; - while(waserror()) { - if(strcmp(up->errstr, Eintr) == 0) - continue; - kunmap(k); - putpage(new); + while(waserror()) { + if(strcmp(up->errstr, Eintr) == 0) + continue; + kunmap(k); + putpage(new); + if(loadrec == 0){ faulterror("sys: demand load I/O error", c, 0); + }else{ + faulterror("sys: page in I/O error", c, 0); } + } - ask = s->flen-soff; - if(ask > BY2PG) - ask = BY2PG; - - n = devtab[c->type]->read(c, kaddr, ask, daddr); - if(n != ask) - faulterror(Eioload, c, 0); - if(ask < BY2PG) - memset(kaddr+ask, 0, BY2PG-ask); - - poperror(); - kunmap(k); - qlock(&s->lk); - + n = devtab[c->type]->read(c, kaddr, ask, daddr); + if(n != ask) + faulterror(Eioload, c, 0); + if(ask < BY2PG) + memset(kaddr+ask, 0, BY2PG-ask); + + poperror(); + kunmap(k); + qlock(&s->lk); + if(loadrec == 0) { /* This is demand load */ /* * race, another proc may have gotten here first while * s->lk was unlocked @@ -262,24 +260,7 @@ else putpage(new); } - else { /* This is paged out */ - c = swapimage.c; 
- if(waserror()) { - kunmap(k); - putpage(new); - qlock(&s->lk); - qunlock(&s->lk); - faulterror("sys: page in I/O error", c, 0); - } - - n = devtab[c->type]->read(c, kaddr, BY2PG, daddr); - if(n != BY2PG) - faulterror(Eioload, c, 0); - - poperror(); - kunmap(k); - qlock(&s->lk); - + else { /* This is paged out */ /* * race, another proc may have gotten here first * (and the pager may have run on that page) while --- /sys/src/9/port/portdat.h Fri Apr 16 17:35:12 2010 +++ /sys/src/9/port/portdat.h Fri Apr 16 17:35:08 2010 @@ -316,6 +316,7 @@ ulong pa; /* Physical address in memory */ ulong va; /* Virtual address for user */ ulong daddr; /* Disc address on swap */ + ulong gen; /* Generation counter for swap */ ushort ref; /* Reference count */ char modref; /* Simulated modify/reference bits */ char color; /* Cache coloring */ --- /sys/src/9/port/segment.c Fri Apr 16 17:35:18 2010 +++ /sys/src/9/port/segment.c Fri Apr 16 17:35:15 2010 @@ -64,8 +64,6 @@ if(size > (SEGMAPSIZE*PTEPERTAB)) error(Enovmem); - if(swapfull()) - error(Enoswap); s = smalloc(sizeof(Segment)); s->ref = 1; s->type = type; @@ -471,11 +469,6 @@ qunlock(&s->lk); flushmmu(); return 0; - } - - if(swapfull()){ - qunlock(&s->lk); - error(Enoswap); } for(i = 0; i < NSEG; i++) { --- /sys/src/9/port/swap.c Fri Apr 16 17:35:23 2010 +++ /sys/src/9/port/swap.c Fri Apr 16 17:35:20 2010 @@ -12,11 +12,30 @@ static void pagepte(int, Page**); static void pager(void*); - Image swapimage; +Image swapimage; + static int swopen; static Page **iolist; static int ioptr; +static ulong genclock; +static ulong genage; +static ulong gencount; +static uvlong gensum; + +static void +gentick(void) +{ + genclock++; + if(gencount){ + genage = gensum / gencount; + } else { + genage = 0; + } + gensum = 0; + gencount = 0; +} + void swapinit(void) { @@ -115,14 +134,16 @@ loop: up->psstate = "Idle"; + wakeup(&palloc.r); sleep(&swapalloc.r, needpages, 0); while(needpages(junk)) { - - if(swapimage.c) { + if(swapimage.c && !swapfull()) { 
p++; - if(p >= ep) + if(p >= ep){ p = proctab(0); + gentick(); + } if(p->state == Dead || p->noswap) continue; @@ -160,15 +181,14 @@ qunlock(&p->seglock); } else { - print("out of physical memory; no swap configured\n"); - if(!cpuserver) - freebroken(); /* can use the memory */ - else - killbig("out of memory"); + print("out of memory\n"); + + killbig("out of memory"); + freebroken(); /* can use the memory */ /* Emulate the old system if no swap channel */ - tsleep(&up->sleep, return0, 0, 5000); - wakeup(&palloc.r); + if(!swapimage.c) + tsleep(&up->sleep, return0, 0, 5000); } } goto loop; @@ -180,6 +200,7 @@ int type, i, size; Pte *l; Page **pg, *entry; + ulong age; if(!canqlock(&s->lk)) /* We cannot afford to wait, we will surely deadlock */ return; @@ -213,11 +234,23 @@ if(pagedout(entry)) continue; - if(entry->modref & PG_REF) { + if(entry->modref & PG_REF){ entry->modref &= ~PG_REF; - continue; + entry->gen = genclock; } + if(genclock < entry->gen){ + age = ~(entry->gen - genclock); + } else { + age = genclock - entry->gen; + } + + gensum += age; + gencount++; + + if(age <= genage) + continue; + pagepte(type, pg); if(ioptr >= conf.nswppo) @@ -324,6 +357,20 @@ ioptr); } +static int +pageiocomp(void *a, void *b) +{ + Page *p1, *p2; + + p1 = *((Page**)a); + p2 = *((Page**)b); + if(p1->daddr > p2->daddr){ + return 1; + } else { + return -1; + } +} + static void executeio(void) { @@ -334,6 +381,8 @@ KMap *k; c = swapimage.c; + + qsort(iolist, ioptr, sizeof(iolist[0]), pageiocomp); for(i = 0; i < ioptr; i++) { if(ioptr > conf.nswppo)