the problem is due to the surprisingly slow cga console. these timings are based on rdtsc() subtractions around the named areas for the simple test of cat'ing /lib/pci to the console. they are huge: cycles printing chars 482510340 scrolling 135746656968 total 137112900000 by introducing a frame buffer to avoid reading from the cga console for scrolling (a guess based on problems with graphics performance), we get about a 10x improvement: printing normal chars 1080381568 scrolling 12046340760 total 13610262120 by guessing that any string >40 bytes is likely to induce scrolling, we can redraw the whole screen once we're done. this gives us 100/1000x improvement on our hot spots, but just 7x in run time. printing chars 33186956 scrolling 24111480 total 1854594800 this looks like about all we can do. Reference: /n/atom/patch/applied/cgafb Date: Thu Feb 13 04:01:19 CET 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/nix/k10/cga.c Thu Feb 13 04:00:30 2014 +++ /sys/src/nix/k10/cga.c Thu Feb 13 04:00:31 2014 @@ -36,6 +36,8 @@ Lock; int pos; int initdone; + uchar blankline[Width]; + uchar fb[Width*Height]; uchar *cga; } cga; @@ -97,7 +99,13 @@ } static void -cgaputr(Rune c) +cgaredraw(void) +{ + memmove(cga.cga, cga.fb, Width*Height-Postlen*2); +} + +static void +cgaputr(Rune c, int fb) { int i; uchar *p; @@ -109,12 +117,12 @@ else if(c == '\t'){ i = 8 - ((cga.pos/2)&7); while(i-- > 0) - cgaputr(' '); + cgaputr(' ', fb); } else if(c == '\b'){ if(cga.pos >= 2) cga.pos -= 2; - cgaputr(' '); + cgaputr(' ', fb); cga.pos -= 2; } else{ @@ -129,36 +137,42 @@ break; } } - cga.cga[cga.pos++] = c; - cga.cga[cga.pos++] = Attr; + cga.fb[cga.pos++] = c; + cga.fb[cga.pos++] = Attr; + if(!fb){ + cga.cga[cga.pos-2] = c; + cga.cga[cga.pos-1] = Attr; + } } if(cga.pos >= (Width*Height)-Postlen*2){ - memmove(cga.cga, &cga.cga[Width], Width*(Height-1)); - p = &cga.cga[Width*(Height-1)-Postlen*2]; - for(i = 0; i < Width/2; i++){ - *p++ = ' '; - *p++ = Attr; - } + memmove(cga.fb, 
&cga.fb[Width], Width*(Height-1)); + p = &cga.fb[Width*(Height-1)-Postlen*2]; + memmove(p, cga.blankline, Width); cga.pos -= Width; + if(!fb) + memmove(cga.cga, cga.fb, Width*Height-Postlen*2); } - cgacursor(); } void cgaconsputs(char* s, int n) { - int i; + int i, fb; Rune r; if(sys->novga || cga.initdone == 0) return; ilock(&cga); + fb = n>=Width/4; while(n > 0 && fullrune(s, n)){ i = chartorune(&r, s); n -= i; s += i; - cgaputr(r); + cgaputr(r, fb); } + if(fb) + cgaredraw(); + cgacursor(); iunlock(&cga); } @@ -182,6 +196,11 @@ void cgainit(void) { + char *s; + uchar *p, *e; + + if((s = getconf("*novga")) != nil) + sys->novga = atoi(s); if(sys->novga) return; ilock(&cga); @@ -189,6 +208,12 @@ cga.pos |= cgaregr(0x0f); cga.pos *= 2; cga.cga = CGA; + e = cga.blankline + Width; + for(p = cga.blankline; p < e; ){ + *p++ = ' '; + *p++ = Attr; + } +memmove(cga.fb, cga.cga, Width*Height); cga.initdone = 1; iunlock(&cga); }