This set of changes implements system call tracing on Plan 9. Please ignore the previous version called syscalltrace as it has an error. This code is tested on kw and works fine. I've stress tested it pretty hard at this point. It also has 2 bug fixes not present in the 9k version of ratrace, which I will probably backport to 9k soon. ron Reference: /n/sources/patch/applied/syscalltrace Date: Mon Sep 27 18:15:01 CES 2010 Signed-off-by: rminnich@gmail.com --- /sys/src/9/port/syscallfmt.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9/port/syscallfmt.c Mon Sep 27 18:12:41 2010 @@ -0,0 +1,406 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "/sys/src/libc/9syscall/sys.h" +// WE ARE OVERRUNNIGN SOMEHOW +/* + * Print functions for system call tracing. + */ +static void +fmtrwdata(Fmt* f, char* a, int n, char* suffix) +{ + int i; + char *t; + + if(a == nil){ + fmtprint(f, "0x0%s", suffix); + return; + } + validaddr((ulong)a, n, 0); + t = smalloc(n+1); + for(i = 0; i < n; i++){ + if(a[i] > 0x20 && a[i] < 0x7f) + t[i] = a[i]; + else + t[i] = '.'; + } + + fmtprint(f, " %#p/\"%s\"%s", a, t, suffix); + free(t); +} + +static void +fmtuserstring(Fmt* f, char* a, char* suffix) +{ + int n; + char *t; + + if(a == nil){ + fmtprint(f, "0/\"\"%s", suffix); + return; + } + validaddr((ulong)a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + t = smalloc(n+1); + memmove(t, a, n); + t[n] = 0; + fmtprint(f, "%#p/\"%s\"%s", a, t, suffix); + free(t); +} + +/* + */ +void +syscallfmt(int syscallno, ulong pc, va_list list) +{ + long l; + Fmt fmt; + void *v; + vlong vl; + uintptr p; + int i[2], len; + char *a, **argv; + + fmtstrinit(&fmt); + fmtprint(&fmt, "%uld %s ", up->pid, up->text); + + if(syscallno > nsyscall) + fmtprint(&fmt, " %d ", syscallno); + else + fmtprint(&fmt, "%s ", sysctab[syscallno]?sysctab[syscallno]:"huh?"); + + fmtprint(&fmt, "%ulx ", pc); + if(up->syscalltrace != nil) + free(up->syscalltrace); + + switch(syscallno){ + case SYSR1: + p = va_arg(list, uintptr); + fmtprint(&fmt, "%#p", p); + break; + case _ERRSTR: /* deprecated */ + case CHDIR: + case EXITS: + case REMOVE: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case BIND: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case CLOSE: + case NOTED: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d", i[0]); + break; + case DUP: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d", i[0], i[1]); + break; + case ALARM: + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#lud ", l); + break; + case EXEC: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + for(;;){ + validaddr((ulong)argv, sizeof(char**), 0); + a = *(char **)argv; + if(a == nil) + break; + fmtprint(&fmt, " "); + fmtuserstring(&fmt, a, ""); + argv++; + } + break; + case _FSESSION: /* deprecated */ + case _FSTAT: /* deprecated */ + case _FWSTAT: /* obsolete */ + i[0] = va_arg(list, int); + a = va_arg(list, char*); + fmtprint(&fmt, "%d %#p", i[0], a); + break; + case FAUTH: + i[0] = va_arg(list, int); + a = va_arg(list, char*); + fmtprint(&fmt, "%d", i[0]); + fmtuserstring(&fmt, a, ""); + break; + case SEGBRK: + case RENDEZVOUS: + v = va_arg(list, void*); + fmtprint(&fmt, "%#p ", v); + v = va_arg(list, void*); + fmtprint(&fmt, "%#p", v); + break; + case _MOUNT: /* deprecated */ + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case OPEN: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case OSEEK: /* deprecated */ + i[0] = va_arg(list, int); + l = va_arg(list, long); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %ld %d", i[0], l, i[1]); + break; + case SLEEP: + l = va_arg(list, long); + fmtprint(&fmt, "%ld", l); + break; + case _STAT: /* obsolete */ + case _WSTAT: /* obsolete */ + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtprint(&fmt, "%#p", a); + break; + case RFORK: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case PIPE: + case BRK_: + v = va_arg(list, int*); + fmtprint(&fmt, "%#p", v); + break; + case CREATE: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%#ux %#ux", i[0], i[1]); + break; + case FD2PATH: + case FSTAT: + case FWSTAT: + i[0] = va_arg(list, int); + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%d %#p %lud", i[0], a, l); + break; + case NOTIFY: + case SEGDETACH: + case _WAIT: /* deprecated */ + v = va_arg(list, void*); + fmtprint(&fmt, "%#p", v); + break; + case SEGATTACH: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + /*FALLTHROUGH*/ + case SEGFREE: + case SEGFLUSH: + v = va_arg(list, void*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#p %lud", v, l); + break; + case UNMOUNT: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case SEMACQUIRE: + case SEMRELEASE: + v = va_arg(list, int*); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#p %d", v, i[0]); + break; + case SEEK: + v = va_arg(list, vlong*); + i[0] = va_arg(list, int); + vl = va_arg(list, vlong); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%#p %d %#llux %d", v, i[0], vl, i[1]); + break; + case FVERSION: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d ", i[0], i[1]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%lud", l); + break; + case WSTAT: + case STAT: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + /*FALLTHROUGH*/ + case ERRSTR: + case AWAIT: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#p %lud", a, l); + break; + case MOUNT: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d ", i[0], i[1]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case _READ: /* deprecated */ + case PREAD: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + l = va_arg(list, long); + fmtprint(&fmt, "%d %#p %ld", i[0], v, l); + if(syscallno == PREAD){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + break; + case _WRITE: /* deprecated */ + case PWRITE: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + l = va_arg(list, long); + fmtprint(&fmt, "%d ", i[0]); + len = MIN(l, 64); + fmtrwdata(&fmt, v, len, " "); + fmtprint(&fmt, "%ld", l); + if(syscallno == PWRITE){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + break; + } + + up->syscalltrace = fmtstrflush(&fmt); +} + +void +sysretfmt(int syscallno, va_list list, long ret, uvlong start, uvlong stop) +{ + long l; + void* v; + Fmt fmt; + vlong vl; + int i, len; + char *a, *errstr; + + fmtstrinit(&fmt); + + if(up->syscalltrace) + free(up->syscalltrace); + + errstr = "\"\""; + switch(syscallno){ + default: + if(ret == -1) + errstr = up->errstr; + fmtprint(&fmt, " = %ld", ret); + break; + case ALARM: + case _WRITE: + case PWRITE: + if(ret == -1) + errstr = up->errstr; + fmtprint(&fmt, " = %ld", ret); + break; + case EXEC: + case SEGBRK: + case SEGATTACH: + case RENDEZVOUS: + if((void *)ret == (void*)-1) + errstr = up->errstr; + fmtprint(&fmt, " = %#p", (void *)ret); + break; + case AWAIT: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ret > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %ld", l, ret); + } + else{ + fmtprint(&fmt, "%#p/\"\" %lud = %ld", a, l, ret); + errstr = up->errstr; + } + break; + case _ERRSTR: + case ERRSTR: + a = va_arg(list, char*); + if(syscallno == _ERRSTR) + l = 64; + else + l = va_arg(list, unsigned long); + if(ret > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %ld", l, ret); + } + else{ + fmtprint(&fmt, "\"\" %lud = %ld", l, ret); + errstr = up->errstr; + } + break; + case FD2PATH: + i = va_arg(list, int); + USED(i); + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ret > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %ld", l, ret); + } + else{ + fmtprint(&fmt, "\"\" %lud = %ld", l, ret); + errstr = up->errstr; + } + break; + case _READ: + case PREAD: + i = va_arg(list, int); + USED(i); + v = va_arg(list, void*); + l = va_arg(list, long); + if(ret > 0){ + len = MIN(ret, 64); + fmtrwdata(&fmt, v, len, ""); + } + else{ + fmtprint(&fmt, "/\"\""); + errstr = up->errstr; + } + fmtprint(&fmt, " %ld", l); + if(syscallno == PREAD){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + fmtprint(&fmt, " = %ld", ret); + break; + } + fmtprint(&fmt, " %s %#llud %#llud\n", errstr, start, stop); + up->syscalltrace = fmtstrflush(&fmt); +} --- /sys/src/9/kw/syscall.c Mon Sep 27 18:12:46 2010 +++ /sys/src/9/kw/syscall.c Mon Sep 27 18:12:43 2010 @@ -123,7 +123,7 @@ l = strlen(n->msg); if(l > ERRMAX-23) /* " pc=0x0123456789abcdef\0" */ l = ERRMAX-23; - snprint(n->msg + l, sizeof n->msg - l, " pc=%#lux", ureg->pc); + snprint(n->msg + l, sizeof n->msg - l, " pc=%#ux", ureg->pc); } if(n->flag != NUser && (up->notified || up->notify == 0)){ @@ -186,9 +186,10 @@ ulong sp; long ret; int i, scallnr; + vlong startns, stopns; if(!userureg(ureg)) - panic("syscall: from kernel: pc %#lux r14 %#lux psr %#lux", + panic("syscall: from kernel: pc %#ux r14 %#ux psr %#ux", ureg->pc, ureg->r14, ureg->psr); cycles(&up->kentry); @@ -198,23 +199,38 @@ up->pc = ureg->pc; up->dbgreg = ureg; - if(up->procctl == Proc_tracesyscall){ - up->procctl = Proc_stopme; - procctl(up); - } - scallnr = ureg->r0; up->scallnr = scallnr; if(scallnr == RFORK) fpusysrfork(ureg); spllo(); - sp = ureg->sp; + + if(up->procctl == Proc_tracesyscall){ + /* + * Redundant validaddr. Do we care? + * Tracing syscalls is not exactly a fast path... + * Beware, validaddr currently does a pexit rather + * than an error if there's a problem; that might + * change in the future. + */ + if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD)) + validaddr(sp, sizeof(Sargs)+BY2WD, 0); + + syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2SE)); + up->procctl = Proc_stopme; + procctl(up); + if (up->syscalltrace) + free(up->syscalltrace); + up->syscalltrace = nil; + } + up->nerrlab = 0; ret = -1; + startns = todget(nil); if(!waserror()){ if(scallnr >= nsyscall){ - pprint("bad sys call number %d pc %#lux\n", + pprint("bad sys call number %d pc %#ux\n", scallnr, ureg->pc); postnote(up, 1, "sys: bad sys call", NDebug); error(Ebadarg); @@ -253,10 +269,15 @@ ureg->r0 = ret; if(up->procctl == Proc_tracesyscall){ + stopns = todget(nil); up->procctl = Proc_stopme; + sysretfmt(scallnr, (va_list)(sp+BY2SE), ret, startns, stopns); s = splhi(); procctl(up); splx(s); + if(up->syscalltrace) + free(up->syscalltrace); + up->syscalltrace = nil; } up->insyscall = 0; --- /sys/src/cmd/ratrace.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/ratrace.c Mon Oct 4 23:18:34 2010 @@ -0,0 +1,189 @@ +#include +#include +#include +#include + +enum { + Stacksize = 8*1024, + Bufsize = 8*1024, +}; + +Channel *out; +Channel *quit; +Channel *forkc; +int nread = 0; + +typedef struct Str Str; +struct Str { + char *buf; + int len; +}; + +void +die(char *s) +{ + fprint(2, "%s\n", s); + exits(s); +} + +void +cwrite(int fd, char *path, char *cmd, int len) +{ + if (write(fd, cmd, len) < len) { + fprint(2, "cwrite: %s: failed %d bytes: %r\n", path, len); + sendp(quit, nil); + threadexits(nil); + } +} + +void +reader(void *v) +{ + int cfd, tfd, forking = 0, pid, newpid; + char *ctl, *truss; + Str *s; + + pid = (int)(uintptr)v; + ctl = smprint("/proc/%d/ctl", pid); + if ((cfd = open(ctl, OWRITE)) < 0) + die(smprint("%s: %r", ctl)); + truss = smprint("/proc/%d/syscall", pid); + if ((tfd = open(truss, OREAD)) < 0) + die(smprint("%s: %r", truss)); + + cwrite(cfd, ctl, "stop", 4); + cwrite(cfd, truss, "startsyscall", 12); + + s = mallocz(sizeof(Str) + Bufsize, 1); + s->buf = (char *)&s[1]; + while((s->len = pread(tfd, s->buf, Bufsize - 1, 0)) > 0){ + if (forking && s->buf[1] == '=' && s->buf[3] != '-') { + forking = 0; + newpid = strtol(&s->buf[3], 0, 0); + sendp(forkc, (void*)newpid); + procrfork(reader, (void*)newpid, Stacksize, 0); + } + + /* + * There are three tests here and they (I hope) guarantee + * no false positives. + */ + if (strstr(s->buf, " Rfork") != nil) { + char *a[8]; + char *rf; + + rf = strdup(s->buf); + if (tokenize(rf, a, 8) == 5) { + ulong flags; + + flags = strtoul(a[4], 0, 16); + if (flags & RFPROC) + forking = 1; + } + free(rf); + } + sendp(out, s); + cwrite(cfd, truss, "startsyscall", 12); + s = mallocz(sizeof(Str) + Bufsize, 1); + s->buf = (char *)&s[1]; + } + sendp(quit, nil); + threadexitsall(nil); +} + +void +writer(void *) +{ + int newpid; + Alt a[4]; + Str *s; + + a[0].op = CHANRCV; + a[0].c = quit; + a[0].v = nil; + a[1].op = CHANRCV; + a[1].c = out; + a[1].v = &s; + a[2].op = CHANRCV; + a[2].c = forkc; + a[2].v = &newpid; + a[3].op = CHANEND; + + for(;;) + switch(alt(a)){ + case 0: + nread--; + if(nread <= 0) + goto done; + break; + case 1: + /* it's a nice null terminated thing */ + fprint(2, "%s", s->buf); + free(s); + break; + case 2: + // procrfork(reader, (void*)newpid, Stacksize, 0); + nread++; + break; + } +done: + exits(nil); +} + +void +usage(void) +{ + fprint(2, "Usage: ratrace [-c cmd [arg...]] | [pid]\n"); + exits("usage"); +} + +void +threadmain(int argc, char **argv) +{ + int pid; + char *cmd = nil; + char **args = nil; + + /* + * don't bother with fancy arg processing, because it picks up options + * for the command you are starting. Just check for -c as argv[1] + * and then take it from there. + */ + if (argc < 2) + usage(); + if (argv[1][0] == '-') + switch(argv[1][1]) { + case 'c': + if (argc < 3) + usage(); + cmd = strdup(argv[2]); + args = &argv[2]; + break; + default: + usage(); + } + + /* run a command? */ + if(cmd) { + pid = fork(); + if (pid < 0) + sysfatal("fork failed: %r"); + if(pid == 0) { + exec(cmd, args); + if(cmd[0] != '/') + exec(smprint("/bin/%s", cmd), args); + sysfatal("exec %s failed: %r", cmd); + } + } else { + if(argc != 1) + usage(); + pid = atoi(argv[0]); + } + + out = chancreate(sizeof(char*), 0); + quit = chancreate(sizeof(char*), 0); + forkc = chancreate(sizeof(ulong *), 0); + nread++; + procrfork(writer, nil, Stacksize, 0); + reader((void*)pid); +} --- /sys/src/9/kw/mkfile Mon Sep 27 18:12:52 2010 +++ /sys/src/9/kw/mkfile Mon Sep 27 18:12:49 2010 @@ -35,6 +35,7 @@ qlock.$O\ segment.$O\ swap.$O\ + syscallfmt.$O\ sysfile.$O\ sysproc.$O\ taslock.$O\