update join from sources. finally with rune support. Reference: /n/atom/patch/applied/joinupd Date: Tue Mar 25 04:26:53 CET 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/cmd/join.c Tue Mar 25 04:26:47 2014 +++ /sys/src/cmd/join.c Tue Mar 25 04:26:47 2014 @@ -1,44 +1,57 @@ /* join F1 F2 on stuff */ #include <u.h> #include <libc.h> -#include <stdio.h> +#include <bio.h> #include <ctype.h> -#define F1 0 -#define F2 1 -#define F0 3 + +enum { + F1, + F2, + NIN, + F0, +}; + #define NFLD 100 /* max field per line */ -#define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) -FILE *f[2]; -Rune buf[2][BUFSIZ]; /*input lines */ -Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ -Rune *s1,*s2; -int j1 = 1; /* join of this field of file 1 */ -int j2 = 1; /* join of this field of file 2 */ -int olist[2*NFLD]; /* output these fields */ -int olistf[2*NFLD]; /* from these files */ -int no; /* number of entries in olist */ +#define comp() runestrcmp(ppi[F1][j1], ppi[F2][j2]) + +Biobuf *f[NIN]; +Rune buf[NIN][Bsize]; /* input lines */ +Rune *ppi[NIN][NFLD+1]; /* pointers to fields in lines */ Rune sep1 = ' '; /* default field separator */ Rune sep2 = '\t'; -char *sepstr=" "; -int discard; /* count of truncated lines */ -Rune null[BUFSIZ] = L""; +int j1 = 1; /* join of this field of file 1 */ +int j2 = 1; /* join of this field of file 2 */ int a1; int a2; -char *getoptarg(int*, char***); -void output(int, int); -int input(int); -void oparse(char*); -void error(char*, char*); -void seek1(void), seek2(void); -Rune *strtorune(Rune *, char *); - +int olist[NIN*NFLD]; /* output these fields */ +int olistf[NIN*NFLD]; /* from these files */ +int no; /* number of entries in olist */ +char *sepstr = " "; +int discard; /* count of truncated lines */ +Rune null[Bsize] = L""; +Biobuf binbuf, boutbuf; +Biobuf *bin, *bout; + +char *getoptarg(int*, char***); +int input(int); +void join(int); +void oparse(char*); +void output(int, int); +Rune *strtorune(Rune *, char *); void main(int argc, char **argv) { int i; + vlong off1, off2; + + 
bin = &binbuf; + bout = &boutbuf; + Binit(bin, 0, OREAD); + Binit(bout, 1, OWRITE); + argv0 = argv[0]; while (argc > 1 && argv[1][0] == '-') { if (argv[1][1] == '\0') break; @@ -56,7 +69,7 @@ a2++; break; default: - error("incomplete option -a",""); + sysfatal("incomplete option -a"); } break; case 'e': @@ -111,138 +124,128 @@ proceed: for (i = 0; i < no; i++) if (olist[i]-- > NFLD) /* 0 origin */ - error("field number too big in -o",""); - if (argc != 3) - error("usage: join [-1 x -2 y] [-o list] file1 file2",""); + sysfatal("field number too big in -o"); + if (argc != 3) { + fprint(2, "usage: join [-1 x -2 y] [-o list] file1 file2\n"); + exits("usage"); + } if (j1 < 1 || j2 < 1) - error("invalid field indices", ""); + sysfatal("invalid field indices"); j1--; j2--; /* everyone else believes in 0 origin */ - s1 = ppi[F1][j1]; - s2 = ppi[F2][j2]; + if (strcmp(argv[1], "-") == 0) - f[F1] = stdin; - else if ((f[F1] = fopen(argv[1], "r")) == 0) - error("can't open %s", argv[1]); - if(strcmp(argv[2], "-") == 0) { - f[F2] = stdin; - } else if ((f[F2] = fopen(argv[2], "r")) == 0) - error("can't open %s", argv[2]); - - if(ftell(f[F2]) >= 0) - seek2(); - else if(ftell(f[F1]) >= 0) - seek1(); - else - error("neither file is randomly accessible",""); + f[F1] = bin; + else if ((f[F1] = Bopen(argv[1], OREAD)) == 0) + sysfatal("can't open %s: %r", argv[1]); + if(strcmp(argv[2], "-") == 0) + f[F2] = bin; + else if ((f[F2] = Bopen(argv[2], OREAD)) == 0) + sysfatal("can't open %s: %r", argv[2]); + + off1 = Boffset(f[F1]); + off2 = Boffset(f[F2]); + if(Bseek(f[F2], 0, 2) >= 0){ + Bseek(f[F2], off2, 0); + join(F2); + }else if(Bseek(f[F1], 0, 2) >= 0){ + Bseek(f[F1], off1, 0); + Bseek(f[F2], off2, 0); + join(F1); + }else + sysfatal("neither file is randomly accessible"); if (discard) - error("some input line was truncated", ""); + sysfatal("some input line was truncated"); exits(""); } -int runecmp(Rune *a, Rune *b){ - while(*a==*b){ - if(*a=='\0') return 0; - a++; - b++; - } - 
if(*a<*b) return -1; - return 1; -} -char *runetostr(char *buf, Rune *r){ + +char * +runetostr(char *buf, Rune *r) +{ char *s; - for(s=buf;*r;r++) s+=runetochar(s, r); - *s='\0'; + + for(s = buf; *r; r++) + s += runetochar(s, r); + *s = '\0'; return buf; } -Rune *strtorune(Rune *buf, char *s){ + +Rune * +strtorune(Rune *buf, char *s) +{ Rune *r; - for(r=buf;*s;r++) s+=chartorune(r, s); - *r='\0'; + + for (r = buf; *s; r++) + s += chartorune(r, s); + *r = '\0'; return buf; } -/* lazy. there ought to be a clean way to combine seek1 & seek2 */ -#define get1() n1=input(F1) -#define get2() n2=input(F2) + void -seek2() +readboth(int n[]) { - int n1, n2; - int top2=0; - int bot2 = ftell(f[F2]); - get1(); - get2(); - while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { - if(n1>0 && n2>0 && comp()>0 || n1==0) { - if(a2) output(0, n2); - bot2 = ftell(f[F2]); - get2(); - } else if(n1>0 && n2>0 && comp()<0 || n2==0) { - if(a1) output(n1, 0); - get1(); - } else /*(n1>0 && n2>0 && comp()==0)*/ { - while(n2>0 && comp()==0) { - output(n1, n2); - top2 = ftell(f[F2]); - get2(); - } - fseek(f[F2], bot2, 0); - get2(); - get1(); - for(;;) { - if(n1>0 && n2>0 && comp()==0) { - output(n1, n2); - get2(); - } else if(n1>0 && n2>0 && comp()<0 || n2==0) { - fseek(f[F2], bot2, 0); - get2(); - get1(); - } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ - fseek(f[F2], top2, 0); - bot2 = top2; - get2(); - break; - } - } - } - } + n[F1] = input(F1); + n[F2] = input(F2); } + void -seek1() +seekbotreadboth(int seekf, vlong bot, int n[]) { - int n1, n2; - int top1=0; - int bot1 = ftell(f[F1]); - get1(); - get2(); - while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { - if(n1>0 && n2>0 && comp()>0 || n1==0) { - if(a2) output(0, n2); - get2(); - } else if(n1>0 && n2>0 && comp()<0 || n2==0) { - if(a1) output(n1, 0); - bot1 = ftell(f[F1]); - get1(); - } else /*(n1>0 && n2>0 && comp()==0)*/ { - while(n2>0 && comp()==0) { - output(n1, n2); - top1 = ftell(f[F1]); - get1(); + Bseek(f[seekf], bot, 0); + readboth(n); +} + 
+void +join(int seekf) +{ + int cmp, less; + int n[NIN]; + vlong top, bot; + + less = seekf == F2; + top = 0; + bot = Boffset(f[seekf]); + readboth(n); + while(n[F1]>0 && n[F2]>0 || (a1||a2) && n[F1]+n[F2]>0) { + cmp = comp(); + if(n[F1]>0 && n[F2]>0 && cmp>0 || n[F1]==0) { + if(a2) + output(0, n[F2]); + if (seekf == F2) + bot = Boffset(f[seekf]); + n[F2] = input(F2); + } else if(n[F1]>0 && n[F2]>0 && cmp<0 || n[F2]==0) { + if(a1) + output(n[F1], 0); + if (seekf == F1) + bot = Boffset(f[seekf]); + n[F1] = input(F1); + } else { + /* n[F1]>0 && n[F2]>0 && cmp==0 */ + while(n[F2]>0 && cmp==0) { + output(n[F1], n[F2]); + top = Boffset(f[seekf]); + n[seekf] = input(seekf); + cmp = comp(); } - fseek(f[F1], bot1, 0); - get2(); - get1(); + seekbotreadboth(seekf, bot, n); for(;;) { - if(n1>0 && n2>0 && comp()==0) { - output(n1, n2); - get1(); - } else if(n1>0 && n2>0 && comp()>0 || n1==0) { - fseek(f[F1], bot1, 0); - get2(); - get1(); - } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ - fseek(f[F1], top1, 0); - bot1 = top1; - get1(); + cmp = comp(); + if(n[F1]>0 && n[F2]>0 && cmp==0) { + output(n[F1], n[F2]); + n[seekf] = input(seekf); + } else if(n[F1]>0 && n[F2]>0 && + (less? cmp<0 :cmp>0) || n[seekf]==0) + seekbotreadboth(seekf, bot, n); + else { + /* + * n[F1]>0 && n[F2]>0 && + * (less? cmp>0 :cmp<0) || + * n[seekf==F1? 
F2: F1]==0 + */ + Bseek(f[seekf], top, 0); + bot = top; + n[seekf] = input(seekf); break; } } @@ -253,50 +256,63 @@ int input(int n) /* get input line and split into fields */ { - register int i, c; + int c, i, len; + char *line; Rune *bp; Rune **pp; - char line[BUFSIZ]; bp = buf[n]; pp = ppi[n]; - if (fgets(line, BUFSIZ, f[n]) == 0) + line = Brdline(f[n], '\n'); + if (line == nil) return(0); + len = Blinelen(f[n]) - 1; + c = line[len]; + line[len] = '\0'; strtorune(bp, line); + line[len] = c; /* restore delimiter */ + if (c != '\n') + discard++; + i = 0; do { i++; if (sep1 == ' ') /* strip multiples */ while ((c = *bp) == sep1 || c == sep2) bp++; /* skip blanks */ - *pp++ = bp; /* record beginning */ - while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') + *pp++ = bp; /* record beginning */ + while ((c = *bp) != sep1 && c != sep2 && c != '\0') bp++; - *bp++ = '\0'; /* mark end by overwriting blank */ - } while (c != '\n' && c != '\0' && i < NFLD-1); - if (c != '\n') - discard++; + *bp++ = '\0'; /* mark end by overwriting blank */ + } while (c != '\0' && i < NFLD-1); *pp = 0; return(i); } void +prfields(int f, int on, int jn) +{ + int i; + char buf[Bsize]; + + for (i = 0; i < on; i++) + if (i != jn) + Bprint(bout, "%s%s", sepstr, runetostr(buf, ppi[f][i])); +} + +void output(int on1, int on2) /* print items from olist */ { int i; Rune *temp; - char buf[BUFSIZ]; + char buf[Bsize]; if (no <= 0) { /* default case */ - printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); - for (i = 0; i < on1; i++) - if (i != j1) - printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); - for (i = 0; i < on2; i++) - if (i != j2) - printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); - printf("\n"); + Bprint(bout, "%s", runetostr(buf, on1? 
ppi[F1][j1]: ppi[F2][j2])); + prfields(F1, on1, j1); + prfields(F2, on2, j2); + Bputc(bout, '\n'); } else { for (i = 0; i < no; i++) { if (olistf[i]==F0 && on1>j1) @@ -310,24 +326,15 @@ *temp==0) temp = null; } - printf("%s", runetostr(buf, temp)); + Bprint(bout, "%s", runetostr(buf, temp)); if (i == no - 1) - printf("\n"); + Bputc(bout, '\n'); else - printf("%s", sepstr); + Bprint(bout, "%s", sepstr); } } } -void -error(char *s1, char *s2) -{ - fprintf(stderr, "join: "); - fprintf(stderr, s1, s2); - fprintf(stderr, "\n"); - exits(s1); -} - char * getoptarg(int *argcp, char ***argvp) { @@ -336,7 +343,7 @@ if(argv[1][2] != 0) return &argv[1][2]; if(argc<=2 || argv[2][0]=='-') - error("incomplete option %s", argv[1]); + sysfatal("incomplete option %s", argv[1]); *argcp = argc-1; *argvp = ++argv; return argv[1]; @@ -358,9 +365,10 @@ olistf[no] = *s=='1'? F1: F2; olist[no] = atoi(s += 2); break; - } /* fall thru */ + } + /* fall thru */ default: - error("invalid -o list", ""); + sysfatal("invalid -o list"); } if(s[1] == ',') s++;