- add -I flag (fold runes). this is an analogue to -i. see man page changes. - correct merging of rune classes, and make them bigger to accommodate -I - correct bursting of rune classes (cf. arisaw's 9fans message of today) Reference: /n/sources/patch/grepcapi Date: Sun Mar 30 01:24:30 CET 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/cmd/grep/grep.h Sun Mar 30 01:22:03 2014 +++ /sys/src/cmd/grep/grep.h Sun Mar 30 01:22:02 2014 @@ -64,7 +64,8 @@ Nflag = 1<<5, Sflag = 1<<6, Vflag = 1<<7, - Bflag = 1<<8 + Bflag = 1<<8, + IIflag = 1<<9, }; EXTERN union --- /sys/src/cmd/grep/sub.c Sun Mar 30 01:22:06 2014 +++ /sys/src/cmd/grep/sub.c Sun Mar 30 01:22:04 2014 @@ -196,17 +196,35 @@ getrec(void) { int c; + Rune r; + static char rem[10], *remp, *reme; - if(flags['f']) { - c = Bgetc(rein); - if(c <= 0) - return 0; - } else - c = *input++ & 0xff; - if(flags['i'] && c >= 'A' && c <= 'Z') - c += 'a'-'A'; - if(c == '\n') - lineno++; + if(remp != reme) + c = (uchar)*remp++; + else { + if(flags['f']) { + c = Bgetrune(rein); + if(c <= 0) + return 0; + }else{ + input += chartorune(&r, input); + c = r; + } + if(flags['I']){ + r = tobaserune(c); + if(Iflag) + r = tolowerrune(r); + }else if(flags['i'] && c >= 'A' && c <= 'Z') + c += 'a'-'A'; + if(c >= Runesync){ + reme = rem + runetochar(rem, &r); + *reme = 0; + remp = rem; + c = (uchar)*remp++; + } + if(c == '\n') + lineno++; + } return c; } --- /sys/src/cmd/grep/main.c Sun Mar 30 01:22:08 2014 +++ /sys/src/cmd/grep/main.c Sun Mar 30 01:22:07 2014 @@ -1,7 +1,7 @@ #define EXTERN #include "grep.h" -char *validflags = "bchiLlnsv"; +char *validflags = "bchIiLlnsv"; void usage(void) { @@ -73,9 +73,11 @@ int search(char *file, int flag) { + Rune r; State *s, *ns; int c, fid, eof, nl, empty; long count, lineno, n; + uchar rem[10], *remp, *reme; uchar *elp, *lp, *bol; if(file == 0) { @@ -98,6 +100,8 @@ flag &= ~Hflag; /* do not print file name in output */ if(flags['i']) flag |= Iflag; /* fold upper-lower */ + if(flags['I']) + flag |= 
IIflag; /* fold runes to base rune */ if(flags['l']) flag |= Llflag; /* print only name of file if any match */ if(flags['L']) @@ -117,6 +121,7 @@ nl = 0; lp = u.buf; bol = lp; + reme = remp = rem; loop0: n = lp-bol; @@ -154,6 +159,8 @@ } lp = u.buf; elp = lp+n; + if(flag & IIflag) + goto loopI; if(flag & Iflag) goto loopi; @@ -236,6 +243,64 @@ if(lp != elp) goto loopi; goto loop0; + +/* + * character loop for -I flag + * for speed + */ +loopI: + if(remp != reme) + c = *remp++; + else{ + c = *lp; + if(c > Runesync){ + if(!fullrune((char*)lp, elp-lp) && !empty) + goto loop0; + lp += chartorune(&r, (char*)lp); + r = tobaserune(r); + if(flag & Iflag) + r = tolowerrune(r); + reme = rem + runetochar((char*)rem, &r); + *reme = 0; + remp = rem; + c = *remp++; + }else{ + lp++; + if((flag & Iflag) && c >= 'A' && c <= 'Z') + c += 'a'-'A'; + } + } +loopI0: + ns = s->next[c]; + if(ns == 0) { + increment(s, c); + goto loopI0; + } + s = ns; + if(c == '\n') { + lineno++; + if(!!s->match == !(flag&Vflag)) { + count++; + if(flag & (Cflag|Sflag|Llflag|LLflag)) + goto contI; + if(flag & Hflag) + Bprint(&bout, "%s:", file); + if(flag & Nflag) + Bprint(&bout, "%ld: ", lineno); + /* suppress extra newline at EOF unless we are labeling matches with file name */ + Bwrite(&bout, bol, lp-bol-(eof && !(flag&Hflag))); + if(flag & Bflag) + Bflush(&bout); + } + if((lineno & Flshcnt) == 0) + Bflush(&bout); + contI: + bol = lp; + } + if(lp != elp) + goto loopI; + goto loop0; + } State* --- /sys/src/cmd/grep/comp.c Sun Mar 30 01:22:11 2014 +++ /sys/src/cmd/grep/comp.c Sun Mar 30 01:22:10 2014 @@ -135,11 +135,13 @@ { 0x007f, 0x07ff, + 0xffff, }; Rune tab2[] = { 0x003f, 0x0fff, + 0xffff, }; Re2 @@ -215,7 +217,7 @@ Re2 re2class(char *s) { - Rune pairs[200+2], *p, *q, ov; + Rune pairs[400+2], *p, *q, ov; int nc; Re2 x; @@ -234,7 +236,7 @@ break; p[1] = *p; p += 2; - if(p >= pairs + nelem(pairs) - 2) + if(p == pairs + nelem(pairs) - 2) error("class too big"); s += chartorune(p, s); if(*p != '-') @@ 
-254,7 +256,7 @@ for(p=pairs+2; *p; p+=2) { if(p[0] > p[1]) continue; - if(p[0] > q[1] || p[1] < q[0]) { + if(p[0] > q[1]+1 || p[1] < q[0]) { q[2] = p[0]; q[3] = p[1]; q += 2; @@ -275,7 +277,7 @@ x = re2or(x, rclass(ov, p[0]-1)); ov = p[1]+1; } - x = re2or(x, rclass(ov, Runemask)); + x = re2or(x, rclass(ov, 0xffff)); } else { x = rclass(p[0], p[1]); for(p+=2; *p; p+=2) --- /sys/man/1/grep Sun Mar 30 01:22:14 2014 +++ /sys/man/1/grep Sun Mar 30 01:22:12 2014 @@ -4,7 +4,7 @@ .SH SYNOPSIS .B grep [ -.B -bchiLlnsv +.B -bchiILlnsv ] [ .B -e @@ -51,6 +51,13 @@ Ignore alphabetic case distinctions. The implementation folds into lower case all letters in the pattern and input before interpretation. Matched lines are printed in their original form. +.TP +.B -I +Ignore Unicode variations; use base codepoint. The implementation +folds all letters in the pattern and input into their base codepoint before +interpretation. Matched lines are printed in their original form. This +may be combined with +.BR -i . .TP .B -l (ell) Print the names of files with selected lines; don't print the lines.