Caught and fixed some references to 0x[12]0000 that should have been Runemax+1, Runemask+1, or similar. Also removed the private UTF routines (fullrune1, chartorune1) from file.c. Reference: /n/sources/patch/applied/rune1-regexp-file-grep Date: Sat Apr 13 01:17:17 CES 2013 Signed-off-by: geoff@plan9.bell-labs.com --- /sys/src/cmd/grep/grep.h Sat Apr 13 01:16:19 2013 +++ /sys/src/cmd/grep/grep.h Sat Apr 13 01:16:19 2013 @@ -53,7 +53,7 @@ Caselim = 7, Nhunk = 1<<16, - Cbegin = 0x10000, + Cbegin = Runemax+1, Flshcnt = (1<<9)-1, Cflag = 1<<0, --- /sys/src/cmd/sam/regexp.c Sat Apr 13 01:16:19 2013 +++ /sys/src/cmd/sam/regexp.c Sat Apr 13 01:16:19 2013 @@ -9,7 +9,7 @@ struct Inst { - long type; /* < 0x10000 ==> literal, otherwise action */ + long type; /* <= Runemax ==> literal, otherwise action */ union { int rsid; int rsubid; @@ -46,7 +46,7 @@ #define NLIST 127 -Ilist *tl, *nl; /* This list, next list */ +Ilist *tl, *nl; /* This list, next list */ Ilist list[2][NLIST+1]; /* +1 for trailing null */ static Rangeset sempty; @@ -56,25 +56,28 @@ * 0x100xx are operators, value == precedence * 0x200xx are tokens, i.e. operands for operators */ -#define OPERATOR 0x10000 /* Bitmask of all operators */ -#define START 0x10000 /* Start, used for marker on stack */ -#define RBRA 0x10001 /* Right bracket, ) */ -#define LBRA 0x10002 /* Left bracket, ( */ -#define OR 0x10003 /* Alternation, | */ -#define CAT 0x10004 /* Concatentation, implicit operator */ -#define STAR 0x10005 /* Closure, * */ -#define PLUS 0x10006 /* a+ == aa* */ -#define QUEST 0x10007 /* a? == a|nothing, i.e. 0 or 1 a's */ -#define ANY 0x20000 /* Any character but newline, . 
*/ -#define NOP 0x20001 /* No operation, internal use only */ -#define BOL 0x20002 /* Beginning of line, ^ */ -#define EOL 0x20003 /* End of line, $ */ -#define CCLASS 0x20004 /* Character class, [] */ -#define NCCLASS 0x20005 /* Negated character class, [^] */ -#define END 0x20077 /* Terminate: match found */ +enum { + OPERATOR = Runemask+1, /* Bitmask of all operators */ + START = OPERATOR, /* Start, used for marker on stack */ + RBRA, /* Right bracket, ) */ + LBRA, /* Left bracket, ( */ + OR, /* Alternation, | */ + CAT, /* Concatentation, implicit operator */ + STAR, /* Closure, * */ + PLUS, /* a+ == aa* */ + QUEST, /* a? == a|nothing, i.e. 0 or 1 a's */ + + ANY = OPERATOR<<1, /* Any character but newline, . */ + NOP, /* No operation, internal use only */ + BOL, /* Beginning of line, ^ */ + EOL, /* End of line, $ */ + CCLASS, /* Character class, [] */ + NCCLASS, /* Negated character class, [^] */ + END, /* Terminate: match found */ -#define ISATOR 0x10000 -#define ISAND 0x20000 + ISATOR = OPERATOR, + ISAND = OPERATOR<<1, +}; /* * Parser Information @@ -459,7 +462,7 @@ exprp++; return '\n'; } - return *exprp++|0x10000; + return *exprp++|(Runemax+1); } return *exprp++; } --- /sys/src/cmd/acme/regx.c Sat Apr 13 01:16:19 2013 +++ /sys/src/cmd/acme/regx.c Sat Apr 13 01:16:19 2013 @@ -20,7 +20,7 @@ typedef struct Inst Inst; struct Inst { - uint type; /* < 0x10000 ==> literal, otherwise action */ + uint type; /* <= Runemax ==> literal, otherwise action */ union { int sid; int subid; @@ -61,25 +61,28 @@ * 0x100xx are operators, value == precedence * 0x200xx are tokens, i.e. 
operands for operators */ -#define OPERATOR 0x10000 /* Bitmask of all operators */ -#define START 0x10000 /* Start, used for marker on stack */ -#define RBRA 0x10001 /* Right bracket, ) */ -#define LBRA 0x10002 /* Left bracket, ( */ -#define OR 0x10003 /* Alternation, | */ -#define CAT 0x10004 /* Concatentation, implicit operator */ -#define STAR 0x10005 /* Closure, * */ -#define PLUS 0x10006 /* a+ == aa* */ -#define QUEST 0x10007 /* a? == a|nothing, i.e. 0 or 1 a's */ -#define ANY 0x20000 /* Any character but newline, . */ -#define NOP 0x20001 /* No operation, internal use only */ -#define BOL 0x20002 /* Beginning of line, ^ */ -#define EOL 0x20003 /* End of line, $ */ -#define CCLASS 0x20004 /* Character class, [] */ -#define NCCLASS 0x20005 /* Negated character class, [^] */ -#define END 0x20077 /* Terminate: match found */ +enum { + OPERATOR = Runemask+1, /* Bitmask of all operators */ + START = OPERATOR, /* Start, used for marker on stack */ + RBRA, /* Right bracket, ) */ + LBRA, /* Left bracket, ( */ + OR, /* Alternation, | */ + CAT, /* Concatentation, implicit operator */ + STAR, /* Closure, * */ + PLUS, /* a+ == aa* */ + QUEST, /* a? == a|nothing, i.e. 0 or 1 a's */ + + ANY = OPERATOR<<1, /* Any character but newline, . */ + NOP, /* No operation, internal use only */ + BOL, /* Beginning of line, ^ */ + EOL, /* End of line, $ */ + CCLASS, /* Character class, [] */ + NCCLASS, /* Negated character class, [^] */ + END, /* Terminate: match found */ -#define ISATOR 0x10000 -#define ISAND 0x20000 + ISATOR = OPERATOR, + ISAND = OPERATOR<<1, +}; /* * Parser Information @@ -452,7 +455,7 @@ exprp++; return '\n'; } - return *exprp++|0x10000; + return *exprp++|(Runemax+1); } return *exprp++; } --- /sys/src/cmd/file.c Sat Apr 13 01:16:19 2013 +++ /sys/src/cmd/file.c Sat Apr 13 01:16:19 2013 @@ -267,64 +267,10 @@ close(fd); } -/* - * Unicode 4.0 4-byte runes. 
- */ -typedef int Rune1; - -enum { - UTFmax1 = 4, -}; - -int -fullrune1(char *p, int n) -{ - int c; - - if(n >= 1) { - c = *(uchar*)p; - if(c < 0x80) - return 1; - if(n >= 2 && c < 0xE0) - return 1; - if(n >= 3 && c < 0xF0) - return 1; - if(n >= 4) - return 1; - } - return 0; -} - -int -chartorune1(Rune1 *rune, char *str) -{ - int c, c1, c2, c3, n; - Rune r; - - c = *(uchar*)str; - if(c < 0xF0){ - r = 0; - n = chartorune(&r, str); - *rune = r; - return n; - } - c &= ~0xF0; - c1 = *(uchar*)(str+1) & ~0x80; - c2 = *(uchar*)(str+2) & ~0x80; - c3 = *(uchar*)(str+3) & ~0x80; - n = (c<<18) | (c1<<12) | (c2<<6) | c3; - if(n < 0x10000 || n > 0x10FFFF){ - *rune = Runeerror; - return 1; - } - *rune = n; - return 4; -} - void filetype(int fd) { - Rune1 r; + Rune r; int i, f, n; char *p, *eob; @@ -363,9 +309,9 @@ language[i].count = 0; eob = (char *)buf+nbuf; for(n = 0, p = (char *)buf; p < eob; n++) { - if (!fullrune1(p, eob-p) && eob-p < UTFmax1) + if (!fullrune(p, eob-p) && eob-p < UTFmax) break; - p += chartorune1(&r, p); + p += chartorune(&r, p); if (r == 0) f = Cnull; else if (r <= 0x7f) {