Fix day-one bug in sam's regexp code (and copies in libregexp and acme) that causes unnecessary regexp overflow errors when searching for /a***a/. Reference: /n/sources/patch/applied/regexp-overflow Date: Sat Jun 9 07:21:00 CES 2007 Signed-off-by: rsc@swtch.com --- /sys/src/libregexp/regexec.c Sat Jun 9 07:19:52 2007 +++ /sys/src/libregexp/regexec.c Sat Jun 9 07:19:52 2007 @@ -133,7 +133,7 @@ break; case OR: /* evaluate right choice later */ - if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) + if(_renewthread(tl, inst->right, ms, &tlp->se) == tle) return -1; /* efficiency: advance and re-evaluate */ continue; --- /sys/src/libregexp/rregexec.c Sat Jun 9 07:19:53 2007 +++ /sys/src/libregexp/rregexec.c Sat Jun 9 07:19:53 2007 @@ -125,7 +125,7 @@ break; case OR: /* evaluate right choice later */ - if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) + if(_renewthread(tl, inst->right, ms, &tlp->se) == tle) return -1; /* efficiency: advance and re-evaluate */ continue; --- /sys/src/cmd/sam/regexp.c Sat Jun 9 07:19:55 2007 +++ /sys/src/cmd/sam/regexp.c Sat Jun 9 07:19:55 2007 @@ -44,10 +44,10 @@ Posn startp; /* first char of match */ }; -#define NLIST 128 +#define NLIST 127 Ilist *tl, *nl; /* This list, next list */ -Ilist list[2][NLIST]; +Ilist list[2][NLIST+1]; /* +1 for trailing null */ static Rangeset sempty; /* @@ -104,7 +104,7 @@ Rune **class; int negateclass; -void addinst(Ilist *l, Inst *inst, Rangeset *sep); +int addinst(Ilist *l, Inst *inst, Rangeset *sep); void newmatch(Rangeset*); void bnewmatch(Rangeset*); void pushand(Inst*, Inst*); @@ -531,7 +531,7 @@ * *l must be pending when addinst called; if *l has been looked * at already, the optimization is a bug. 
*/ -void +int addinst(Ilist *l, Inst *inst, Rangeset *sep) { Ilist *p; @@ -540,12 +540,13 @@ if(p->inst==inst){ if((sep)->p[0].p1 < p->se.p[0].p1) p->se= *sep; /* this would be bug */ - return; /* It's already there */ + return 0; /* It's already there */ } } p->inst = inst; p->se= *sep; (p+1)->inst = 0; + return 1; } int @@ -592,11 +593,11 @@ nnl = 0; if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){ /* Add first instruction to this list */ + sempty.p[0].p1 = p; + if(addinst(tl, startinst, &sempty)) if(++ntl >= NLIST) Overflow: error(Eoverflow); - sempty.p[0].p1 = p; - addinst(tl, startinst, &sempty); } /* Execute machine until this list is empty */ for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */ @@ -605,9 +606,9 @@ default: /* regular character */ if(inst->type==c){ Addinst: + if(addinst(nl, inst->next, &tlp->se)) if(++nnl >= NLIST) goto Overflow; - addinst(nl, inst->next, &tlp->se); } break; case LBRA: @@ -645,9 +646,9 @@ break; case OR: /* evaluate right choice later */ + if(addinst(tl, inst->right, &tlp->se)) if(++ntl >= NLIST) goto Overflow; - addinst(tlp, inst->right, &tlp->se); /* efficiency: advance and re-evaluate */ inst = inst->left; goto Switchstmt; @@ -717,12 +718,12 @@ nnl = 0; if(sel.p[0].p1<0 && (!wrapped || p>startp)){ /* Add first instruction to this list */ + /* the minus is so the optimizations in addinst work */ + sempty.p[0].p1 = -p; + if(addinst(tl, bstartinst, &sempty)) if(++ntl >= NLIST) Overflow: error(Eoverflow); - /* the minus is so the optimizations in addinst work */ - sempty.p[0].p1 = -p; - addinst(tl, bstartinst, &sempty); } /* Execute machine until this list is empty */ for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */ @@ -731,9 +732,9 @@ default: /* regular character */ if(inst->type == c){ Addinst: + if(addinst(nl, inst->next, &tlp->se)) if(++nnl >= NLIST) goto Overflow; - addinst(nl, inst->next, &tlp->se); } break; case LBRA: @@ -771,9 +772,9 @@ break; case OR: /* evaluate right choice later */ + if(addinst(tl, inst->right, &tlp->se)) if(++ntl >= NLIST) goto Overflow; - addinst(tlp, inst->right, &tlp->se); /* efficiency: advance and 
re-evaluate */ inst = inst->left; goto Switchstmt; --- /sys/src/cmd/acme/regx.c Sat Jun 9 07:19:57 2007 +++ /sys/src/cmd/acme/regx.c Sat Jun 9 07:19:57 2007 @@ -49,10 +49,10 @@ uint startp; /* first char of match */ }; -#define NLIST 128 +#define NLIST 127 Ilist *tl, *nl; /* This list, next list */ -Ilist list[2][NLIST]; +Ilist list[2][NLIST+1]; /* +1 for trailing null */ static Rangeset sempty; /* @@ -109,7 +109,7 @@ Rune **class; int negateclass; -void addinst(Ilist *l, Inst *inst, Rangeset *sep); +int addinst(Ilist *l, Inst *inst, Rangeset *sep); void newmatch(Rangeset*); void bnewmatch(Rangeset*); void pushand(Inst*, Inst*); @@ -524,7 +524,7 @@ * *l must be pending when addinst called; if *l has been looked * at already, the optimization is a bug. */ -void +int addinst(Ilist *l, Inst *inst, Rangeset *sep) { Ilist *p; @@ -533,12 +533,13 @@ if(p->inst==inst){ if((sep)->r[0].q0 < p->se.r[0].q0) p->se= *sep; /* this would be bug */ - return; /* It's already there */ + return 0; /* It's already there */ } } p->inst = inst; p->se= *sep; (p+1)->inst = nil; + return 1; } int @@ -609,14 +610,14 @@ nnl = 0; if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){ /* Add first instruction to this list */ + sempty.r[0].q0 = p; + if(addinst(tl, startinst, &sempty)) if(++ntl >= NLIST){ Overflow: warning(nil, "regexp list overflow\n"); sel.r[0].q0 = -1; goto Return; } - sempty.r[0].q0 = p; - addinst(tl, startinst, &sempty); } /* Execute machine until this list is empty */ for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */ @@ -625,9 +626,9 @@ default: /* regular character */ if(inst->type==c){ Addinst: + if(addinst(nl, inst->next, &tlp->se)) if(++nnl >= NLIST) goto Overflow; - addinst(nl, inst->next, &tlp->se); } break; case LBRA: @@ -665,9 +666,9 @@ break; case OR: /* evaluate right choice later */ + if(addinst(tl, inst->right, &tlp->se)) if(++ntl >= NLIST) goto Overflow; - addinst(tlp, inst->right, &tlp->se); /* efficiency: advance and re-evaluate */ inst = inst->left; goto Switchstmt; @@ -746,15 +747,15 @@ nnl = 0; if(sel.r[0].q0<0 && (!wrapped || p>startp)){ /* Add first instruction to this list 
*/ + /* the minus is so the optimizations in addinst work */ + sempty.r[0].q0 = -p; + if(addinst(tl, bstartinst, &sempty)) if(++ntl >= NLIST){ Overflow: warning(nil, "regexp list overflow\n"); sel.r[0].q0 = -1; goto Return; } - /* the minus is so the optimizations in addinst work */ - sempty.r[0].q0 = -p; - addinst(tl, bstartinst, &sempty); } /* Execute machine until this list is empty */ for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */ @@ -763,9 +764,9 @@ default: /* regular character */ if(inst->type == c){ Addinst: + if(addinst(nl, inst->next, &tlp->se)) if(++nnl >= NLIST) goto Overflow; - addinst(nl, inst->next, &tlp->se); } break; case LBRA: @@ -803,9 +804,9 @@ break; case OR: /* evaluate right choice later */ + if(addinst(tl, inst->right, &tlp->se)) if(++ntl >= NLIST) goto Overflow; - addinst(tlp, inst->right, &tlp->se); /* efficiency: advance and re-evaluate */ inst = inst->left; goto Switchstmt;