update to 20070501 version. i've been testing this for 2 years. it should put us in a better position to accept more updates. Reference: /n/sources/patch/maybe/bwk-awk-update Date: Sun Mar 3 22:37:38 CET 2013 Signed-off-by: quanstro@quanstro.net --- /sys/src/cmd/awk/awk.h Sun Mar 3 22:34:28 2013 +++ /sys/src/cmd/awk/awk.h Sun Mar 3 22:34:26 2013 @@ -10,8 +10,10 @@ typedef unsigned char uschar; -#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } } +#define xfree(a) { if ((a) != NULL) { free((void *) (a)); (a) = NULL; } } +#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for dprintf +*/ #define DEBUG #ifdef DEBUG /* uses have to be doubly parenthesized */ @@ -20,8 +22,6 @@ # define dprintf(x) #endif -extern char errbuf[]; - extern int compile_time; /* 1 if compiling, 0 if running */ extern int safe; /* 0 => unsafe, 1 => safe */ @@ -107,6 +107,7 @@ #define FTOLOWER 13 #define FFLUSH 14 #define FUTF 15 +#define FSTRTONUM 16 /* Node: parse tree is made of nodes, with Cell's at bottom */ @@ -166,8 +167,7 @@ #define isexit(n) ((n)->csub == JEXIT) #define isbreak(n) ((n)->csub == JBREAK) #define iscont(n) ((n)->csub == JCONT) -#define isnext(n) ((n)->csub == JNEXT) -#define isnextfile(n) ((n)->csub == JNEXTFILE) +#define isnext(n) ((n)->csub == JNEXT || (n)->csub == JNEXTFILE) #define isret(n) ((n)->csub == JRET) #define isrec(n) ((n)->tval & REC) #define isfld(n) ((n)->tval & FLD) --- /sys/src/cmd/awk/awkgram.y Sun Mar 3 22:34:31 2013 +++ /sys/src/cmd/awk/awkgram.y Sun Mar 3 22:34:29 2013 @@ -52,7 +52,7 @@ %token NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']' %token ARRAY %token MATCH NOTMATCH MATCHOP -%token FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS +%token FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE %token AND BOR APPEND EQ GE GT LE LT NE IN %token ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC %token SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE @@ -318,7 +318,6 @@ stmt: BREAK st { if (!inloop) SYNTAX("break illegal 
outside of loops"); $$ = stat1(BREAK, NIL); } - | CLOSE pattern st { $$ = stat1(CLOSE, $2); } | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops"); $$ = stat1(CONTINUE, NIL); } | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st @@ -367,6 +366,7 @@ | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); } | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); } | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); } + | CLOSE term { $$ = op1(CLOSE, $2); } | DECR var { $$ = op1(PREDECR, $2); } | INCR var { $$ = op1(PREINCR, $2); } | var DECR { $$ = op1(POSTDECR, $1); } --- /sys/src/cmd/awk/lex.c Sun Mar 3 22:34:34 2013 +++ /sys/src/cmd/awk/lex.c Sun Mar 3 22:34:32 2013 @@ -38,7 +38,7 @@ int parencnt = 0; typedef struct Keyword { - char *word; + const char *word; int sub; int type; } Keyword; @@ -81,6 +81,7 @@ { "sprintf", SPRINTF, SPRINTF }, { "sqrt", FSQRT, BLTIN }, { "srand", FSRAND, BLTIN }, + { "strtonum", FSTRTONUM, BLTIN }, { "sub", SUB, SUB }, { "substr", SUBSTR, SUBSTR }, { "system", FSYSTEM, BLTIN }, @@ -90,12 +91,7 @@ { "while", WHILE, WHILE }, }; -#define DEBUG -#ifdef DEBUG #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } -#else -#define RET(x) return(x) -#endif int peek(void) { @@ -106,7 +102,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */ { - int c; + int c, retc; char *buf = *pbuf; int sz = *psz; char *bp = buf; @@ -123,7 +119,7 @@ if (isalpha(c) || c == '_') { /* it's a varname */ for ( ; (c = input()) != 0; ) { if (bp-buf >= sz) - if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0)) + if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) FATAL( "out of space for name %.10s...", buf ); if (isalnum(c) || c == '_') *bp++ = c; @@ -133,12 +129,14 @@ break; } } - } else { /* it's a number */ + *bp = 0; + retc = 'a'; /* alphanumeric */ + } else { /* maybe it's a number, but could be . 
*/ char *rem; /* read input until can't be a number */ for ( ; (c = input()) != 0; ) { if (bp-buf >= sz) - if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0)) + if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) FATAL( "out of space for number %.10s...", buf ); if (isdigit(c) || c == 'e' || c == 'E' || c == '.' || c == '+' || c == '-') @@ -150,12 +148,19 @@ } *bp = 0; strtod(buf, &rem); /* parse the number */ - unputstr(rem); /* put rest back for later */ - rem[0] = 0; + if (rem == buf) { /* it wasn't a valid number at all */ + buf[1] = 0; /* return one character as token */ + retc = buf[0]; /* character is its own type */ + unputstr(rem+1); /* put rest back for later */ + } else { /* some prefix was a number */ + unputstr(rem); /* put rest back for later */ + rem[0] = 0; /* truncate buf after number part */ + retc = '0'; /* type is number */ + } } *pbuf = buf; *psz = sz; - return buf[0]; + return retc; } int word(char *); @@ -186,7 +191,7 @@ return 0; if (isalpha(c) || c == '_') return word(buf); - if (isdigit(c) || c == '.') { + if (isdigit(c)) { yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); /* should this also have STR set? 
*/ RET(NUMBER); @@ -295,20 +300,25 @@ input(); yylval.i = POWEQ; RET(ASGNOP); } else RET(POWER); - + case '$': /* BUG: awkward, if not wrong */ c = gettok(&buf, &bufsize); - if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { - unputstr(buf); - RET(INDIRECT); - } else if (isalpha(c)) { + if (isalpha(c)) { if (strcmp(buf, "NF") == 0) { /* very special */ unputstr("(NF)"); RET(INDIRECT); } + c = peek(); + if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { + unputstr(buf); + RET(INDIRECT); + } yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); RET(IVAR); + } else if (c == 0) { /* */ + SYNTAX( "unexpected end of input after $" ); + RET(';'); } else { unputstr(buf); RET(INDIRECT); @@ -356,7 +366,7 @@ if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for strings"); for (bp = buf; (c = input()) != '"'; ) { - if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0)) + if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) FATAL("out of space for string %.10s...", buf); switch (c) { case '\n': @@ -364,6 +374,8 @@ case 0: SYNTAX( "non-terminated string %.10s...", buf ); lineno++; + if (c == 0) /* hopeless */ + FATAL( "giving up" ); break; case '\\': c = input(); @@ -448,12 +460,13 @@ int c, n; n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); +/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */ kp = keywords + n; if (n != -1) { /* found in table */ yylval.i = kp->sub; switch (kp->type) { /* special handling */ - case FSYSTEM: - if (safe) + case BLTIN: + if (kp->sub == FSYSTEM && safe) SYNTAX( "system is unsafe" ); RET(kp->type); case FUNC: @@ -485,7 +498,7 @@ } } -void startreg(void) /* next call to yyles will return a regular expression */ +void startreg(void) /* next call to yylex will return a regular expression */ { reg = 1; } @@ -501,7 +514,7 @@ FATAL("out of space for rex expr"); bp = buf; for ( ; (c = input()) != '/' && c != 0; ) { - if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0)) + 
if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { SYNTAX( "newline in regular expression %.10s...", buf ); @@ -515,6 +528,8 @@ } } *bp = 0; + if (c == 0) + SYNTAX("non-terminated regular expression %.10s...", buf); yylval.s = tostring(buf); unput('/'); RET(REGEXPR); @@ -534,9 +549,9 @@ extern char *lexprog; if (yysptr > yysbuf) - c = *--yysptr; + c = (uschar)*--yysptr; else if (lexprog != NULL) { /* awk '...' */ - if ((c = *lexprog) != 0) + if ((c = (uschar)*lexprog) != 0) lexprog++; } else /* awk -f ... */ c = pgetc(); @@ -560,7 +575,7 @@ ep = ebuf + sizeof(ebuf) - 1; } -void unputstr(char *s) /* put a string back on input */ +void unputstr(const char *s) /* put a string back on input */ { int i; --- /sys/src/cmd/awk/lib.c Sun Mar 3 22:34:38 2013 +++ /sys/src/cmd/awk/lib.c Sun Mar 3 22:34:36 2013 @@ -57,12 +57,11 @@ void recinit(unsigned int n) { - record = (char *) malloc(n); - fields = (char *) malloc(n); - fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *)); - if (record == NULL || fields == NULL || fldtab == NULL) + if ( (record = (char *) malloc(n)) == NULL + || (fields = (char *) malloc(n+1)) == NULL + || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL + || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) FATAL("out of space for $0 and fields"); - fldtab[0] = (Cell *) malloc(sizeof (Cell)); *fldtab[0] = dollar0; fldtab[0]->sval = record; fldtab[0]->nval = tostring("0"); @@ -100,12 +99,14 @@ infile = stdin; /* no filenames, so use stdin */ } +static int firsttime = 1; + int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ { /* note: cares whether buf == record */ int c; - static int firsttime = 1; char *buf = *pbuf; - int bufsize = *pbufsize; + uschar saveb0; + int bufsize = *pbufsize, savebufsize = bufsize; if (firsttime) { firsttime = 0; @@ -117,6 +118,7 @@ donefld = 0; donerec = 1; } + saveb0 = buf[0]; buf[0] = 0; while (argno < 
*ARGC || infile == stdin) { dprintf( ("argno=%d, file=|%s|\n", argno, file) ); @@ -163,14 +165,15 @@ infile = NULL; argno++; } + buf[0] = saveb0; *pbuf = buf; - *pbufsize = bufsize; + *pbufsize = savebufsize; return 0; /* true end of file */ } void nextfile(void) { - if (infile != stdin) + if (infile != NULL && infile != stdin) fclose(infile); infile = NULL; argno++; @@ -307,6 +310,13 @@ } *fr = 0; } else if (*r != 0) { /* if 0, it's a null field */ + /* subtlecase : if length(FS) == 1 && length(RS > 0) + * \n is NOT a field separator (cf awk book 61,84). + * this variable is tested in the inner while loop. + */ + int rtest = '\n'; /* normal case */ + if (strlen(*RS) > 0) + rtest = '\0'; for (;;) { i++; if (i > nfields) @@ -315,7 +325,7 @@ xfree(fldtab[i]->sval); fldtab[i]->sval = fr; fldtab[i]->tval = FLD | STR | DONTFREE; - while (*r != sep && *r != '\n' && *r != '\0') /* \n is always a separator */ + while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */ *fr++ = *r++; *fr++ = 0; if (*r++ == 0) @@ -370,7 +380,7 @@ Cell *fieldadr(int n) /* get nth field */ { if (n < 0) - FATAL("trying to access field %d", n); + FATAL("trying to access out of range field %d", n); if (n > nfields) /* fields after NF are empty */ growfldtab(n); /* but does not increase NF */ return(fldtab[n]); @@ -379,17 +389,22 @@ void growfldtab(int n) /* make new fields up to at least $n */ { int nf = 2 * nfields; + size_t s; if (n > nf) nf = n; - fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *))); + s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? 
*/ + if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */ + fldtab = (Cell **) realloc(fldtab, s); + else /* overflow sizeof int */ + xfree(fldtab); /* make it null */ if (fldtab == NULL) FATAL("out of space creating %d fields", nf); makefields(nfields+1, nf); nfields = nf; } -int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */ +int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */ { /* this relies on having fields[] the same length as $0 */ /* the fields are all stored in this one array with \0's */ @@ -471,12 +486,12 @@ int errorflag = 0; -void yyerror(char *s) +void yyerror(const char *s) { - SYNTAX(s); + SYNTAX("%s", s); } -void SYNTAX(char *fmt, ...) +void SYNTAX(const char *fmt, ...) { extern char *cmdname, *curfname; static int been_here = 0; @@ -532,7 +547,7 @@ fprintf(stderr, "\t%d extra %c's\n", -n, c2); } -void FATAL(char *fmt, ...) +void FATAL(const char *fmt, ...) { extern char *cmdname; va_list varg; @@ -548,7 +563,7 @@ exit(2); } -void WARNING(char *fmt, ...) +void WARNING(const char *fmt, ...) { extern char *cmdname; va_list varg; @@ -587,6 +602,8 @@ fprintf(stderr, " source file %s", cursource()); }else if(line >= 0) fprintf(stderr, " source line %d", line); + else + fprintf(stderr, " unknown location"); fprintf(stderr, "\n"); eprint(); } @@ -639,7 +656,7 @@ } } -double errcheck(double x, char *s) +double errcheck(double x, const char *s) { if (errno == EDOM) { @@ -654,22 +671,48 @@ return x; } -int isclvar(char *s) /* is s of form var=something ? */ +int isclvar(const char *s) /* is s of form var=something ? 
*/ { - char *os = s; + const char *os = s; - if (!isalpha(*s) && *s != '_') + if (!isalpha((uschar) *s) && *s != '_') return 0; for ( ; *s; s++) - if (!(isalnum(*s) || *s == '_')) + if (!(isalnum((uschar) *s) || *s == '_')) break; return *s == '=' && s > os && *(s+1) != '='; } /* strtod is supposed to be a proper test of what's a valid number */ +/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */ +/* wrong: violates 4.10.1.4 of ansi C standard */ + +static char ntab[128] = { +['\n'] 1, +['\v'] 1, +[' '] 1, +['\t'] 1, +['0'] 1, +['1'] 1, +['2'] 1, +['3'] 1, +['4'] 1, +['5'] 1, +['6'] 1, +['7'] 1, +['8'] 1, +['9'] 1, +['-'] 1, +['+'] 1, +['.'] 1, +['n'] 1, /* nans */ +['N'] 1, +['i'] 1, /* infs */ +['I'] 1, +}; #include -int is_number(char *s) +int is_number(const char *s) { double r; char *ep; @@ -678,26 +721,8 @@ * fast could-it-be-a-number check before calling strtod, * which takes a surprisingly long time to reject non-numbers. */ - switch (*s) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '\t': - case '\n': - case '\v': - case '\f': - case '\r': - case ' ': - case '-': - case '+': - case '.': - case 'n': /* nans */ - case 'N': - case 'i': /* infs */ - case 'I': - break; - default: + if(*s >= 0x80 || ntab[*s] == 0) return 0; /* can't be a number */ - } errno = 0; r = strtod(s, &ep); --- /sys/src/cmd/awk/main.c Sun Mar 3 22:34:40 2013 +++ /sys/src/cmd/awk/main.c Sun Mar 3 22:34:39 2013 @@ -22,11 +22,12 @@ THIS SOFTWARE. 
****************************************************************/ -char *version = "version 19990602"; +char *version = "version 20070501"; #define DEBUG #include #include +#include #include #include #include @@ -44,7 +45,9 @@ int compile_time = 2; /* for error printing: */ /* 2 = cmdline, 1 = compile, 0 = running */ -char *pfile[20]; /* program filenames from -f's */ +#define MAX_PFILE 20 /* max number of -f's */ + +char *pfile[MAX_PFILE]; /* program filenames from -f's */ int npfile = 0; /* number of filenames */ int curpfile = 0; /* current filename */ @@ -52,19 +55,27 @@ int main(int argc, char *argv[]) { - char *fs = NULL, *marg; - int temp; + const char *fs = NULL; + setlocale(LC_CTYPE, ""); + setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */ cmdname = argv[0]; if (argc == 1) { - fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname); + fprintf(stderr, + "usage: %s [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]\n", + cmdname); exit(1); } signal(SIGFPE, fpecatch); yyin = NULL; symtab = makesymtab(NSYMTAB); while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { - if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ + if (strcmp(argv[1],"-version") == 0 || strcmp(argv[1],"--version") == 0) { + printf("awk %s\n", version); + exit(0); + break; + } + if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */ argc--; argv++; break; @@ -79,6 +90,8 @@ argv++; if (argc <= 1) FATAL("no program filename"); + if (npfile >= MAX_PFILE - 1) + FATAL("too many -f options"); pfile[npfile++] = argv[1]; break; case 'F': /* set field separator */ @@ -101,21 +114,6 @@ if (argv[1][2] == '\0' && --argc > 1 && isclvar((++argv)[1])) setclvar(argv[1]); break; - case 'm': /* more memory: -mr=record, -mf=fields */ - /* no longer needed */ - marg = argv[1]; - if (argv[1][3]) - temp = atoi(&argv[1][3]); - else { - argv++; argc--; - temp = atoi(&argv[1][0]); - } - switch (marg[2]) 
{ - case 'r': recsize = temp; break; - case 'f': nfields = temp; break; - default: FATAL("unknown option %s\n", marg); - } - break; case 'd': dbg = atoi(&argv[1][2]); if (dbg == 0) @@ -154,6 +152,7 @@ if (!safe) envinit(environ); yyparse(); + setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */ if (fs) *FS = qstring(fs, '\0'); dprintf( ("errorflag=%d\n", errorflag) ); --- /sys/src/cmd/awk/maketab.c Sun Mar 3 22:34:43 2013 +++ /sys/src/cmd/awk/maketab.c Sun Mar 3 22:34:41 2013 @@ -25,7 +25,7 @@ /* * this program makes the table to link function names * and type indices that is used by execute() in run.c. - * it finds the indices in y.tab.h, produced by yacc. + * it finds the indices in ytab.h, produced by yacc. */ #include @@ -36,8 +36,8 @@ struct xx { int token; - char *name; - char *pname; + const char *name; + const char *pname; } proc[] = { { PROGRAM, "program", NULL }, { BOR, "boolop", " || " }, @@ -107,12 +107,12 @@ }; #define SIZE (LASTTOKEN - FIRSTTOKEN + 1) -char *table[SIZE]; +const char *table[SIZE]; char *names[SIZE]; int main(int argc, char *argv[]) { - struct xx *p; + const struct xx *p; int i, n, tok; char c; FILE *fp; @@ -135,8 +135,8 @@ if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */ continue; if (tok < FIRSTTOKEN || tok > LASTTOKEN) { - fprintf(stderr, "maketab funny token %d %s\n", tok, buf); - exit(1); + /* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */ + continue; } names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1); strcpy(names[tok-FIRSTTOKEN], name); --- /sys/src/cmd/awk/parse.c Sun Mar 3 22:34:45 2013 +++ /sys/src/cmd/awk/parse.c Sun Mar 3 22:34:44 2013 @@ -239,6 +239,11 @@ SYNTAX( "`%s' is an array name and a function name", v->nval ); return; } + if (isarg(v->nval) != -1) { + SYNTAX( "`%s' is both function name and argument name", v->nval ); + return; + } + v->tval = FCN; v->sval = (char *) st; n = 0; /* count arguments */ @@ -248,7 +253,7 @@ dprintf( ("defining func %s 
(%d args)\n", v->nval, n) ); } -int isarg(char *s) /* is s in argument list for current function? */ +int isarg(const char *s) /* is s in argument list for current function? */ { /* return -1 if not, otherwise arg # */ extern Node *arglist; Node *p = arglist; --- /sys/src/cmd/awk/proctab.c Sun Mar 3 22:34:47 2013 +++ /sys/src/cmd/awk/proctab.c Sun Mar 3 22:34:46 2013 @@ -2,7 +2,7 @@ #include "awk.h" #include "y.tab.h" -static char *printname[92] = { +static char *printname[93] = { (char *) "FIRSTTOKEN", /* 57346 */ (char *) "PROGRAM", /* 57347 */ (char *) "PASTAT", /* 57348 */ @@ -24,81 +24,82 @@ (char *) "STAR", /* 57364 */ (char *) "QUEST", /* 57365 */ (char *) "PLUS", /* 57366 */ - (char *) "AND", /* 57367 */ - (char *) "BOR", /* 57368 */ - (char *) "APPEND", /* 57369 */ - (char *) "EQ", /* 57370 */ - (char *) "GE", /* 57371 */ - (char *) "GT", /* 57372 */ - (char *) "LE", /* 57373 */ - (char *) "LT", /* 57374 */ - (char *) "NE", /* 57375 */ - (char *) "IN", /* 57376 */ - (char *) "ARG", /* 57377 */ - (char *) "BLTIN", /* 57378 */ - (char *) "BREAK", /* 57379 */ - (char *) "CLOSE", /* 57380 */ - (char *) "CONTINUE", /* 57381 */ - (char *) "DELETE", /* 57382 */ - (char *) "DO", /* 57383 */ - (char *) "EXIT", /* 57384 */ - (char *) "FOR", /* 57385 */ - (char *) "FUNC", /* 57386 */ - (char *) "SUB", /* 57387 */ - (char *) "GSUB", /* 57388 */ - (char *) "IF", /* 57389 */ - (char *) "INDEX", /* 57390 */ - (char *) "LSUBSTR", /* 57391 */ - (char *) "MATCHFCN", /* 57392 */ - (char *) "NEXT", /* 57393 */ - (char *) "NEXTFILE", /* 57394 */ - (char *) "ADD", /* 57395 */ - (char *) "MINUS", /* 57396 */ - (char *) "MULT", /* 57397 */ - (char *) "DIVIDE", /* 57398 */ - (char *) "MOD", /* 57399 */ - (char *) "ASSIGN", /* 57400 */ - (char *) "ASGNOP", /* 57401 */ - (char *) "ADDEQ", /* 57402 */ - (char *) "SUBEQ", /* 57403 */ - (char *) "MULTEQ", /* 57404 */ - (char *) "DIVEQ", /* 57405 */ - (char *) "MODEQ", /* 57406 */ - (char *) "POWEQ", /* 57407 */ - (char *) "PRINT", /* 
57408 */ - (char *) "PRINTF", /* 57409 */ - (char *) "SPRINTF", /* 57410 */ - (char *) "ELSE", /* 57411 */ - (char *) "INTEST", /* 57412 */ - (char *) "CONDEXPR", /* 57413 */ - (char *) "POSTINCR", /* 57414 */ - (char *) "PREINCR", /* 57415 */ - (char *) "POSTDECR", /* 57416 */ - (char *) "PREDECR", /* 57417 */ - (char *) "VAR", /* 57418 */ - (char *) "IVAR", /* 57419 */ - (char *) "VARNF", /* 57420 */ - (char *) "CALL", /* 57421 */ - (char *) "NUMBER", /* 57422 */ - (char *) "STRING", /* 57423 */ - (char *) "REGEXPR", /* 57424 */ - (char *) "GETLINE", /* 57425 */ - (char *) "RETURN", /* 57426 */ - (char *) "SPLIT", /* 57427 */ - (char *) "SUBSTR", /* 57428 */ - (char *) "WHILE", /* 57429 */ - (char *) "CAT", /* 57430 */ - (char *) "NOT", /* 57431 */ - (char *) "UMINUS", /* 57432 */ - (char *) "POWER", /* 57433 */ - (char *) "DECR", /* 57434 */ - (char *) "INCR", /* 57435 */ - (char *) "INDIRECT", /* 57436 */ - (char *) "LASTTOKEN", /* 57437 */ + (char *) "EMPTYRE", /* 57367 */ + (char *) "AND", /* 57368 */ + (char *) "BOR", /* 57369 */ + (char *) "APPEND", /* 57370 */ + (char *) "EQ", /* 57371 */ + (char *) "GE", /* 57372 */ + (char *) "GT", /* 57373 */ + (char *) "LE", /* 57374 */ + (char *) "LT", /* 57375 */ + (char *) "NE", /* 57376 */ + (char *) "IN", /* 57377 */ + (char *) "ARG", /* 57378 */ + (char *) "BLTIN", /* 57379 */ + (char *) "BREAK", /* 57380 */ + (char *) "CLOSE", /* 57381 */ + (char *) "CONTINUE", /* 57382 */ + (char *) "DELETE", /* 57383 */ + (char *) "DO", /* 57384 */ + (char *) "EXIT", /* 57385 */ + (char *) "FOR", /* 57386 */ + (char *) "FUNC", /* 57387 */ + (char *) "SUB", /* 57388 */ + (char *) "GSUB", /* 57389 */ + (char *) "IF", /* 57390 */ + (char *) "INDEX", /* 57391 */ + (char *) "LSUBSTR", /* 57392 */ + (char *) "MATCHFCN", /* 57393 */ + (char *) "NEXT", /* 57394 */ + (char *) "NEXTFILE", /* 57395 */ + (char *) "ADD", /* 57396 */ + (char *) "MINUS", /* 57397 */ + (char *) "MULT", /* 57398 */ + (char *) "DIVIDE", /* 57399 */ + (char *) 
"MOD", /* 57400 */ + (char *) "ASSIGN", /* 57401 */ + (char *) "ASGNOP", /* 57402 */ + (char *) "ADDEQ", /* 57403 */ + (char *) "SUBEQ", /* 57404 */ + (char *) "MULTEQ", /* 57405 */ + (char *) "DIVEQ", /* 57406 */ + (char *) "MODEQ", /* 57407 */ + (char *) "POWEQ", /* 57408 */ + (char *) "PRINT", /* 57409 */ + (char *) "PRINTF", /* 57410 */ + (char *) "SPRINTF", /* 57411 */ + (char *) "ELSE", /* 57412 */ + (char *) "INTEST", /* 57413 */ + (char *) "CONDEXPR", /* 57414 */ + (char *) "POSTINCR", /* 57415 */ + (char *) "PREINCR", /* 57416 */ + (char *) "POSTDECR", /* 57417 */ + (char *) "PREDECR", /* 57418 */ + (char *) "VAR", /* 57419 */ + (char *) "IVAR", /* 57420 */ + (char *) "VARNF", /* 57421 */ + (char *) "CALL", /* 57422 */ + (char *) "NUMBER", /* 57423 */ + (char *) "STRING", /* 57424 */ + (char *) "REGEXPR", /* 57425 */ + (char *) "GETLINE", /* 57426 */ + (char *) "RETURN", /* 57427 */ + (char *) "SPLIT", /* 57428 */ + (char *) "SUBSTR", /* 57429 */ + (char *) "WHILE", /* 57430 */ + (char *) "CAT", /* 57431 */ + (char *) "NOT", /* 57432 */ + (char *) "UMINUS", /* 57433 */ + (char *) "POWER", /* 57434 */ + (char *) "DECR", /* 57435 */ + (char *) "INCR", /* 57436 */ + (char *) "INDIRECT", /* 57437 */ + (char *) "LASTTOKEN", /* 57438 */ }; -Cell *(*proctab[92])(Node **, int) = { +Cell *(*proctab[93])(Node **, int) = { nullproc, /* FIRSTTOKEN */ program, /* PROGRAM */ pastat, /* PASTAT */ @@ -120,6 +121,7 @@ nullproc, /* STAR */ nullproc, /* QUEST */ nullproc, /* PLUS */ + nullproc, /* EMPTYRE */ boolop, /* AND */ boolop, /* BOR */ nullproc, /* APPEND */ --- /sys/src/cmd/awk/proto.h Sun Mar 3 22:34:50 2013 +++ /sys/src/cmd/awk/proto.h Sun Mar 3 22:34:48 2013 @@ -33,12 +33,14 @@ extern void startreg(void); extern int input(void); extern void unput(int); -extern void unputstr(char *); +extern void unputstr(const char *); extern int yylook(void); extern int yyback(int *, int); extern int yyinput(void); extern void *compre(char *); +extern void penter(Node *); 
+extern void freetr(Node *); extern int hexstr(char **); extern void quoted(char **, char **, char *); extern int match(void *, char *, char *); @@ -70,7 +72,7 @@ extern Node *pa2stat(Node *, Node *, Node *); extern Node *linkum(Node *, Node *); extern void defn(Cell *, Node *, Node *); -extern int isarg(char *); +extern int isarg(const char *); extern char *tokname(int); extern Cell *(*proctab[])(Node **, int); extern int ptoi(void *); @@ -81,18 +83,20 @@ extern void envinit(char **); extern Array *makesymtab(int); extern void freesymtab(Cell *); -extern void freeelem(Cell *, char *); -extern Cell *setsymtab(char *, char *, double, unsigned int, Array *); -extern int hash(char *, int); +extern void freeelem(Cell *, const char *); +extern Cell *setsymtab(const char *, const char *, double, unsigned int, Array *); +extern int hash(const char *, int); extern void rehash(Array *); -extern Cell *lookup(char *, Array *); +extern Cell *lookup(const char *, Array *); extern double setfval(Cell *, double); -extern void funnyvar(Cell *, char *); -extern char *setsval(Cell *, char *); +extern void funnyvar(Cell *, const char *); +extern char *setsval(Cell *, const char *); extern double getfval(Cell *); +extern unsigned long getival(Cell *, unsigned long, unsigned long); extern char *getsval(Cell *); -extern char *tostring(char *); -extern char *qstring(char *, int); +extern char *getpssval(Cell *); /* for print */ +extern char *tostring(const char *); +extern char *qstring(const char *, int); extern void recinit(unsigned int); extern void initgetrec(void); @@ -106,24 +110,24 @@ extern void fldbld(void); extern void cleanfld(int, int); extern void newfld(int); -extern int refldbld(char *, char *); +extern int refldbld(const char *, const char *); extern void recbld(void); extern Cell *fieldadr(int); -extern void yyerror(char *); +extern void yyerror(const char *); extern void fpecatch(int); extern void bracecheck(void); extern void bcheck2(int, int, int); -extern void 
SYNTAX(char *, ...); -extern void FATAL(char *, ...); -extern void WARNING(char *, ...); +extern void SYNTAX(const char *, ...); +extern void FATAL(const char *, ...); +extern void WARNING(const char *, ...); extern void error(void); extern void eprint(void); extern void bclass(int); -extern double errcheck(double, char *); -extern int isclvar(char *); -extern int is_number(char *); +extern double errcheck(double, const char *); +extern int isclvar(const char *); +extern int is_number(const char *); -extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, char *what); +extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what); extern void run(Node *); extern Cell *execute(Node *); extern Cell *program(Node **, int); @@ -145,7 +149,7 @@ extern Cell *indirect(Node **, int); extern Cell *substr(Node **, int); extern Cell *sindex(Node **, int); -extern int format(char **, int *, char *, Node *); +extern int format(char **, int *, const char *, Node *); extern Cell *awksprintf(Node **, int); extern Cell *awkprintf(Node **, int); extern Cell *arith(Node **, int); @@ -166,8 +170,8 @@ extern Cell *printstat(Node **, int); extern Cell *nullproc(Node **, int); extern FILE *redirect(int, Node *); -extern FILE *openfile(int, char *); -extern char *filename(FILE *); +extern FILE *openfile(int, const char *); +extern const char *filename(FILE *); extern Cell *closefile(Node **, int); extern void closeall(void); extern Cell *sub(Node **, int); --- /sys/src/cmd/awk/run.c Sun Mar 3 22:34:56 2013 +++ /sys/src/cmd/awk/run.c Sun Mar 3 22:34:52 2013 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -49,19 +50,22 @@ } */ -#ifdef _NFILE -#ifndef FOPEN_MAX -#define FOPEN_MAX _NFILE -#endif -#endif - -#ifndef FOPEN_MAX -#define FOPEN_MAX 40 /* max number of open files */ -#endif - -#ifndef RAND_MAX -#define RAND_MAX 32767 /* all that ansi guarantees */ -#endif +#define Imax ULONG_MAX + +/* do we really need these? 
*/ +/* #ifdef _NFILE */ +/* #ifndef FOPEN_MAX */ +/* #define FOPEN_MAX _NFILE */ +/* #endif */ +/* #endif */ +/* */ +/* #ifndef FOPEN_MAX */ +/* #define FOPEN_MAX 40 */ /* max number of open files */ +/* #endif */ +/* */ +/* #ifndef RAND_MAX */ +/* #define RAND_MAX 32767 */ /* all that ansi guarantees */ +/* #endif */ jmp_buf env; extern int pairstack[]; @@ -91,7 +95,7 @@ /* buffer memory management */ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, - char *whatrtn) + const char *whatrtn) /* pbuf: address of pointer to buffer being managed * psiz: address of buffer size variable * minlen: minimum length of buffer needed @@ -110,6 +114,7 @@ if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); + dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) ); if (tbuf == NULL) { if (whatrtn) FATAL("out of memory in %s", whatrtn); @@ -134,7 +139,6 @@ Cell *execute(Node *u) /* execute a node of the parse tree */ { - int nobj; Cell *(*proc)(Node **, int); Cell *x; Node *a; @@ -151,11 +155,10 @@ recbld(); return(x); } - nobj = a->nobj; - if (notlegal(nobj)) /* probably a Cell* but too risky to print */ + if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ FATAL("illegal statement"); - proc = proctab[nobj-FIRSTTOKEN]; - x = (*proc)(a->narg, nobj); + proc = proctab[a->nobj-FIRSTTOKEN]; + x = (*proc)(a->narg, a->nobj); if (isfld(x) && !donefld) fldbld(); else if (isrec(x) && !donerec) @@ -222,6 +225,7 @@ { static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE }; int i, ncall, ndef; + int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ Cell *y, *z, *fcn; @@ -250,7 +254,7 @@ y = execute(x); oargs[i] = y; dprintf( ("args[%d]: %s %f <%s>, t=%o\n", - i, y->nval, y->fval, isarr(y) ? "(array)" : y->sval, y->tval) ); + i, NN(y->nval), y->fval, isarr(y) ? 
"(array)" : NN(y->sval), y->tval) ); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); if (isarr(y)) @@ -299,12 +303,18 @@ } else if (t != y) { /* kludge to prevent freeing twice */ t->csub = CTEMP; tempfree(t); + } else if (t == y && t->csub == CCOPY) { + t->csub = CTEMP; + tempfree(t); + freed = 1; } } tempfree(fcn); - if (isexit(y) || isnext(y) || isnextfile(y)) + if (isexit(y) || isnext(y)) return y; - tempfree(y); /* this can free twice! */ + if (freed == 0) { + tempfree(y); /* don't free twice! */ + } z = fp->retval; /* return value */ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); fp--; @@ -318,7 +328,8 @@ y = gettemp(); y->csub = CCOPY; /* prevents freeing until call is over */ y->nval = x->nval; /* BUG? */ - y->sval = x->sval ? tostring(x->sval) : NULL; + if (isstr(x)) + y->sval = tostring(x->sval); y->fval = x->fval; y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ /* is DONTFREE right? */ @@ -344,7 +355,7 @@ case EXIT: if (a[0] != NULL) { y = execute(a[0]); - errorflag = (int) getfval(y); + errorflag = getfval(y) != 0.; tempfree(y); } longjmp(env, 1); @@ -457,7 +468,7 @@ for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); - if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 0)) + if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) FATAL("out of memory for %s[%s...]", x->nval, buf); strcat(buf, s); if (np->nnext) @@ -465,7 +476,7 @@ tempfree(y); } if (!isarr(x)) { - dprintf( ("making %s into an array\n", x->nval) ); + dprintf( ("making %s into an array\n", NN(x->nval)) ); if (freeable(x)) xfree(x->sval); x->tval &= ~(STR|NUM|DONTFREE); @@ -504,7 +515,7 @@ for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); - if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 0)) + if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 
"awkdelete")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); if (np->nnext) @@ -543,7 +554,7 @@ for (p = a[0]; p; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); - if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 0)) + if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); tempfree(x); @@ -669,7 +680,7 @@ void tfree(Cell *a) /* free a tempcell */ { if (freeable(a)) { - dprintf( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) ); + dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) ); xfree(a->sval); } if (a == tmps) @@ -698,12 +709,16 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */ { + Awkfloat val; Cell *x; int m; char *s; x = execute(a[0]); - m = (int) getfval(x); + val = getfval(x); /* freebsd: defend against super large field numbers */ + if ((Awkfloat)INT_MAX < val) + FATAL("trying to access out of range field %s", x->nval); + m = (int) val; if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ FATAL("illegal field $(%s), name \"%s\"", s, x->nval); /* BUG: can x->nval ever be null??? 
*/ @@ -730,37 +745,27 @@ if (k <= 1) { tempfree(x); tempfree(y); - if (a[2] != 0) + if (a[2] != 0) { tempfree(z); + } x = gettemp(); setsval(x, ""); return(x); } - m = (int) getfval(y); - if (m <= 0) - m = 1; - else if (m > k) - m = k; + m = getival(y, 1, k); /* 1 <= m <= k */ tempfree(y); if (a[2] != 0) { - n = (int) getfval(z); + n = getival(z, 0, k-m); + /* 0 <= n <= k-m */ tempfree(z); } else n = k - 1; - if (n < 0) - n = 0; - else if (n > k - m) - n = k - m; dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); y = gettemp(); - while (*s && --m) - s += mblen(s, k); - for (p = s; *p && n--; p += mblen(p, k)) - ; - temp = *p; /* with thanks to John Linderman */ - *p = '\0'; - setsval(y, s); - *p = temp; + temp = s[n+m-1]; /* with thanks to John Linderman */ + s[n+m-1] = '\0'; + setsval(y, s + m - 1); + s[n+m-1] = temp; tempfree(x); return(y); } @@ -793,12 +798,13 @@ #define MAXNUMSIZE 50 -int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversions */ +int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ { char *fmt; - char *p, *t, *os; + char *p, *t; + const char *os; Cell *x; - int flag = 0, n; + int flag = 0, n, ch; int fmtwd; /* format width */ int fmtsz = recsize; char *buf = *pbuf; @@ -809,7 +815,7 @@ if ((fmt = (char *) malloc(fmtsz)) == NULL) FATAL("out of memory in format()"); while (*s) { - adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format"); + adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); if (*s != '%') { *p++ = *s++; continue; @@ -823,16 +829,16 @@ fmtwd = atoi(s+1); if (fmtwd < 0) fmtwd = -fmtwd; - adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); + adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); for (t = fmt; (*t++ = *s) != '\0'; s++) { - if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, 0)) + if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) FATAL("format item %.30s...
ran format() out of memory", os); - if (isalpha(*s) && *s != 'l' && *s != 'h' && *s != 'L') + if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L') break; /* the ansi panoply */ if (*s == '*') { x = execute(a); a = a->nnext; - sprintf(t-1, "%d", fmtwd=(int) getfval(x)); + sprintf(t-1, "%d", fmtwd=getival(x, Imax, Imax)); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); @@ -843,31 +849,30 @@ *t = '\0'; if (fmtwd < 0) fmtwd = -fmtwd; - adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); - + adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); switch (*s) { case 'f': case 'e': case 'g': case 'E': case 'G': - flag = 1; + flag = 'f'; break; case 'd': case 'i': - flag = 2; + flag = 'd'; if(*(s-1) == 'l') break; *(t-1) = 'l'; *t = 'd'; *++t = '\0'; break; case 'o': case 'x': case 'X': case 'u': - flag = *(s-1) == 'l' ? 2 : 3; + flag = *(s-1) == 'l' ? 'd' : 'u'; break; case 's': - flag = 4; + flag = 's'; break; case 'c': - flag = 5; + flag = 'c'; break; default: WARNING("weird printf conversion %s", fmt); - flag = 0; + flag = '?'; break; } if (a == NULL) @@ -877,40 +882,43 @@ n = MAXNUMSIZE; if (fmtwd > n) n = fmtwd; - adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format"); + adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); switch (flag) { - case 0: sprintf(p, "%s", fmt); /* unknown, so dump it too */ + case '?': sprintf(p, "%s", fmt); /* unknown, so dump it too */ t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; - adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format"); + adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); p += strlen(p); sprintf(p, "%s", t); break; - case 1: sprintf(p, fmt, getfval(x)); break; - case 2: sprintf(p, fmt, (long) getfval(x)); break; - case 3: sprintf(p, fmt, (int) getfval(x)); break; - case 4: + case 'f': sprintf(p, fmt, getfval(x)); break; + case 'd': sprintf(p, fmt, getival(x, Imax, Imax)); break; + case 'u': sprintf(p, 
fmt, getival(x, Imax, Imax)); break; + case 's': t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; - if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, 0)) + if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); sprintf(p, fmt, t); break; - case 5: + case 'c': if (isnum(x)) { - if (getfval(x)) - sprintf(p, fmt, (int) getfval(x)); - else{ - *p++ = '\0'; - *p = '\0'; + ch = getival(x, Imax, Imax); + if (ch) + sprintf(p, fmt, ch); + else { + *p++ = '\0'; /* explicit null byte */ + *p = '\0'; /* next output will start here */ } } else sprintf(p, fmt, getsval(x)[0]); break; + default: + FATAL("can't happen: bad conversion %c in format()", flag); } tempfree(x); p += strlen(p); @@ -1195,9 +1203,10 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { Cell *x = 0, *y, *ap; - char *s, *t, *fs = 0; - char temp, num[50]; - int n, nb, sep, tempstat, arg3type; + char *s; + int sep; + char *t, temp, num[50], *fs = 0; + int n, tempstat, arg3type; y = execute(a[0]); /* source string */ s = getsval(y); @@ -1213,14 +1222,17 @@ FATAL("illegal type of split"); sep = *fs; ap = execute(a[1]); /* array name */ + n = y->tval; + y->tval |= DONTFREE; /* split(a[x], a); */ freesymtab(ap); - dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) ); + y->tval = n; + dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) ); ap->tval &= ~STR; ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); n = 0; - if ((*s != '\0' && strlen(fs) > 1) || arg3type == REGEXPR) { /* reg expr */ + if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ void *p; if (arg3type == REGEXPR) { /* it's ready already */ p = (void *) a[2]; @@ -1279,6 +1291,7 @@ s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ + int nb; for (n = 0; *s != 0; s += nb) { Rune r; char buf[UTFmax+1]; @@ -1288,7 +1301,7 @@ nb = chartorune(&r, s); 
memmove(buf, s, nb); buf[nb] = '\0'; - if (isdigit(buf[0])) + if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); @@ -1313,8 +1326,9 @@ } tempfree(ap); tempfree(y); - if (a[2] != 0 && arg3type == STRING) + if (a[2] != 0 && arg3type == STRING) { tempfree(x); + } x = gettemp(); x->tval = NUM; x->fval = n; @@ -1379,7 +1393,7 @@ x = execute(a[0]); if (isbreak(x)) return True; - if (isnext(x) || isnextfile(x) || isexit(x) || isret(x)) + if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); x = execute(a[1]); @@ -1444,11 +1458,33 @@ return True; } +Awkfloat +strtonum(Cell *x) +{ + char *r, *e; + + r = getsval(x); + if(r[0] == '+' || r[0] == '-') + r++; + if(r[0] == '0'){ + if(r[1] >= '0' && r[1] <= '9'){ + for(; *r; r++) + if(r[0] < '0' || r[0] > '7') + goto dflt; + return (Awkfloat)strtoll(getsval(x), 0, 8); + } + if(r[1] == 'x' || r[1] == 'X') + return (Awkfloat)strtoll(getsval(x), 0, 0); + } +dflt: + return strtod(r, &e); +} + Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; - int t; + int t, r; wchar_t wc; char *p, *buf; char mbc[50]; @@ -1462,7 +1498,7 @@ switch (t) { case FLENGTH: if (isarr(x)) - u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ + u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ else { p = getsval(x); u = (Awkfloat) countposn(p, strlen(p)); @@ -1511,12 +1547,12 @@ buf = tostring(getsval(x)); if (t == FTOUPPER) { for (p = buf; *p; p++) - if (islower(*p)) - *p = toupper(*p); + if (islower((uschar) *p)) + *p = toupper((uschar)*p); } else { for (p = buf; *p; p++) - if (isupper(*p)) - *p = tolower(*p); + if (isupper((uschar) *p)) + *p = tolower((uschar)*p); } tempfree(x); x = gettemp(); @@ -1533,12 +1569,22 @@ u = fflush(fp); break; case FUTF: - wc = (int)getfval(x); + r = getival(x, Imax, Imax); + if(r < 0) + r = -r; + if(r > Runemax){ + WARNING("utf argument out-of-range %g", getfval(x)); + r = Runeerror; + } + wc = r; mbc[wctomb(mbc, wc)] = 0; tempfree(x); x = gettemp(); setsval(x, mbc); return x; + case FSTRTONUM: + u = strtonum(x); + break; default: /* can't happen */ FATAL("illegal function type %d", t); break; @@ -1556,7 +1602,6 @@ Cell *printstat(Node **a, int n) /* print a[0] */ { - int r; Node *x; Cell *y; FILE *fp; @@ -1567,18 +1612,17 @@ fp = redirect(ptoi(a[1]), a[2]); for (x = a[0]; x != NULL; x = x->nnext) { y = execute(x); - fputs(getsval(y), fp); + fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) - r = fputs(*ORS, fp); + fputs(*ORS, fp); else - r = fputs(*OFS, fp); - if (r == EOF) - FATAL("write error on %s", filename(fp)); + fputs(*OFS, fp); } if (a[1] != 0) - if (fflush(fp) == EOF) - FATAL("write error on %s", filename(fp)); + fflush(fp); + if (ferror(fp)) + FATAL("write error on %s", filename(fp)); return(True); } @@ -1607,7 +1651,7 @@ struct files { FILE *fp; - char *fname; + const char *fname; int mode; /* '|', 'a', 'w' => LE/LT, GT */ } files[FOPEN_MAX] ={ { NULL, "/dev/stdin", LT }, /* watch out: don't free this! 
*/ @@ -1622,9 +1666,9 @@ files[2].fp = stderr; } -FILE *openfile(int a, char *us) +FILE *openfile(int a, const char *us) { - char *s = us; + const char *s = us; int i, m; FILE *fp = 0; @@ -1668,7 +1712,7 @@ return fp; } -char *filename(FILE *fp) +const char *filename(FILE *fp) { int i; @@ -1686,7 +1730,8 @@ n = n; x = execute(a[0]); getsval(x); - for (i = 0; i < FOPEN_MAX; i++) + stat = -1; + for (i = 0; i < FOPEN_MAX; i++) { if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { if (ferror(files[i].fp)) WARNING( "i/o error occurred on %s", files[i].fname ); @@ -1701,15 +1746,18 @@ files[i].fname = NULL; /* watch out for ref thru this */ files[i].fp = NULL; } + } tempfree(x); - return(True); + x = gettemp(); + setfval(x, (Awkfloat) stat); + return(x); } void closeall(void) { int i, stat; - for (i = 0; i < FOPEN_MAX; i++) + for (i = 0; i < FOPEN_MAX; i++) { if (files[i].fp) { if (ferror(files[i].fp)) WARNING( "i/o error occurred on %s", files[i].fname ); @@ -1720,6 +1768,7 @@ if (stat == EOF) WARNING( "i/o error occurred while closing %s", files[i].fname ); } + } } void flush_all(void) @@ -1796,13 +1845,13 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ { Cell *x, *y; - char *rptr, *sptr, *t, *pb, *c; + char *rptr, *sptr, *t, *pb, *c, *q; char *buf; void *p; int mflag, num; int bufsz = recsize; - if ((buf = (char *)malloc(bufsz)) == NULL) + if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in gsub"); mflag = 0; /* if mflag == 0, can replace empty string */ num = 0; @@ -1829,7 +1878,6 @@ if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { - char *q; sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); for (q = patbeg; q < patbeg+patlen; ) @@ -1858,7 +1906,6 @@ if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { - char *q; sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); for (q = patbeg; q < patbeg+patlen; ) @@ -1867,7 +1914,7 @@ *pb++ = *sptr++; } c = patbeg + patlen; 
- if ((c[-1] == 0) || (*c == 0)) + if (patlen == 0 || *c == 0 || *(c-1) == 0) goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); --- /sys/src/cmd/awk/tran.c Sun Mar 3 22:34:59 2013 +++ /sys/src/cmd/awk/tran.c Sun Mar 3 22:34:57 2013 @@ -51,6 +51,7 @@ Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ Awkfloat *RLENGTH; /* length of same */ +Cell *fsloc; /* FS */ Cell *nrloc; /* NR */ Cell *nfloc; /* NF */ Cell *fnrloc; /* FNR */ @@ -73,7 +74,8 @@ nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); nullnode = celltonode(nullloc, CCON); - FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval; + fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); + FS = &fsloc->sval; RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; @@ -126,6 +128,8 @@ for ( ; *envp; envp++) { if ((p = strchr(*envp, '=')) == NULL) continue; + if( p == *envp ) /* no left hand side name in env string */ + continue; *p++ = 0; /* split into two strings at = */ if (is_number(p)) setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); @@ -168,14 +172,17 @@ xfree(cp->sval); temp = cp->cnext; /* avoids freeing then using */ free(cp); + tp->nelem--; } tp->tab[i] = 0; } + if (tp->nelem != 0) + WARNING("can't happen: inconsistent element count freeing %s", ap->nval); free(tp->tab); free(tp); } -void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */ +void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ { Array *tp; Cell *p, *prev = NULL; @@ -198,14 +205,14 @@ } } -Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp) +Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp) { int h; Cell *p; if (n != NULL && (p = lookup(n, tp)) != NULL) { dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", - p, p->nval, p->sval, 
p->fval, p->tval) ); + p, NN(p->nval), NN(p->sval), p->fval, p->tval) ); return(p); } p = (Cell *) malloc(sizeof(Cell)); @@ -228,7 +235,7 @@ return(p); } -int hash(char *s, int n) /* form hash value for string s */ +int hash(const char *s, int n) /* form hash value for string s */ { unsigned hashval; @@ -259,7 +266,7 @@ tp->size = nsz; } -Cell *lookup(char *s, Array *tp) /* look for s in tp */ +Cell *lookup(const char *s, Array *tp) /* look for s in tp */ { Cell *p; int h; @@ -291,11 +298,11 @@ xfree(vp->sval); /* free any previous string */ vp->tval &= ~STR; /* mark string invalid */ vp->tval |= NUM; /* mark number ok */ - dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) ); + dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) ); return vp->fval = f; } -void funnyvar(Cell *vp, char *rw) +void funnyvar(Cell *vp, const char *rw) { if (isarr(vp)) FATAL("can't %s %s; it's an array name.", rw, vp->nval); @@ -305,12 +312,13 @@ vp, vp->nval, vp->sval, vp->fval, vp->tval); } -char *setsval(Cell *vp, char *s) /* set string val of a Cell */ +char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ { char *t; int fldno; - dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) ); + dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", + vp, NN(vp->nval), s, vp->tval, donerec, donefld) ); if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { @@ -324,12 +332,13 @@ donerec = 1; } t = tostring(s); /* in case it's self-assign */ - vp->tval &= ~NUM; - vp->tval |= STR; if (freeable(vp)) xfree(vp->sval); + vp->tval &= ~NUM; + vp->tval |= STR; vp->tval &= ~DONTFREE; - dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) ); + dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", + vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) ); return(vp->sval = t); } @@ -346,11 +355,36 @@ if (is_number(vp->sval) && !(vp->tval&CON)) vp->tval |= NUM; /* 
make NUM only sparingly */ } - dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) ); + dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) ); return(vp->fval); } -char *getsval(Cell *vp) /* get string val of a Cell */ +unsigned long getival(Cell *vp, unsigned long min, unsigned long max) /* get integer val of Cell */ +{ + long l; + unsigned long r; + Awkfloat u; + + /* + * don't signal FPE on u/oflow, clamp to max/min instead. + * use unsigned long to avoid losing bits to sign. this is + * used in internal contexts like printf "%x" and in substr + * where a signal is useless. + */ + u = getfval(vp); + if(u < 0){ + if(-u > (Awkfloat)min) + return min; + l = u; /* kenc bug */ + return l; + } + if(u > (Awkfloat)max) + return max; + r = u; + return r; +} + +static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ { char s[100]; /* BUG: unchecked */ double dtemp; @@ -367,16 +401,27 @@ if (modf(vp->fval, &dtemp) == 0) /* it's integral */ sprintf(s, "%.30g", vp->fval); else - sprintf(s, *CONVFMT, vp->fval); + sprintf(s, *fmt, vp->fval); vp->sval = tostring(s); vp->tval &= ~DONTFREE; vp->tval |= STR; } - dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) ); + dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) ); return(vp->sval); } -char *tostring(char *s) /* make a copy of string s */ +char *getsval(Cell *vp) /* get string val of a Cell */ +{ + return get_str_val(vp, CONVFMT); +} + +char *getpssval(Cell *vp) /* get string val of a Cell for print */ +{ + return get_str_val(vp, OFMT); +} + + +char *tostring(const char *s) /* make a copy of string s */ { char *p; @@ -387,13 +432,14 @@ return(p); } -char *qstring(char *s, int delim) /* collect string up to next delim */ +char *qstring(const char *is, int delim) /* collect string up to next delim */ { - char *os = s; + const char *os = is; int c, n; - char *buf, *bp; 
+ uschar *s = (uschar *) is; + uschar *buf, *bp; - if ((buf = (char *) malloc(strlen(s)+3)) == NULL) + if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL) FATAL( "out of space in qstring(%s)", s); for (bp = buf; (c = *s) != delim; s++) { if (c == '\n') @@ -430,5 +476,5 @@ } } *bp++ = 0; - return buf; + return (char *) buf; }