allow hoc to accept utf-8 variables. this patch is a bit noisy because i changed "int c" to "int r", and 'x' to L'x' in hoc.y but the basic change is from Bgetc to Bgetrune. Notes: Sat Aug 12 02:27:35 EDT 2006 rsc I made a simpler change. I'm not as convinced about the new constants. Reference: /n/sources/patch/applied/hoc-rune Date: Tue Jul 4 19:58:39 CES 2006 Signed-off-by: quanstro@quanstro.net Reviewed-by: rsc --- /sys/src/cmd/hoc/hoc.h Tue Jul 4 19:53:56 2006 +++ /sys/src/cmd/hoc/hoc.h Tue Jul 4 19:53:51 2006 @@ -52,6 +52,7 @@ extern void restore(Symbol*); extern void restoreall(void); extern void execerror(char*, char*); +extern void runeexecerror(char*, Rune*); extern void define(Symbol*, Formal*), verify(Symbol*); extern Datum pop(void); extern void initcode(void), push(Datum), xpop(void), constpush(void); @@ -78,5 +79,4 @@ extern void init(void); extern int yyparse(void); -extern void execerror(char*, char*); extern void *emalloc(unsigned); --- /sys/src/cmd/hoc/init.c Tue Jul 4 19:54:17 2006 +++ /sys/src/cmd/hoc/init.c Tue Jul 4 19:54:12 2006 @@ -28,6 +28,9 @@ "GAMMA", 0.57721566490153286060, /* Euler */ "DEG", 57.29577951308232087680, /* deg/radian */ "PHI", 1.61803398874989484820, /* golden ratio */ + "π", 3.14159265358979323846, + "γ", 0.57721566490153286060, /* Euler */ + "ψ", 1.61803398874989484820, /* golden ratio */ 0, 0 }; --- /sys/src/cmd/hoc/hoc.y Tue Jul 4 19:54:47 2006 +++ /sys/src/cmd/hoc/hoc.y Tue Jul 4 19:54:39 2006 @@ -1,4 +1,7 @@ %{ +#include <u.h> +#include <libc.h> + #include "hoc.h" #define code2(c1,c2) code(c1); code(c2) #define code3(c1,c2,c3) code(c1); code(c2); code(c3) @@ -133,8 +136,6 @@ ; %% /* end of grammar */ -#include <u.h> -#include <libc.h> #include <bio.h> #include <ctype.h> char *progname; @@ -147,7 +148,7 @@ char **gargv; /* global argument list */ int gargc; -int c = '\n'; /* global for use by warning() */ +int r = L'\n'; /* global for use by warning() */ int backslash(int), follow(int, int, int); void defnonly(char*), run(void); @@ -155,65 +156,66 @@ yylex(void) /*
hoc6 */ { - while ((c=Bgetc(bin)) == ' ' || c == '\t') + while ((r=Bgetrune(bin)) == L' ' || r == L'\t') ; - if (c < 0) + if (r < 0) return 0; - if (c == '\\') { - c = Bgetc(bin); - if (c == '\n') { + if (r == L'\\') { + r = Bgetrune(bin); + if (r == L'\n') { lineno++; return yylex(); } } - if (c == '#') { /* comment */ - while ((c=Bgetc(bin)) != '\n' && c >= 0) + if (r == L'#') { /* comment */ + while ((r = Bgetrune(bin)) != L'\n' && r >= 0) ; - if (c == '\n') + if (r == L'\n') lineno++; - return c; + return r; } - if (c == '.' || isdigit(c)) { /* number */ + if (r == L'.' || isdigit(r)) { /* number */ double d; - Bungetc(bin); + Bungetrune(bin); Bgetd(bin, &d); yylval.sym = install("", NUMBER, d); return NUMBER; } - if (isalpha(c) || c == '_') { + if (isalpharune(r) || r == L'_') { Symbol *s; - char sbuf[100], *p = sbuf; + Rune sbuf[100], *p = sbuf; + char cbuf[100*UTFmax]; do { if (p >= sbuf + sizeof(sbuf) - 1) { - *p = '\0'; - execerror("name too long", sbuf); + *p = 0; + runeexecerror("name too long", sbuf); } - *p++ = c; - } while ((c=Bgetc(bin)) >= 0 && (isalnum(c) || c == '_')); - Bungetc(bin); - *p = '\0'; - if ((s=lookup(sbuf)) == 0) - s = install(sbuf, UNDEF, 0.0); + *p++ = r; + } while ((r = Bgetrune(bin)) >= 0 && (isalpharune(r) || r == L'_')); + Bungetrune(bin); + *p = 0; + snprint(cbuf, sizeof cbuf, "%S", sbuf); + if ((s=lookup(cbuf)) == 0) + s = install(cbuf, UNDEF, 0.0); yylval.sym = s; return s->type == UNDEF ? 
VAR : s->type; } - if (c == '"') { /* quoted string */ - char sbuf[100], *p; - for (p = sbuf; (c=Bgetc(bin)) != '"'; p++) { - if (c == '\n' || c == Beof) - execerror("missing quote", ""); + if (r == L'"') { /* quoted string */ + Rune sbuf[100], *p; + for (p = sbuf; (r = Bgetrune(bin)) != L'"'; p++) { + if (r == L'\n' || r == Beof) + execerror("missing quote", 0); if (p >= sbuf + sizeof(sbuf) - 1) { - *p = '\0'; - execerror("string too long", sbuf); + *p =0; + runeexecerror("string too long", sbuf); } - *p = backslash(c); + *p = backslash(r); } *p = 0; - yylval.sym = (Symbol *)emalloc(strlen(sbuf)+1); - strcpy((char*)yylval.sym, sbuf); + yylval.sym = (Symbol*)smprint("%S", sbuf); return STRING; } - switch (c) { + switch (r) { case '+': return follow('+', INC, follow('=', ADDEQ, '+')); case '-': return follow('-', DEC, follow('=', SUBEQ, '-')); case '*': return follow('=', MULEQ, '*'); @@ -226,28 +228,34 @@ case '|': return follow('|', OR, '|'); case '&': return follow('&', AND, '&'); case '\n': lineno++; return '\n'; - default: return c; + default: return r; } } -backslash(int c) /* get next char with \'s interpreted */ +int +backslash(int r) /* get next char with \'s interpreted */ { static char transtab[] = "b\bf\fn\nr\rt\t"; - if (c != '\\') - return c; - c = Bgetc(bin); - if (islower(c) && strchr(transtab, c)) - return strchr(transtab, c)[1]; - return c; + char *p; + + if (r != L'\\') + return r; + r = Bgetrune(bin); + if(r >= Runeerror) + return r; + if (islower(r) && (p = strchr(transtab, (char)r))) + return (int)p[1]; + return r; } +int follow(int expect, int ifyes, int ifno) /* look ahead for >=, etc. 
*/ { - int c = Bgetc(bin); + int r = Bgetrune(bin); - if (c == expect) + if (r == expect) return ifyes; - Bungetc(bin); + Bungetrune(bin); return ifno; } @@ -258,11 +266,23 @@ warning(s, (char *)0); longjmp(begin, 0); rob*/ - execerror(s, (char *)0); + execerror(s, 0); +} + +void +runeexecerror(char* s, Rune *t) /* recover from run-time error */ +{ + char buf[256]; + + snprint(buf, sizeof buf, "%S", t); + warning(s, buf); + Bseek(bin, 0L, 2); /* flush rest of file */ + restoreall(); + longjmp(begin, 0); } void -execerror(char* s, char* t) /* recover from run-time error */ +execerror(char* s, char *t) /* recover from run-time error */ { warning(s, t); Bseek(bin, 0L, 2); /* flush rest of file */ @@ -273,7 +293,7 @@ void fpecatch(void) /* catch floating point exceptions */ { - execerror("floating point exception", (char *) 0); + execerror("floating point exception", 0); } void @@ -369,7 +389,7 @@ } void -warning(char* s, char* t) /* print warning message */ +warning(char *s, char *t) /* print warning message */ { fprint(2, "%s: %s", progname, s); if (t) @@ -377,8 +397,8 @@ if (infile) fprint(2, " in %s", infile); fprint(2, " near line %d\n", lineno); - while (c != '\n' && c != Beof) - if((c = Bgetc(bin)) == '\n') /* flush rest of input line */ + while (r != L'\n' && r != Beof) + if((r = Bgetrune(bin)) == L'\n') /* flush rest of input line */ lineno++; } --- /sys/man/1/hoc Tue Jul 4 19:55:17 2006 +++ /sys/man/1/hoc Tue Jul 4 19:55:12 2006 @@ -50,12 +50,22 @@ by itself contains the value of the last expression evaluated. The variables .BR E , -.BR PI , -.BR PHI , +.BR PI , +.BR PHI , .BR GAMMA and .B DEG are predefined; the last is 59.25..., degrees per radian. +The Greek symbols +.BR π , +.BR ψ , +and +.B γ +may also be used in place of +.BR PI , +.BR PHI , +and +.BR GAMMA . .PP Expressions are formed with these C-like operators, listed by decreasing precedence.