Allow runes to appear in the path portion of a URL. Don't bother with % escaping: the existing escaping code is incorrect and shouldn't be applied blindly to a URL, and many servers can handle unescaped runes in the URL. So while this isn't according-to-Hoyle correct, it should be an improvement. Reference: /n/atom/patch/applied/webfsrunes Date: Thu May 22 20:35:36 CES 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/cmd/webfs/fns.h Thu May 22 20:34:05 2014 +++ /sys/src/cmd/webfs/fns.h Thu May 22 20:34:05 2014 @@ -49,8 +49,6 @@ void rewriteurl(Url*); int seturlquery(Url*, char*); Url* copyurl(Url*); -char* escapeurl(char*, int(*)(int)); -char* unescapeurl(char*); void initurl(void); /* util.c */ --- /sys/src/cmd/webfs/url.c Thu May 22 20:34:05 2014 +++ /sys/src/cmd/webfs/url.c Thu May 22 20:34:05 2014 @@ -111,12 +111,13 @@ */ /* RE character-class components -- these go in brackets */ +#define RUNES " -􏿿" #define UNWISE "\\[\\]|\\\\^{}`" #define PUNCT "\\-_.!~*'()" #define RES ";/?:@&=+$," #define ALNUM "a-zA-Z0-9" #define HEX "0-9a-fA-F" -#define UNRES ALNUM PUNCT UNWISE +#define UNRES ALNUM PUNCT UNWISE RUNES /* RE components; _N => has N parenthesized subexpressions when expanded */ #define ESCAPED_1 "(%[" HEX "][" HEX "])" @@ -528,7 +529,7 @@ * Newlines are not valid in a URI, but regexp(2) treats them specially * so it's best to make sure there are none before proceeding. */ - if(strchr(url, '\n')){ + if(strchr(url, '\n') != nil){ werrstr("newline in URI"); return -1; } @@ -542,7 +543,7 @@ * will still get by, but that's legitimate, and if it ends up causing * a NUL then someone is unescaping too many times. 
*/ - if(strstr(url, "%00")){ + if(strstr(url, "%00") != nil){ werrstr("escaped NUL in URI"); return -1; } @@ -1017,77 +1018,3 @@ } return v; } - -static int -dhex(char c) -{ - if('0' <= c && c <= '9') - return c-'0'; - if('a' <= c && c <= 'f') - return c-'a'+10; - if('A' <= c && c <= 'F') - return c-'A'+10; - return 0; -} - -char* -escapeurl(char *s, int (*needesc)(int)) -{ - int n; - char *t, *u; - Rune r; - static char *hex = "0123456789abcdef"; - - n = 0; - for(t=s; *t; t++) - if((*needesc)(*t)) - n++; - - u = emalloc(strlen(s)+2*n+1); - t = u; - for(; *s; s++){ - s += chartorune(&r, s); - if(r >= 0xFF){ - werrstr("URLs cannot contain Runes > 0xFF"); - free(t); - return nil; - } - if((*needesc)(r)){ - *u++ = '%'; - *u++ = hex[(r>>4)&0xF]; - *u++ = hex[r&0xF]; - }else - *u++ = r; - } - *u = '\0'; - return t; -} - -char* -unescapeurl(char *s) -{ - char *r, *w; - Rune rune; - - s = estrdup(s); - for(r=w=s; *r; r++){ - if(*r=='%'){ - r++; - if(!isxdigit(r[0]) || !isxdigit(r[1])){ - werrstr("bad escape sequence '%.3s' in URL", r); - return nil; - } - if(r[0]=='0' && r[2]=='0'){ - werrstr("escaped NUL in URL"); - return nil; - } - rune = (dhex(r[0])<<4)|dhex(r[1]); /* latin1 */ - w += runetochar(w, &rune); - r += 2; - }else - *w++ = *r; - } - *w = '\0'; - return s; -} -