Minor improvements to html2ms: attempt to translate numeric entity references when
they look like ASCII codes. Word-wrap the output to make later hand editing of the troff source
easier.
Reference: /n/sources/patch/applied/html2ms-better
Date: Fri Mar 4 14:35:51 CET 2011
Signed-off-by: steve@quintile.net
--- /sys/src/cmd/html2ms.c Fri Mar 4 14:34:25 2011
+++ /sys/src/cmd/html2ms.c Fri Mar 4 14:34:11 2011
@@ -250,10 +250,12 @@
main(void)
{
int c;
+ int pos;
Binit(&in, 0, OREAD);
Binit(&out, 1, OWRITE);
+ pos = 0;
for(;;){
c = Bgetc(&in);
if(c < 0)
@@ -266,6 +268,7 @@
escape();
break;
case '\r':
+ pos = 0;
break;
case '\n':
if(quoting){
@@ -280,7 +283,15 @@
lastc = c;
break;
default:
- Bputc(&out, c);
+ if(!inpre && isspace(c) && pos > 80){
+ Bputc(&out, '\n');
+ eatwhite();
+ pos = 0;
+ }
+ else{
+ pos++;
+ Bputc(&out, c);
+ }
lastc = c;
break;
}
@@ -290,6 +301,7 @@
void
escape(void)
{
+ int c;
Entity *e;
char buf[8];
@@ -302,6 +314,13 @@
Bprint(&out, "%C", e->value);
return;
}
+ if(*buf == '#'){
+ c = atoi(buf+1);
+ if(c > 32 && c < 127){ /* looks ASCII-ish */
+ Bprint(&out, "%c", c);
+ return;
+ }
+ }
Bprint(&out, "&%s;", buf);
}