xlsx update from steve. Reference: /n/atom/patch/applied/xlsxupdate Date: Tue Mar 25 17:17:07 CET 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/cmd/opc/xlsx/excel2txt.c Tue Mar 25 17:16:55 2014 +++ /sys/src/cmd/opc/xlsx/excel2txt.c Tue Mar 25 17:16:55 2014 @@ -1,3 +1,9 @@ +/* + * Bugs: we somtimes see zero width columns which we take to be as wide as you like. + * we should have an auto mode to ignore what the file says, do 2 passes, measure + * the width of the columns and layout accordingly. + */ + #include #include #include @@ -7,7 +13,6 @@ enum { Widefield = 40 }; /* wrap fields longer than this in tbl mode */ -char *Currency = "£"; /* currency symbol */ int Epoch1904 = 1; /* disable "as broken as Lotus-123" mode (yes really) */ static char *Strtype[] = { "numeric", "inline", "shared", "boolean", "string", "error", "date" }; @@ -31,7 +36,7 @@ l = strlen(str); if(Trunc && l > *remainp -1){ if(Doquote) - Bprint(bp, "%.*q…", *remainp -2, str); + Bprint(bp, "%.*q…", *remainp -4, str); else Bprint(bp, "%.*s…", *remainp -2, str); } @@ -121,8 +126,8 @@ static int cellwidth(Elem *ep, int type, int style) { - int width, first, id; - char *fmt, buf[1024]; + int width, first, numfmt; + char buf[1024]; width = 0; first = 1; @@ -141,12 +146,8 @@ break; case Numeric: case Date: - id = style2numid(style); - if(fmtnum(buf, sizeof(buf), id, ep->pcdata, type) == -1){ - fmt = numid2fmtstr(id); - fprint(2, "%s: %d '%s' numfmt unknown\n", argv0, id, fmt); - strcpy(buf, "unknon format"); - } + numfmt = style2numfmt(style); + fmtnum(buf, sizeof(buf), numfmt, ep->pcdata, type); width += strlen(buf); break; case String: @@ -171,15 +172,17 @@ colwidth(int col) { if(col < 0 || col >= Ncols) - return Defwidth; /* default width */ + return Defwidth; + if(Colwidth[col -1] < 1) + return Defwidth; return Colwidth[col -1]; /* -1 as column indices start at 1 */ } static int rd_c(Biobuf *bp, Elem *ep, int type, int style, int col) { - int id, first, remain, strwid, colwid; - char *fmt, 
buf[1024]; + int remain, strwid, colwid, numfmt; + char buf[1024]; colwid = colwidth(col) -1; /* -1 to ensures there a space between columns */ remain = colwid; @@ -191,12 +194,7 @@ if(Tbl && strwid > Widefield) Bprint(bp, "T{\n"); - - first = 1; for(; ep; ep = ep->next){ - if(! first) - Bprint(bp, " "); - first = 0; if(strcmp(ep->name, "is") == 0 && type == Inline && ep->child) rd_inlinestr(bp, ep->child, &remain); @@ -208,12 +206,8 @@ break; case Numeric: case Date: - id = style2numid(style); - if(fmtnum(buf, sizeof(buf), id, ep->pcdata, type) < 0){ - fmt = numid2fmtstr(id); - fprint(2, "%s: %d '%s' numfmt unknown\n", argv0, id, fmt); - strcpy(buf, "unknon format"); - } + numfmt = style2numfmt(style); + fmtnum(buf, sizeof(buf), numfmt, ep->pcdata, type); prnt(bp, buf, &remain); break; case String: @@ -414,7 +408,6 @@ fprint(2, " -s n select sheet number to print\n"); fprint(2, " -t truncate long cells to column width\n"); fprint(2, " -T generate tbl(1) input\n"); - fprint(2, " -C x set currency symbol to x\n"); exits("usage"); } @@ -462,9 +455,6 @@ Tbl = 1; Delim = "\t"; break; - case 'C': - Currency = EARGF(usage()); - break; default: usage(); }ARGEND; @@ -490,6 +480,7 @@ if(dmpsty) dumpstyles(); } + if((xp = parsefile("%s/xl/workbook.xml", argv[0])) != nil){ if((ep = xmllook(xp->root, "/workbook/workbookPr", nil, nil)) != nil) if((v = xmlvalue(ep, "date1904")) != nil) --- /sys/src/cmd/opc/xlsx/fmtnum.c Tue Mar 25 17:16:55 2014 +++ /sys/src/cmd/opc/xlsx/fmtnum.c Tue Mar 25 17:16:55 2014 @@ -4,9 +4,6 @@ #include #include "xlsx.h" -/* for number format syntax see http://www.ozgrid.com/Excel/CustomFormats.htm */ - - static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" @@ -137,8 +134,8 @@ return localtime((long)t); } -int -fmtnum(char *buf, int len, int id, char *str, int type) +void +fmtnum(char *buf, int len, int numfmt, char *str, int type) { Tm *tm; double num, err; @@ -149,8 +146,10 @@ tm = isotime(str); 
else tm = exceltime(num); + if(tm == nil) + return; - switch(id){ + switch(numfmt){ case 0: // General snprint(buf, len, "%g", num); break; @@ -161,19 +160,19 @@ snprint(buf, len, "%4.2f", num); break; case 3: // #,##0 - snprint(buf, len, "%.0f", num); + snprint(buf, len, "%g", num); break; case 4: // #,##0.00 - snprint(buf, len, "%.2f", num); + snprint(buf, len, "%g", num); break; case 9: // 0% - snprint(buf, len, "%.0f%%", num * 100.0); + snprint(buf, len, "%.0f%%", num); break; case 10: // 0.00% - snprint(buf, len, "%6.2f%%", num * 100.0); + snprint(buf, len, "%6.2f%%", num); break; case 11: // 0.00E+00 - snprint(buf, len, "%.2e", num); + snprint(buf, len, "%e", num); break; case 12: // # ?/? i = (int)num; num -= i; @@ -218,12 +217,12 @@ snprint(buf, len, "%d:%02d", tm->hour, tm->min); break; case 21: // h:mm:ss - snprint(buf, len, "%d:%02d:%02d", tm->hour, tm->min, tm->sec); + snprint(buf, len, "%d:%02d%02d", tm->hour, tm->min, tm->sec); break; case 22: // m/d/yy h:mm snprint(buf, len, "%d/%s/%02d %d:%02d", tm->mday, Months[tm->mon], tm->year % 100, tm->hour, tm->min); /* - * We don't use the "proper" form as it is Locale specific and we want to sidestep that horror. + * We don't use this form as it is Locale specific and we want to sidestep that horror. 
* snprint(buf, len, "%d/%d/%02d %d:%02d", tm->mon+1, tm->mday, tm->year % 100, tm->hour, tm->min); */ break; @@ -231,17 +230,17 @@ if(num == 0) snprint(buf, len, "0"); else if (num < 0) - snprint(buf, len, "(%.0f)", num); + snprint(buf, len, "(%f)", num); else - snprint(buf, len, "%.0f", num); + snprint(buf, len, "%f", num); break; case 38: // #,##0 ;[Red](#,##0) if(num == 0) snprint(buf, len, "0"); else if (num < 0) - snprint(buf, len, "(%.0f)", num); + snprint(buf, len, "(%f)", num); else - snprint(buf, len, "%.0f", num); + snprint(buf, len, "%f", num); break; case 39: // #,##0.00;(#,##0.00) if(num == 0) @@ -255,19 +254,10 @@ if(num == 0) snprint(buf, len, "0"); else if (num < 0) - snprint(buf, len, "(%.2f)", num); - else - snprint(buf, len, "%.2f", num); - break; - case 44: // _("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_) - if(num == 0) - snprint(buf, len, "%s 0", Currency); - else if (num < 0) - snprint(buf, len, "(%s %.2f)", Currency, num); + snprint(buf, len, "(%.0f)", num); else - snprint(buf, len, "%s %.2f", Currency, num); + snprint(buf, len, "%.f", num); break; - case 45: // mm:ss snprint(buf, len, "%02d:%02d", tm->min, tm->sec); break; @@ -284,37 +274,19 @@ h = num / 60; num -= h * 60; m = num / 60; num -= m * 60; s = num / 60; - snprint(buf, len, "%02d:%02d.0", m, s); + if(h) + snprint(buf, len, "%d:%02d:%02d.0", h, m, s); + else + snprint(buf, len, ":%02d:%02d.0", m, s); break; case 48: // ##0.0 - snprint(buf, len, "%.1f", num); + snprint(buf, len, "%f", num); break; case 49: // @ snprint(buf, len, "%s", str); break; - -#ifdef Coraid - /* - * special custom numfmts for coraid's Spreadsheets, - * these will almost definitely be very, very wrong - * for anyone else, you have been warned. 
- */ - case 164: // "$"#,##0.00 - snprint(buf, len, "$ %.2f", num); - break; - case 165: // "$"#,##0.0000 - snprint(buf, len, "$ %.4f", num); - break; - case 166: // "$"#,##0.0000_);[Red]\("$"#,##0.0000\) - if(num >= 0) - snprint(buf, len, "$ %.4f", num); - else - snprint(buf, len, "($ %.4f)", num); - break; -#endif - default: - return -1; + snprint(buf, len, "'%s' '%d' ", str, numfmt); + break; } - return 0; } --- /sys/src/cmd/opc/xlsx/styles.c Tue Mar 25 17:16:55 2014 +++ /sys/src/cmd/opc/xlsx/styles.c Tue Mar 25 17:16:55 2014 @@ -6,33 +6,25 @@ typedef struct Numfmt Numfmt; struct Numfmt { + int num; int style; - int id; + char *fmt; Numfmt *left; Numfmt *right; }; -typedef struct Fmtstr Fmtstr; -struct Fmtstr { - int id; - char *fmt; - Fmtstr *left; - Fmtstr *right; -}; - -static Numfmt *Numroot; -static Fmtstr *Fmtroot; +static Numfmt *Root; static char * -lookfmtstr(Fmtstr *fs, int id) +lookfmtstr(Numfmt *nf, int style) { - if(fs == nil) + if(nf == nil) return nil; - if(fs->id < id) - return lookfmtstr(fs->left, id); - if(fs->id > id) - return lookfmtstr(fs->right, id); - return fs->fmt; + if(nf->style < style) + return lookfmtstr(nf->left, style); + if(nf->style > style) + return lookfmtstr(nf->right, style); + return nf->fmt; } static int @@ -44,108 +36,104 @@ return looknumfmt(nf->left, style); if(nf->style > style) return looknumfmt(nf->right, style); - return nf->id; + return nf->num; } int -style2numid(int style) +style2numfmt(int style) { - return looknumfmt(Numroot, style); + return looknumfmt(Root, style); } char * -numid2fmtstr(int id) +style2fmtstr(int style) { - return lookfmtstr(Fmtroot, id); -} + char *s; + static char buf[16]; + if((s = lookfmtstr(Root, style)) == nil){ + snprint(buf, sizeof(buf), "<%d>", style); + return buf; + } + return s; +} static Numfmt * -addnum(Numfmt *nf, int style, int id) +add(Numfmt *nf, int style, int num) { if(nf == nil){ nf = malloc(sizeof(Numfmt)); if(nf == nil) - sysfatal("No memory for Numfmt\n"); + 
sysfatal("no memory for Numfmt\n"); nf->style = style; - nf->id = id; + nf->num = num; + nf->fmt = nil; nf->left = nf->right = nil; return nf; } if(nf->style < style) - nf->left = addnum(nf->left, style, id); + nf->left = add(nf->left, style, num); if(nf->style > style) - nf->right = addnum(nf->right, style, id); + nf->right = add(nf->right, style, num); return nf; } -static Fmtstr * -addfmt(Fmtstr *fs, int id, char *fmt) +static Numfmt * +upd(Numfmt *nf, int num, char *fmt) { - if(fs == nil){ - fs = malloc(sizeof(Fmtstr)); - if(fs == nil) - sysfatal("No memory for Fmtstr\n"); - fs->id = id; - fs->fmt = strdup(fmt); - if(fs->fmt == nil) - sysfatal("No memory for fmt\n"); - fs->left = fs->right = nil; - return fs; + if(nf == nil) + sysfatal("numfmt=%d not known, bad style format\n", num); + if(nf->num == num){ + nf->fmt = strdup(fmt); + if(nf->fmt == nil) + sysfatal("No memory for numfmt\n"); } - if(fs->id < id) - fs->left = addfmt(fs->left, id, fmt); else - if(fs->id > id) - fs->right = addfmt(fs->right, id, fmt); - return fs; + if(nf->num < num) + nf->left = upd(nf->left, num, fmt); + else + if(nf->num > num) + nf->right = upd(nf->right, num, fmt); + return nf; } static void dumpsty(Numfmt *nf) { - char *fmt; - if(nf == nil) return; dumpsty(nf->right); - if((fmt = lookfmtstr(Fmtroot, nf->id)) == nil) - fmt = ""; - fprint(2, "%-6d %-6d %q\n", nf->style, nf->id, fmt); + fprint(2, "%-6d %-6d %q\n", nf->num, nf->style, nf->fmt); dumpsty(nf->left); } void dumpstyles(void) { - fprint(2, "%-6s %-6s %q\n", "styleid", "numid", "fmtstr"); - dumpsty(Numroot); + dumpsty(Root); } static void numfmts(Elem *ep) { - int id; + int num; char *fmt, *v; - id = -1; + num = -1; fmt = nil; for(; ep; ep = ep->next){ - if(strcmp(ep->name, "numFmt") == 0){ + if(strcmp(ep->name, "numFmt") == 0) if((v = xmlvalue(ep, "numFmtId")) != nil) - id = atoi(v); + num = atoi(v); + if(strcmp(ep->name, "numFmtId") == 0) if((v = xmlvalue(ep, "formatCode")) != nil) fmt = v; - } - if(id < 164) /* seems 
these are builtin to excel */ - continue; - - if(id != -1 && fmt != nil) - Fmtroot = addfmt(Fmtroot, id, fmt); - id = -1; + if(num != -1 && fmt != nil) + Root = upd(Root, num, fmt); + num = -1; fmt = nil; } } @@ -153,20 +141,19 @@ static void cellxfs(Elem *ep) { - int enab, style; + int style; char *v; style = 0; for(; ep; ep = ep->next) if(strcmp(ep->name, "xf") == 0){ - enab = 0; - if((v = xmlvalue(ep, "applyNumberFormat")) != nil) - enab = atoi(v); - if(enab && (v = xmlvalue(ep, "numFmtId")) != nil) - Numroot = addnum(Numroot, style, atoi(v)); + if((v = xmlvalue(ep, "numFmtId")) != nil) + Root = add(Root, style, atoi(v)); style++; } } + + void rd_styles(Elem *base) --- /sys/src/cmd/opc/xlsx/xlsx.h Tue Mar 25 17:16:55 2014 +++ /sys/src/cmd/opc/xlsx/xlsx.h Tue Mar 25 17:16:55 2014 @@ -1,5 +1,4 @@ extern int Epoch1904; /* disable "as broken as Lotus-123" mode (yes really) */ -extern char *Currency; /* Currency symbol */ enum { Numeric, /* type of field */ @@ -12,7 +11,7 @@ }; /* fmtnum.c */ -int fmtnum(char *buf, int len, int id, char *str, int type); +void fmtnum(char *buf, int len, int numfmt, char *str, int type); /* strings.c */ char *lookstring(int idx); @@ -26,7 +25,7 @@ void dumpstrings(void); /* styles.c */ -int style2numid(int style); -char *numid2fmtstr(int id); -void dumpstyles(void); +int style2numfmt(int style); +char *style2fmtstr(int style); void rd_styles(Elem *base); +void dumpstyles(void); --- /sys/man/1/xlsx2txt Tue Mar 25 17:16:56 2014 +++ /sys/man/1/xlsx2txt Tue Mar 25 17:16:56 2014 @@ -9,8 +9,6 @@ ] [ .BI -c range ] [ -.BI -C symb -] [ .BI -d str ] [ .B -q @@ -61,9 +59,6 @@ similar to standard plan9 character class specification. Any combination of comma seperated numbers and number ranges (two numbers seperated by a dash) may be specified. -.IP "-C symb" -.br -Currency symbol for financial number formats, defaults to UK pounds (£). .IP "-d str" .br Set the field delimiter to the string