updates from steve Reference: /n/atom/patch/applied/libxmlupd Date: Fri Mar 13 02:26:31 CET 2015 Signed-off-by: quanstro@quanstro.net --- /sys/src/libxml/mkfile Fri Mar 13 02:26:02 2015 +++ /sys/src/libxml/mkfile Fri Mar 13 02:26:03 2015 @@ -6,18 +6,19 @@ xmlattr.$O\ xmlelem.$O\ xmlfind.$O\ - xmlfree.$O\ + xmllook.$O\ + xmlmalloc.$O\ + xmlnew.$O\ xmlparse.$O\ xmlprint.$O\ - xmlnew.$O\ - xmllook.$O\ xmlvalue.$O\ - heap.$O\ HFILES=\ /sys/include/xml.h -CLEANFILES=doc.ps $O.doc +CLEANFILES=\ + doc.ps\ + $O.doc UPDATE=\ mkfile\ @@ -30,5 +31,5 @@ $O.doc: doc.$O $LD -o $target $prereq -doc.ps: 8.doc - 8.doc | dot '-Grotate=90' '-Gsize=10,8' -Tps > doc.ps +doc.ps: $O.doc + $O.doc | dot '-Grotate=90' '-Gsize=10,8' -Tps > doc.ps --- /sys/src/libxml/xmlheap.c Thu Jan 1 00:00:00 1970 +++ /sys/src/libxml/xmlheap.c Fri Mar 13 02:26:04 2015 @@ -0,0 +1,141 @@ +#include +#include +#include "xml.h" +#include "memalloc.h" + +#define Roundup(x, g) (((x) + (unsigned)(g-1)) & ~((unsigned)(g-1))) + +struct xmltree { + xmltree *left; + xmltree *right; + char *str; + int hits; +}; + +struct xmlblock { + xmlblock *next; + char *free; + char *end; +}; + +static int Strdups, Commons, Unique, Memblocks; + +static void * +getmem(Xml *xp, int len) +{ + int sz; + xmlblock *b; + char *ret; + + len = Roundup(len, sizeof(long long)); + + sz = xp->alloc.blksiz; /* shorthand */ + b = xp->alloc.active; + + if(len > sz) + sysfatal("xml heap: object larget than blocksize (%d > %d)\n", len, sz); + + if(xp->alloc.active == nil || b->free + len >= b->end){ + Memblocks++; + b = mallocz(sizeof(xmlblock) + sz, 0); + b->free = (char *)&b[1]; + b->end = (char *)&b->free[sz]; + + b->next = xp->alloc.active; + xp->alloc.active = b; + } + + ret = b->free; + b->free += len; + return ret; +} + +static xmltree * +lookadd(Xml *xp, xmltree *t, char *str, xmltree **match) +{ + int n; + + if(t == nil){ + Unique++; + t = getmem(xp, sizeof(xmltree) + strlen(str)+1); + t->left = nil; + t->right = nil; + t->str = (char 
*)&t[1]; + strcpy(t->str, str); + *match = t; + t->hits = 1; + return t; + } + + if((n = strcmp(str, t->str)) == 0){ + *match = t; + t->hits++; + } + if(n < 0) + t->left = lookadd(xp, t->left, str, match); + if(n > 0) + t->right = lookadd(xp, t->right, str, match); + return t; +} + +static void +heapfree(Xml *xp) +{ + xmlblock *b, *n; + + for(b = xp->alloc.active; b; b = n){ + n = b->next; + if(xmldebug) + memset(b, 0x7e, xp->alloc.blksiz); + free(b); + } + +} + +static void +dumpstats(Xml *) +{ + fprint(2, "total=%d common=%d -> unique=%d rare=%d memblocks=%d\n", + Strdups, Commons, Unique, Strdups - Commons, Memblocks); +} + +static char * +dostrdup(Xml *xp, char *str, int iscommon) +{ + char *s; + xmltree *t; + + Strdups++; + if(iscommon){ + Commons++; + xp->alloc.root = lookadd(xp, xp->alloc.root, str, &t); + return t->str; + } + + s = getmem(xp, strlen(str)+1); + return strcpy(s, str); +} + +static void * +docalloc(Xml *xp, long n, long m) +{ + void *v; + + v = getmem(xp, n * m); + memset(v, 0, n * m); + return v; +} + +static void +dofree(Xml *xp) +{ + heapfree(xp); + free(xp); +} + +Memalloc _xheapalloc = { + dostrdup, + docalloc, + dofree, + dumpstats +}; --- /sys/src/libxml/xmlparse.c Fri Mar 13 02:26:08 2015 +++ /sys/src/libxml/xmlparse.c Fri Mar 13 02:26:09 2015 @@ -11,7 +11,7 @@ Grain = 16 }; -#define isname1(c) (isalpha((c)) || c == '_') /* FIXME: not enforced yet */ +#define isname1(c) (isalpha((c)) || c == '_') /* FIxmlME: not enforced yet */ #define isnameN(r) (isalpharune((r)) || isdigitrune((r)) || r == L'_' || r == L'-' || r == L'.' 
|| r == L':') #define Roundup(x, g) (((x) + (unsigned)(g-1)) & ~((unsigned)(g-1))) @@ -195,9 +195,7 @@ if(memcmp(Entities[i].name, buf, l) == 0) return Entities[i].rune; - for(i = --l; i >= 0; i--) - unget(st, buf[i]); - fprint(2, "%d: '%s' unknown/unsupported entity reference\n", st->line, buf); + fprint(2, "%d: '&%s;' unknown/unsupported entity reference\n", st->line, buf); return L'?'; } @@ -250,7 +248,7 @@ { long r; char *p; - int startline; + int startline, depth; startline = st->line; @@ -262,8 +260,20 @@ if(lb->buf) lb->buf[0] = 0; - while((r = get(st)) != -1 && r != L'>') + depth = 1; + while((r = get(st)) != -1){ + switch(r){ + case L'<': + depth++; + break; + case L'>': + depth--; + break; + } + if(depth <= 0) + break; growrune(st, lb, r); + } if(r == -1){ failed(st, "EOF in DOCTYPE (re: line %d)", startline); @@ -470,7 +480,8 @@ failed(st, "'%s' is an illegal element name", lb->buf); if(st->flags & Fstripnamespace) stripns(lb->buf); - assert((ep = xmlelem(st->xml, &root, parent, lb->buf)) != nil); + ep = xmlelem(st->xml, &root, parent, lb->buf); + assert(ep != nil); ep->line = st->line; break; case Apcdata: @@ -485,7 +496,8 @@ failed(st, "'%s' is an illegal attribute name", lb->buf); if(st->flags & Fstripnamespace) stripns(lb->buf); - assert((ap = xmlattr(st->xml, &(ep->attrs), ep, lb->buf, nil)) != nil); + ap = xmlattr(st->xml, &(ep->attrs), ep, lb->buf, nil); + assert(ap != nil); break; case Avalue: assert(ep != nil); --- /sys/src/libxml/xmlheap.c Thu Jan 1 00:00:00 1970 +++ /sys/src/libxml/xmlheap.c Fri Mar 13 02:26:10 2015 @@ -0,0 +1,141 @@ +#include +#include +#include "xml.h" +#include "memalloc.h" + +#define Roundup(x, g) (((x) + (unsigned)(g-1)) & ~((unsigned)(g-1))) + +struct xmltree { + xmltree *left; + xmltree *right; + char *str; + int hits; +}; + +struct xmlblock { + xmlblock *next; + char *free; + char *end; +}; + +static int Strdups, Commons, Unique, Memblocks; + +static void * +getmem(Xml *xp, int len) +{ + int sz; + xmlblock *b; + char 
*ret; + + len = Roundup(len, sizeof(long long)); + + sz = xp->alloc.blksiz; /* shorthand */ + b = xp->alloc.active; + + if(len > sz) + sysfatal("xml heap: object larget than blocksize (%d > %d)\n", len, sz); + + if(xp->alloc.active == nil || b->free + len >= b->end){ + Memblocks++; + b = mallocz(sizeof(xmlblock) + sz, 0); + b->free = (char *)&b[1]; + b->end = (char *)&b->free[sz]; + + b->next = xp->alloc.active; + xp->alloc.active = b; + } + + ret = b->free; + b->free += len; + return ret; +} + +static xmltree * +lookadd(Xml *xp, xmltree *t, char *str, xmltree **match) +{ + int n; + + if(t == nil){ + Unique++; + t = getmem(xp, sizeof(xmltree) + strlen(str)+1); + t->left = nil; + t->right = nil; + t->str = (char *)&t[1]; + strcpy(t->str, str); + *match = t; + t->hits = 1; + return t; + } + + if((n = strcmp(str, t->str)) == 0){ + *match = t; + t->hits++; + } + if(n < 0) + t->left = lookadd(xp, t->left, str, match); + if(n > 0) + t->right = lookadd(xp, t->right, str, match); + return t; +} + +static void +heapfree(Xml *xp) +{ + xmlblock *b, *n; + + for(b = xp->alloc.active; b; b = n){ + n = b->next; + if(xmldebug) + memset(b, 0x7e, xp->alloc.blksiz); + free(b); + } + +} + +static void +dumpstats(Xml *) +{ + fprint(2, "total=%d common=%d -> unique=%d rare=%d memblocks=%d\n", + Strdups, Commons, Unique, Strdups - Commons, Memblocks); +} + +static char * +dostrdup(Xml *xp, char *str, int iscommon) +{ + char *s; + xmltree *t; + + Strdups++; + if(iscommon){ + Commons++; + xp->alloc.root = lookadd(xp, xp->alloc.root, str, &t); + return t->str; + } + + s = getmem(xp, strlen(str)+1); + return strcpy(s, str); +} + +static void * +docalloc(Xml *xp, long n, long m) +{ + void *v; + + v = getmem(xp, n * m); + memset(v, 0, n * m); + return v; +} + +static void +dofree(Xml *xp) +{ + heapfree(xp); + free(xp); +} + +Memalloc _xheapalloc = { + dostrdup, + docalloc, + dofree, + dumpstats +}; --- /sys/man/2/xml Fri Mar 13 02:26:13 2015 +++ /sys/man/2/xml Fri Mar 13 02:26:15 2015 @@ -12,7 
+12,7 @@ xmlprint, xmlstrdup, xmlvalue -\- XML parser +\- DOM model XML library .SH SYNOPSIS .de PB .PP @@ -29,7 +29,7 @@ Fstripnamespace = 2, }; .PB -struct Xml{ +struct Xml { Elem *root; /* root of tree */ char *doctype; /* DOCTYPE structured comment, or nil */ ... @@ -100,7 +100,10 @@ reads the given file and builds an in-memory tree. .I Blocksize controls the granularity of allocation of the string heap described above, -8192 is typically used. +8192 is typically used; a value of zero disables the string heap and uses +traditional +.IR malloc (2) +calls. The .I flags field allows some control over the parser, it is a bitwise