update dict to handle andrey's bulgarian dictionary, and akumar's
japanese->english work for edict2. again, see Les Misérables from
jan 16 2009 in the 9fans archive.

Reference: /n/atom/patch/applied/dictbgjp
Date: Fri Feb 28 14:01:12 CET 2014
Signed-off-by: quanstro@quanstro.net

--- /sys/src/cmd/dict/dict.h Fri Feb 28 13:59:20 2014
+++ /sys/src/cmd/dict/dict.h Fri Feb 28 13:59:21 2014
@@ -149,6 +149,9 @@
 long worldnextoff(long);
 void worldprintentry(Entry, int);
 void worldprintkey(void);
+long dictdnextoff(long);
+void dictdprintentry(Entry, int);
+void dictdprintkey(void);
 
 extern Biobuf *bdict;
 extern Biobuf *bout;
--- /sys/src/cmd/dict/dictd.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/dict/dictd.c Fri Feb 28 13:59:21 2014
@@ -0,0 +1,75 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * dictd-based dictionary files
+ * slightly modified
+ */
+
+/*
+ * the index file contains the name of the word,
+ * in lowercase, and the location in the dictionary file
+ * as a character offset from the beginning.
+ *
+ * the index file is created separately by gunzipping
+ * the original dictionary file and running a program
+ * which looks for the beginning of each word.
+ *
+ * the dictionary is a flat file, unencrypted (and utf-8
+ * encoded!)
+ *
+ * entries in the dictionary are separated by an empty
+ * line.
+ */
+
+void
+dictdprintentry(Entry e, int cmd)
+{
+	uchar *p, *pe;
+
+	/* cmd 'h': emit the first line only; otherwise the whole entry, newlines as spaces */
+	p = (uchar *)e.start;
+	pe = (uchar *)e.end;
+
+	if(cmd == 'h')
+		while(p < pe && *p != '\n')	/* bounds check first: never read *pe */
+			outchar(*p++);
+	else
+		while(p < pe) {
+			if(*p == '\n')
+				*p = ' ';	/* note: rewrites the entry buffer in place */
+			outchar(*p++);
+		}
+
+
+	outnl(0);
+}
+
+long
+dictdnextoff(long fromoff)
+{
+	int o, n, a;
+	/* scan bdict from fromoff for two consecutive newlines; -1 if seek fails or none found */
+	o = 0;
+	a = Bseek(bdict, fromoff, 0);
+	if(a < 0)
+		return -1;
+	for(;;) {
+		n = Bgetc(bdict);
+		if(n <= 0)	/* Bgetc is negative at end of file; NUL still ends the scan as before */
+			break;
+		if(n == '\n' && o == '\n')
+			return (Boffset(bdict)-2);	/* offset of the first newline of the pair */
+		o = n;
+	}
+	return -1;
+}
+
+void
+dictdprintkey(void)
+{
+	Bprint(bout, "No key\n");
+}
+
--- /sys/src/cmd/dict/mkdictd.c Thu Jan 1 00:00:00 1970
+++ /sys/src/cmd/dict/mkdictd.c Fri Feb 28 13:59:21 2014
@@ -0,0 +1,67 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/*
+ * read a dictionary on standard input and print an index:
+ * one "headword<tab>offset" line per entry.  the headword is
+ * the first line of the entry (at most sizeof(w)-1 bytes) and
+ * the offset is the number of bytes read before the entry began.
+ * an entry is printed only once its terminating empty line is read.
+ */
+
+void
+usage(void)
+{
+	fprint(2, "usage: mkdictd\n");
+	exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+	char w[2048], o;
+	int wflag, c;
+	ulong index, oindex, wlen;
+	Biobuf bin;
+
+	ARGBEGIN{
+	default:
+		usage();
+	}ARGEND
+	if(argc != 0)
+		usage();
+
+	if(Binit(&bin, 0, OREAD) == -1)
+		sysfatal("mkdictd: Binit: %r");
+
+	wlen = 0;
+	index = 0;
+	oindex = 0;
+	wflag = 0;
+	o = 0;
+	while((c = Bgetc(&bin)) != Beof) {
+		index++;	/* bytes consumed so far, including c */
+		switch(c) {
+		case '\n':
+			if(o == '\n') {	/* empty line: the entry is complete */
+				w[wlen] = '\0';
+				print("%s\t%lud\n", w, oindex);
+				wlen = 0;
+				wflag = 0;
+				oindex = index;	/* the next entry starts after this empty line */
+			} else {
+				wflag = 1;	/* first line ended: stop collecting the headword */
+			}
+			break;
+		default:
+			if(!wflag && wlen < sizeof(w)-1)
+				w[wlen++] = c;
+			break;
+		}
+		o = c;
+	}
+	Bterm(&bin);
+
+	exits("");
+}
--- /sys/src/cmd/dict/mkfile Fri Feb 28 13:59:22 2014
+++ /sys/src/cmd/dict/mkfile Fri Feb 28 13:59:22 2014
@@ -2,7 +2,8 @@
 
 TARG=dict
 LFILES=oed.$O ahd.$O pcollins.$O pcollinsg.$O movie.$O slang.$O robert.$O\
-	world.$O jis208.$O gb2312.$O thesaurus.$O simple.$O pgw.$O roget.$O
+	world.$O jis208.$O gb2312.$O thesaurus.$O simple.$O pgw.$O roget.$O\
+	dictd.$O\
 
 OFILES=dict.$O\
 	$LFILES\
@@ -10,6 +11,10 @@
 
 HFILES=dict.h kuten.h
 
+CLEANFILES=\
+	$O.mkindex\
+	$O.mkdictd\
+
BIN=/$objtype/bin UPDATE=\ @@ -19,5 +24,8 @@