Add charset support windows-1251 koi8 Tested with abaco Sergey Reva (rs_rlab@mail.ru) Notes: Thu Mar 2 13:27:22 EST 2006 rsc This is not the right approach. You should pipe the data through tcs, which is the official repository of such data sets. Otherwise we end up with those tables in half a dozen different places. Reference: /n/sources/patch/sorry/libhtml-win-koi Date: Thu Mar 2 16:56:00 CET 2006 Reviewed-by: rsc --- /sys/src/libhtml/lex.c Thu Mar 2 16:52:51 2006 +++ /sys/src/libhtml/lex.c Thu Mar 2 16:52:44 2006 @@ -5,6 +5,9 @@ #include #include "impl.h" +extern Rune win2unicode[]; +extern Rune koi2unicode[]; + typedef struct TokenSource TokenSource; struct TokenSource { @@ -451,7 +454,7 @@ TokenSource* ans; assert(chset == US_Ascii || chset == ISO_8859_1 || - chset == UTF_8 || chset == Unicode); + chset == UTF_8 || chset == Unicode || chset == WIN_1251 || chset == KOI8); ans = (TokenSource*)emalloc(sizeof(TokenSource)); ans->i = 0; ans->data = data; @@ -1176,6 +1179,16 @@ buf = ts->data; c = buf[ts->i]; switch(ts->chset) { + case WIN_1251: /* single-byte charset: one input byte per character */ + if (c>=0x80) /* table entry 0 corresponds to byte 0x80 */ + c=win2unicode[c-0x80]; + ts->i++; + break; + case KOI8: /* single-byte charset: one input byte per character */ + if (c>=0x80) /* table entry 0 corresponds to byte 0x80 */ + c=koi2unicode[c-0x80]; + ts->i++; + break; case ISO_8859_1: if(c >= Winstart && c <= Winend) c = winchars[c - Winstart]; --- /sys/src/libhtml/utils.c Thu Mar 2 16:53:11 2006 +++ /sys/src/libhtml/utils.c Thu Mar 2 16:53:06 2006 @@ -4,6 +4,10 @@ #include #include "impl.h" +Rune 
win2unicode[]={ /* Windows-1251 bytes 0x80-0xFF to Unicode, indexed by byte-0x80; 0x98 is unassigned and left as 0x0098 */ 0x0402,0x0403,0x201a,0x0453,0x201e,0x2026,0x2020,0x2021,0x20ac,0x2030,0x0409,0x2039,0x040a,0x040c,0x040b,0x040f,0x0452,0x2018,0x2019,0x201c,0x201d,0x2022,0x2013,0x2014,0x0098,0x2122,0x0459,0x203a,0x045a,0x045c,0x045b,0x045f,0x00a0,0x040e,0x045e,0x0408,0x00a4,0x0490,0x00a6,0x00a7,0x0401,0x00a9,0x0404,0x00ab,0x00ac,0x00ad,0x00ae,0x0407,0x00b0,0x00b1,0x0406,0x0456,0x0491,0x00b5,0x00b6,0x00b7,0x0451,0x2116,0x0454,0x00bb,0x0458,0x0405,0x0455,0x0457,0x0410,0x0411,0x0412,0x0413,0x0414,0x0415,0x0416,0x0417,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,0x041f,0x0420,0x0421,0x0422,0x0423,0x0424,0x0425,0x0426,0x0427,0x0428,0x0429,0x042a,0x042b,0x042c,0x042d,0x042e,0x042f,0x0430,0x0431,0x0432,0x0433,0x0434,0x0435,0x0436,0x0437,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,0x043f,0x0440,0x0441,0x0442,0x0443,0x0444,0x0445,0x0446,0x0447,0x0448,0x0449,0x044a,0x044b,0x044c,0x044d,0x044e,0x044f}; + +Rune koi2unicode[]={ /* KOI8 bytes 0x80-0xFF to Unicode, indexed by byte-0x80; values follow the KOI8-R layout */ 0x2500,0x2502,0x250C,0x2510,0x2514,0x2518,0x251C,0x2524,0x252C,0x2534,0x253C,0x2580,0x2584,0x2588,0x258C,0x2590,0x2591,0x2592,0x2593,0x2320,0x25A0,0x2219,0x221A,0x2248,0x2264,0x2265,0x00A0,0x2321,0x00B0,0x00B2,0x00B7,0x00F7,0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,0x2557,0x2558,0x2559,0x255A,0x255B,0x255C,0x255D,0x255E,0x255F,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,0x2568,0x2569,0x256A,0x256B,0x256C,0x00A9,0x044E,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,0x0445,0x0438,0x0439,0x043A,0x043B,0x043C,0x043D,0x043E,0x043F,0x044F,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,0x044C,0x044B,0x0437,0x0448,0x044D,0x0449,0x0447,0x044A,0x042E,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,0x0425,0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,0x041F,0x042F,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,0x042C,0x042B,0x0417,0x0428,0x042D,0x0429,0x0427,0x042A}; + Rune* whitespace = L" \t\n\r"; Rune* notwhitespace = L"^ \t\n\r"; @@ -510,7 +514,6 @@ ans[i] = buf[i]; ans[n] = 0; break; - case UTF_8: m = 0; for(i = 0; i < n; ) 
{ @@ -525,7 +528,26 @@ } ans[m] = 0; break; + case KOI8: + ans=(Rune*)emalloc((n+1)*sizeof(Rune)); + for(i=0;ievents of containing item + void* aux; }; enum { @@ -457,6 +460,7 @@ int row; // row of upper left corner int col; // col of upper left corner Point pos; // nw corner of cell contents, in cell + Rectangle r; }; // Anchor is for info about hyperlinks that go somewhere