add ispunctrune to man page, libc.h and libc/port.  note ispunctrune
returns the unicode type of the punctuation for punctuation runes,
not just a 1 or 0.

Reference: /n/atom/patch/applied2013/runepunct
Date: Mon Nov 25 15:43:25 CET 2013
Signed-off-by: quanstro@quanstro.net

--- /sys/src/libc/port/runepunct.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/port/runepunct.c	Mon Nov 25 15:42:31 2013
@@ -0,0 +1,59 @@
+#include <u.h>
+#include <libc.h>
+#include "runepunct.h"
+
+/*
+ * Binary search over t, a table of n records of ne Runes each,
+ * assumed sorted ascending by each record's first Rune.
+ * Returns the last record whose first Rune is <= c, or 0 if
+ * n is 0 or c sorts before the first record.
+ */
+static Rune*
+bsearch(Rune c, Rune *t, int n, int ne)
+{
+	Rune *p;
+	int m;
+
+	while(n > 1) {
+		m = n/2;
+		p = t + m*ne;
+		if(c >= p[0]) {
+			t = p;
+			n = n-m;
+		} else
+			n = m;
+	}
+	if(n && c >= t[0])
+		return t;
+	return 0;
+}
+
+/*
+ * Return the punctuation type character recorded for c in the
+ * tables of runepunct.h, or 0 if c is in neither table.
+ *
+ * Both tables hold three Runes per record: first rune, last rune,
+ * type.  __punct2 must therefore be searched as nelem/3 records of
+ * width 3 with the type read from p[2]; searching it with a record
+ * width of 2 misaligns every probe and runs past the end of the
+ * array.
+ *
+ * NOTE(review): the type letters look shifted by one record
+ * (e.g. '(' is listed as 'e' and ')' as 'o'); confirm the tables
+ * against the generator.
+ */
+int
+ispunctrune(Rune c)
+{
+	Rune *p;
+
+	/* multi-rune ranges */
+	p = bsearch(c, __punct3, nelem(__punct3)/3, 3);
+	if(p && c >= p[0] && c <= p[1])
+		return p[2];
+	/* single runes, stored as degenerate ranges */
+	p = bsearch(c, __punct2, nelem(__punct2)/3, 3);
+	if(p && c == p[0])
+		return p[2];
+	return 0;
+}
--- /sys/src/libc/port/runepunct.h	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/port/runepunct.h	Mon Nov 25 15:42:32 2013
@@ -0,0 +1,335 @@
+static
+Rune __punct3[] =
+{
+	0x0021, 0x0023, 'o',	/* !-#; exclamation mark - number sign */
+	0x0025, 0x0027, 's',	/* %-'; percent sign - apostrophe */
+	0x002e, 0x002f, 'o',	/* .-/; full stop - solidus */
+	0x003a, 0x003b, 'o',	/* :-;; colon - semicolon */
+	0x003f, 0x0040, 's',	/* ?-@; question mark - commercial at */
+	0x00b6, 0x00b7, 'f',	/* ¶-·; pilcrow sign - middle dot */
+	0x055a, 0x055f, 'o',	/* ՚-՟; armenian apostrophe - armenian abbreviation mark */
+	0x05f3, 0x05f4, 'o',	/* ׳-״; hebrew punctuation geresh - hebrew punctuation gershayim */
+	0x0609, 0x060a, 'o',	/* ؉-؊; arabic-indic per mille sign - arabic-indic per ten thousand sign */
+	0x060c, 0x060d, 'o',	/* ،-؍; arabic comma - arabic date separator */
+	0x061e, 0x061f, 'o',	/* ؞-؟; arabic triple dot punctuation mark - arabic question mark */
+	0x066a, 0x066d, 'o',	/* ٪-٭; arabic percent sign - arabic five pointed star */
+	0x0700, 0x070d, 'o',	/* ܀-܍;
syriac end of paragraph - syriac harklean asteriscus */ + 0x07f7, 0x07f9, 'o', /* ߷-߹; nko symbol gbakurunen - nko exclamation mark */ + 0x0830, 0x083e, 'o', /* ࠰-࠾; samaritan punctuation nequdaa - samaritan punctuation annaau */ + 0x0964, 0x0965, 'o', /* ।-॥; devanagari danda - devanagari double danda */ + 0x0e5a, 0x0e5b, 'o', /* ๚-๛; thai character angkhankhu - thai character khomut */ + 0x0f04, 0x0f12, 'o', /* ༄-༒; tibetan mark initial yig mgo mdun ma - tibetan mark rgya gram shad */ + 0x0fd0, 0x0fd4, 'o', /* ࿐-࿔; tibetan mark bska- shog gi mgo rgyan - tibetan mark closing brda rnying yig mgo sgab ma */ + 0x0fd9, 0x0fda, 'o', /* ࿙-࿚; tibetan mark leading mchan rtags - tibetan mark trailing mchan rtags */ + 0x104a, 0x104f, 'o', /* ၊-၏; myanmar sign little section - myanmar symbol genitive */ + 0x1360, 0x1368, 'd', /* ፠-፨; ethiopic section mark - ethiopic paragraph separator */ + 0x166d, 0x166e, 's', /* ᙭-᙮; canadian syllabics chi sign - canadian syllabics full stop */ + 0x16eb, 0x16ed, 'o', /* ᛫-᛭; runic single punctuation - runic cross punctuation */ + 0x1735, 0x1736, 'o', /* ᜵-᜶; philippine single punctuation - philippine double punctuation */ + 0x17d4, 0x17d6, 'o', /* ។-៖; khmer sign khan - khmer sign camnuc pii kuuh */ + 0x17d8, 0x17da, 'o', /* ៘-៚; khmer sign beyyal - khmer sign koomuut */ + 0x1800, 0x1805, 'd', /* ᠀-᠅; mongolian birga - mongolian four dots */ + 0x1807, 0x180a, 'o', /* ᠇-᠊; mongolian sibe syllable boundary marker - mongolian nirugu */ + 0x1944, 0x1945, 'o', /* ᥄-᥅; limbu exclamation mark - limbu question mark */ + 0x1a1e, 0x1a1f, 'o', /* ᨞-᨟; buginese pallawa - buginese end of section */ + 0x1aa0, 0x1aa6, 'o', /* ᪠-᪦; tai tham sign wiang - tai tham sign reversed rotated rana */ + 0x1aa8, 0x1aad, 'o', /* ᪨-᪭; tai tham sign kaan - tai tham sign caang */ + 0x1b5a, 0x1b60, 'o', /* ᭚-᭠; balinese panti - balinese pameneng */ + 0x1bfc, 0x1bff, 'o', /* ᯼-᯿; batak symbol bindu na metek - batak symbol bindu pangolat */ + 0x1c3b, 0x1c3f, 'o', /* ᰻-᰿; 
lepcha punctuation ta-rol - lepcha punctuation tshook */ + 0x1c7e, 0x1c7f, 'o', /* ᱾-᱿; ol chiki punctuation mucaad - ol chiki punctuation double mucaad */ + 0x1cc0, 0x1cc7, 'o', /* ᳀-᳇; sundanese punctuation bindu surya - sundanese punctuation bindu ba satanga */ + 0x2010, 0x2015, 'o', /* ‐-―; hyphen - horizontal bar */ + 0x2016, 0x2017, 'i', /* ‖-‗; double vertical line - double low line */ + 0x201b, 0x201c, 'f', /* ‛-“; single high-reversed-9 quotation mark - left double quotation mark */ + 0x2020, 0x2027, 'o', /* †-‧; dagger - hyphenation point */ + 0x2030, 0x2038, 'i', /* ‰-‸; per mille sign - caret */ + 0x203b, 0x203e, 'c', /* ※-‾; reference mark - overline */ + 0x203f, 0x2040, 'o', /* ‿-⁀; undertie - character tie */ + 0x2041, 0x2043, 's', /* ⁁-⁃; caret insertion point - hyphen bullet */ + 0x2047, 0x2051, 'o', /* ⁇-⁑; double question mark - two asterisks aligned vertically */ + 0x2055, 0x205e, 's', /* ⁕-⁞; flower punctuation mark - vertical four dots */ + 0x2cf9, 0x2cfc, 'o', /* ⳹-⳼; coptic old nubian full stop - coptic old nubian verse divider */ + 0x2cfe, 0x2cff, 'o', /* ⳾-⳿; coptic full stop - coptic morphological divider */ + 0x2e00, 0x2e01, 'i', /* ⸀-⸁; right angle substitution marker - right angle dotted substitution marker */ + 0x2e06, 0x2e08, 'i', /* ⸆-⸈; raised interpolation marker - dotted transposition marker */ + 0x2e0e, 0x2e16, 'd', /* ⸎-⸖; editorial coronis - dotted right-pointing angle */ + 0x2e18, 0x2e19, 'd', /* ⸘-⸙; inverted interrobang - palm branch */ + 0x2e1e, 0x2e1f, 'i', /* ⸞-⸟; tilde with dot above - tilde with dot below */ + 0x2e2a, 0x2e2e, 'o', /* ⸪-⸮; two dots over one dot punctuation - reversed question mark */ + 0x2e30, 0x2e39, 'd', /* ⸰-⸹; ring point - top half section sign */ + 0x2e3a, 0x2e3b, 'o', /* ⸺-⸻; two-em dash - three-em dash */ + 0x3001, 0x3003, 's', /* 、-〃; ideographic comma - ditto mark */ + 0x301e, 0x301f, 'd', /* 〞-〟; double prime quotation mark - low double prime quotation mark */ + 0xa4fe, 0xa4ff, 'o', /* ꓾-꓿; 
lisu punctuation comma - lisu punctuation full stop */ + 0xa60d, 0xa60f, 'o', /* ꘍-꘏; vai comma - vai question mark */ + 0xa6f2, 0xa6f7, 'o', /* ꛲-꛷; bamum njaemli - bamum question mark */ + 0xa874, 0xa877, 'o', /* ꡴-꡷; phags-pa single head mark - phags-pa mark double shad */ + 0xa8ce, 0xa8cf, 'o', /* ꣎-꣏; saurashtra danda - saurashtra double danda */ + 0xa8f8, 0xa8fa, 'o', /* ꣸-꣺; devanagari sign pushpika - devanagari caret */ + 0xa92e, 0xa92f, 'o', /* ꤮-꤯; kayah li sign cwi - kayah li sign shya */ + 0xa9c1, 0xa9cd, 'o', /* ꧁-꧍; javanese left rerenggan - javanese turned pada piseleh */ + 0xa9de, 0xa9df, 'o', /* ꧞-꧟; javanese pada tirta tumetes - javanese pada isen-isen */ + 0xaa5c, 0xaa5f, 'o', /* ꩜-꩟; cham punctuation spiral - cham punctuation triple danda */ + 0xaade, 0xaadf, 'o', /* ꫞-꫟; tai viet symbol ho hoi - tai viet symbol koi koi */ + 0xaaf0, 0xaaf1, 'o', /* ꫰-꫱; meetei mayek cheikhan - meetei mayek ahang khudam */ + 0xfe10, 0xfe16, 's', /* ︐-︖; presentation form for vertical comma - presentation form for vertical question mark */ + 0xfe31, 0xfe32, 'c', /* ︱-︲; presentation form for vertical em dash - presentation form for vertical en dash */ + 0xfe33, 0xfe34, 's', /* ︳-︴; presentation form for vertical low line - presentation form for vertical wavy low line */ + 0xfe45, 0xfe46, 's', /* ﹅-﹆; sesame dot - white sesame dot */ + 0xfe49, 0xfe4c, 'c', /* ﹉-﹌; dashed overline - double wavy overline */ + 0xfe4d, 0xfe4f, 'o', /* ﹍-﹏; dashed low line - wavy low line */ + 0xfe50, 0xfe52, 'o', /* ﹐-﹒; small comma - small full stop */ + 0xfe54, 0xfe57, 'd', /* ﹔-﹗; small semicolon - small exclamation mark */ + 0xfe5f, 0xfe61, 'd', /* ﹟-﹡; small number sign - small asterisk */ + 0xfe6a, 0xfe6b, 'o', /* ﹪-﹫; small percent sign - small commercial at */ + 0xff01, 0xff03, 'o', /* !-#; fullwidth exclamation mark - fullwidth number sign */ + 0xff05, 0xff07, 's', /* %-'; fullwidth percent sign - fullwidth apostrophe */ + 0xff0e, 0xff0f, 'o', /* .-/; fullwidth full stop - 
fullwidth solidus */ + 0xff1a, 0xff1b, 'o', /* :-;; fullwidth colon - fullwidth semicolon */ + 0xff1f, 0xff20, 's', /* ?-@; fullwidth question mark - fullwidth commercial at */ + 0xff64, 0xff65, 'o', /* 、-・; halfwidth ideographic comma - halfwidth katakana middle dot */ + 0x10100, 0x10102, 'o', /* 𐄀-𐄂; aegean word separator line - aegean check mark */ + 0x10a50, 0x10a58, 'o', /* 𐩐-𐩘; kharoshthi punctuation dot - kharoshthi punctuation lines */ + 0x10b39, 0x10b3f, 'o', /* 𐬹-𐬿; avestan abbreviation mark - large one ring over two rings punctuation */ + 0x11047, 0x1104d, 'o', /* 𑁇-𑁍; brahmi danda - brahmi punctuation lotus */ + 0x110bb, 0x110bc, 'o', /* 𑂻-𑂼; kaithi abbreviation sign - kaithi enumeration sign */ + 0x110be, 0x110c1, 'o', /* 𑂾-𑃁; kaithi section mark - kaithi double danda */ + 0x11140, 0x11143, 'o', /* 𑅀-𑅃; chakma section mark - chakma question mark */ + 0x111c5, 0x111c8, 'o', /* 𑇅-𑇈; sharada danda - sharada separator */ + 0x12470, 0x12473, 'o', /* 𒑰-𒑳; cuneiform punctuation sign old assyrian word divider - cuneiform punctuation sign diagonal tricolon */ +}; + +static +Rune __punct2[] = +{ + 0x0028, 0x0028, 'e', /* (; left parenthesis */ + 0x0029, 0x0029, 'o', /* ); right parenthesis */ + 0x002a, 0x002a, 'o', /* *; asterisk */ + 0x002c, 0x002c, 'd', /* ,; comma */ + 0x002d, 0x002d, 'o', /* -; hyphen-minus */ + 0x005b, 0x005b, 'o', /* [; left square bracket */ + 0x005c, 0x005c, 'e', /* \; reverse solidus */ + 0x005d, 0x005d, 'c', /* ]; right square bracket */ + 0x005f, 0x005f, 's', /* _; low line */ + 0x007b, 0x007b, 'e', /* {; left curly bracket */ + 0x007d, 0x007d, 'o', /* }; right curly bracket */ + 0x00a1, 0x00a1, 'o', /* ¡; inverted exclamation mark */ + 0x00a7, 0x00a7, 'i', /* §; section sign */ + 0x00ab, 0x00ab, 'o', /* «; left-pointing double angle quotation mark */ + 0x00bb, 0x00bb, 'o', /* »; right-pointing double angle quotation mark */ + 0x00bf, 0x00bf, 'o', /* ¿; inverted question mark */ + 0x037e, 0x037e, 'o', /* ;; greek question mark */ + 
0x0387, 0x0387, 'o', /* ·; greek ano teleia */ + 0x0589, 0x0589, 'd', /* ։; armenian full stop */ + 0x058a, 0x058a, 'd', /* ֊; armenian hyphen */ + 0x05be, 0x05be, 'o', /* ־; hebrew punctuation maqaf */ + 0x05c0, 0x05c0, 'o', /* ׀; hebrew punctuation paseq */ + 0x05c3, 0x05c3, 'o', /* ׃; hebrew punctuation sof pasuq */ + 0x05c6, 0x05c6, 'o', /* ׆; hebrew punctuation nun hafukha */ + 0x061b, 0x061b, 'o', /* ؛; arabic semicolon */ + 0x06d4, 0x06d4, 'o', /* ۔; arabic full stop */ + 0x085e, 0x085e, 'o', /* ࡞; mandaic punctuation */ + 0x0970, 0x0970, 'o', /* ॰; devanagari abbreviation sign */ + 0x0af0, 0x0af0, 'o', /* ૰; gujarati abbreviation sign */ + 0x0df4, 0x0df4, 'o', /* ෴; sinhala punctuation kunddaliya */ + 0x0e4f, 0x0e4f, 'o', /* ๏; thai character fongman */ + 0x0f14, 0x0f14, 's', /* ༔; tibetan mark gter tsheg */ + 0x0f3a, 0x0f3a, 'e', /* ༺; tibetan mark gug rtags gyon */ + 0x0f3b, 0x0f3b, 's', /* ༻; tibetan mark gug rtags gyas */ + 0x0f3c, 0x0f3c, 'e', /* ༼; tibetan mark ang khang gyon */ + 0x0f3d, 0x0f3d, 'o', /* ༽; tibetan mark ang khang gyas */ + 0x0f85, 0x0f85, 'o', /* ྅; tibetan mark paluta */ + 0x10fb, 0x10fb, 'o', /* ჻; georgian paragraph separator */ + 0x1400, 0x1400, 'o', /* ᐀; canadian syllabics hyphen */ + 0x169b, 0x169b, 'e', /* ᚛; ogham feather mark */ + 0x169c, 0x169c, 'o', /* ᚜; ogham reversed feather mark */ + 0x1806, 0x1806, 'o', /* ᠆; mongolian todo soft hyphen */ + 0x1cd3, 0x1cd3, 'd', /* ᳓; vedic sign nihshvasa */ + 0x2018, 0x2018, 'f', /* ‘; left single quotation mark */ + 0x2019, 0x2019, 's', /* ’; right single quotation mark */ + 0x201a, 0x201a, 'i', /* ‚; single low-9 quotation mark */ + 0x201d, 0x201d, 's', /* ”; right double quotation mark */ + 0x201e, 0x201e, 'i', /* „; double low-9 quotation mark */ + 0x201f, 0x201f, 'o', /* ‟; double high-reversed-9 quotation mark */ + 0x2039, 0x2039, 'f', /* ‹; single left-pointing angle quotation mark */ + 0x203a, 0x203a, 'o', /* ›; single right-pointing angle quotation mark */ + 0x2045, 0x2045, 
'e', /* ⁅; left square bracket with quill */ + 0x2046, 0x2046, 'o', /* ⁆; right square bracket with quill */ + 0x2053, 0x2053, 'c', /* ⁓; swung dash */ + 0x2054, 0x2054, 'o', /* ⁔; inverted undertie */ + 0x207d, 0x207d, 'e', /* ⁽; superscript left parenthesis */ + 0x207e, 0x207e, 's', /* ⁾; superscript right parenthesis */ + 0x208d, 0x208d, 'e', /* ₍; subscript left parenthesis */ + 0x208e, 0x208e, 's', /* ₎; subscript right parenthesis */ + 0x2308, 0x2308, 'e', /* ⌈; left ceiling */ + 0x2309, 0x2309, 's', /* ⌉; right ceiling */ + 0x230a, 0x230a, 'e', /* ⌊; left floor */ + 0x230b, 0x230b, 's', /* ⌋; right floor */ + 0x2329, 0x2329, 'e', /* 〈; left-pointing angle bracket */ + 0x232a, 0x232a, 's', /* 〉; right-pointing angle bracket */ + 0x2768, 0x2768, 'e', /* ❨; medium left parenthesis ornament */ + 0x2769, 0x2769, 's', /* ❩; medium right parenthesis ornament */ + 0x276a, 0x276a, 'e', /* ❪; medium flattened left parenthesis ornament */ + 0x276b, 0x276b, 's', /* ❫; medium flattened right parenthesis ornament */ + 0x276c, 0x276c, 'e', /* ❬; medium left-pointing angle bracket ornament */ + 0x276d, 0x276d, 's', /* ❭; medium right-pointing angle bracket ornament */ + 0x276e, 0x276e, 'e', /* ❮; heavy left-pointing angle quotation mark ornament */ + 0x276f, 0x276f, 's', /* ❯; heavy right-pointing angle quotation mark ornament */ + 0x2770, 0x2770, 'e', /* ❰; heavy left-pointing angle bracket ornament */ + 0x2771, 0x2771, 's', /* ❱; heavy right-pointing angle bracket ornament */ + 0x2772, 0x2772, 'e', /* ❲; light left tortoise shell bracket ornament */ + 0x2773, 0x2773, 's', /* ❳; light right tortoise shell bracket ornament */ + 0x2774, 0x2774, 'e', /* ❴; medium left curly bracket ornament */ + 0x2775, 0x2775, 's', /* ❵; medium right curly bracket ornament */ + 0x27c5, 0x27c5, 'e', /* ⟅; left s-shaped bag delimiter */ + 0x27c6, 0x27c6, 's', /* ⟆; right s-shaped bag delimiter */ + 0x27e6, 0x27e6, 'e', /* ⟦; mathematical left white square bracket */ + 0x27e7, 0x27e7, 's', /* 
⟧; mathematical right white square bracket */ + 0x27e8, 0x27e8, 'e', /* ⟨; mathematical left angle bracket */ + 0x27e9, 0x27e9, 's', /* ⟩; mathematical right angle bracket */ + 0x27ea, 0x27ea, 'e', /* ⟪; mathematical left double angle bracket */ + 0x27eb, 0x27eb, 's', /* ⟫; mathematical right double angle bracket */ + 0x27ec, 0x27ec, 'e', /* ⟬; mathematical left white tortoise shell bracket */ + 0x27ed, 0x27ed, 's', /* ⟭; mathematical right white tortoise shell bracket */ + 0x27ee, 0x27ee, 'e', /* ⟮; mathematical left flattened parenthesis */ + 0x27ef, 0x27ef, 's', /* ⟯; mathematical right flattened parenthesis */ + 0x2983, 0x2983, 'e', /* ⦃; left white curly bracket */ + 0x2984, 0x2984, 's', /* ⦄; right white curly bracket */ + 0x2985, 0x2985, 'e', /* ⦅; left white parenthesis */ + 0x2986, 0x2986, 's', /* ⦆; right white parenthesis */ + 0x2987, 0x2987, 'e', /* ⦇; z notation left image bracket */ + 0x2988, 0x2988, 's', /* ⦈; z notation right image bracket */ + 0x2989, 0x2989, 'e', /* ⦉; z notation left binding bracket */ + 0x298a, 0x298a, 's', /* ⦊; z notation right binding bracket */ + 0x298b, 0x298b, 'e', /* ⦋; left square bracket with underbar */ + 0x298c, 0x298c, 's', /* ⦌; right square bracket with underbar */ + 0x298d, 0x298d, 'e', /* ⦍; left square bracket with tick in top corner */ + 0x298e, 0x298e, 's', /* ⦎; right square bracket with tick in bottom corner */ + 0x298f, 0x298f, 'e', /* ⦏; left square bracket with tick in bottom corner */ + 0x2990, 0x2990, 's', /* ⦐; right square bracket with tick in top corner */ + 0x2991, 0x2991, 'e', /* ⦑; left angle bracket with dot */ + 0x2992, 0x2992, 's', /* ⦒; right angle bracket with dot */ + 0x2993, 0x2993, 'e', /* ⦓; left arc less-than bracket */ + 0x2994, 0x2994, 's', /* ⦔; right arc greater-than bracket */ + 0x2995, 0x2995, 'e', /* ⦕; double left arc greater-than bracket */ + 0x2996, 0x2996, 's', /* ⦖; double right arc less-than bracket */ + 0x2997, 0x2997, 'e', /* ⦗; left black tortoise shell bracket */ + 
0x2998, 0x2998, 's', /* ⦘; right black tortoise shell bracket */ + 0x29d8, 0x29d8, 'e', /* ⧘; left wiggly fence */ + 0x29d9, 0x29d9, 's', /* ⧙; right wiggly fence */ + 0x29da, 0x29da, 'e', /* ⧚; left double wiggly fence */ + 0x29db, 0x29db, 's', /* ⧛; right double wiggly fence */ + 0x29fc, 0x29fc, 'e', /* ⧼; left-pointing curved angle bracket */ + 0x29fd, 0x29fd, 'o', /* ⧽; right-pointing curved angle bracket */ + 0x2d70, 0x2d70, 'o', /* ⵰; tifinagh separator mark */ + 0x2e02, 0x2e02, 'f', /* ⸂; left substitution bracket */ + 0x2e03, 0x2e03, 'i', /* ⸃; right substitution bracket */ + 0x2e04, 0x2e04, 'f', /* ⸄; left dotted substitution bracket */ + 0x2e05, 0x2e05, 'o', /* ⸅; right dotted substitution bracket */ + 0x2e09, 0x2e09, 'f', /* ⸉; left transposition bracket */ + 0x2e0a, 0x2e0a, 'o', /* ⸊; right transposition bracket */ + 0x2e0b, 0x2e0b, 'i', /* ⸋; raised square */ + 0x2e0c, 0x2e0c, 'f', /* ⸌; left raised omission bracket */ + 0x2e0d, 0x2e0d, 'o', /* ⸍; right raised omission bracket */ + 0x2e17, 0x2e17, 'o', /* ⸗; double oblique hyphen */ + 0x2e1a, 0x2e1a, 'o', /* ⸚; hyphen with diaeresis */ + 0x2e1b, 0x2e1b, 'i', /* ⸛; tilde with ring above */ + 0x2e1c, 0x2e1c, 'f', /* ⸜; left low paraphrase bracket */ + 0x2e1d, 0x2e1d, 'o', /* ⸝; right low paraphrase bracket */ + 0x2e20, 0x2e20, 'f', /* ⸠; left vertical bar with quill */ + 0x2e21, 0x2e21, 's', /* ⸡; right vertical bar with quill */ + 0x2e22, 0x2e22, 'e', /* ⸢; top left half bracket */ + 0x2e23, 0x2e23, 's', /* ⸣; top right half bracket */ + 0x2e24, 0x2e24, 'e', /* ⸤; bottom left half bracket */ + 0x2e25, 0x2e25, 's', /* ⸥; bottom right half bracket */ + 0x2e26, 0x2e26, 'e', /* ⸦; left sideways u bracket */ + 0x2e27, 0x2e27, 's', /* ⸧; right sideways u bracket */ + 0x2e28, 0x2e28, 'e', /* ⸨; left double parenthesis */ + 0x2e29, 0x2e29, 'o', /* ⸩; right double parenthesis */ + 0x3008, 0x3008, 'e', /* 〈; left angle bracket */ + 0x3009, 0x3009, 's', /* 〉; right angle bracket */ + 0x300a, 0x300a, 'e', /* 《; 
left double angle bracket */ + 0x300b, 0x300b, 's', /* 》; right double angle bracket */ + 0x300c, 0x300c, 'e', /* 「; left corner bracket */ + 0x300d, 0x300d, 's', /* 」; right corner bracket */ + 0x300e, 0x300e, 'e', /* 『; left white corner bracket */ + 0x300f, 0x300f, 's', /* 』; right white corner bracket */ + 0x3010, 0x3010, 'e', /* 【; left black lenticular bracket */ + 0x3011, 0x3011, 's', /* 】; right black lenticular bracket */ + 0x3014, 0x3014, 'e', /* 〔; left tortoise shell bracket */ + 0x3015, 0x3015, 's', /* 〕; right tortoise shell bracket */ + 0x3016, 0x3016, 'e', /* 〖; left white lenticular bracket */ + 0x3017, 0x3017, 's', /* 〗; right white lenticular bracket */ + 0x3018, 0x3018, 'e', /* 〘; left white tortoise shell bracket */ + 0x3019, 0x3019, 's', /* 〙; right white tortoise shell bracket */ + 0x301a, 0x301a, 'e', /* 〚; left white square bracket */ + 0x301b, 0x301b, 'd', /* 〛; right white square bracket */ + 0x301c, 0x301c, 's', /* 〜; wave dash */ + 0x301d, 0x301d, 'e', /* 〝; reversed double prime quotation mark */ + 0x3030, 0x3030, 'o', /* 〰; wavy dash */ + 0x303d, 0x303d, 'd', /* 〽; part alternation mark */ + 0x30a0, 0x30a0, 'o', /* ゠; katakana-hiragana double hyphen */ + 0x30fb, 0x30fb, 'o', /* ・; katakana middle dot */ + 0xa673, 0xa673, 'o', /* ꙳; slavonic asterisk */ + 0xa67e, 0xa67e, 'o', /* ꙾; cyrillic kavyka */ + 0xa95f, 0xa95f, 'o', /* ꥟; rejang section mark */ + 0xabeb, 0xabeb, 's', /* ꯫; meetei mayek cheikhei */ + 0xfd3e, 0xfd3e, 'e', /* ﴾; ornate left parenthesis */ + 0xfd3f, 0xfd3f, 'o', /* ﴿; ornate right parenthesis */ + 0xfe17, 0xfe17, 'e', /* ︗; presentation form for vertical left white lenticular bracket */ + 0xfe18, 0xfe18, 'o', /* ︘; presentation form for vertical right white lenticular brakcet */ + 0xfe19, 0xfe19, 'o', /* ︙; presentation form for vertical horizontal ellipsis */ + 0xfe30, 0xfe30, 'd', /* ︰; presentation form for vertical two dot leader */ + 0xfe35, 0xfe35, 'e', /* ︵; presentation form for vertical left parenthesis */ 
+ 0xfe36, 0xfe36, 's', /* ︶; presentation form for vertical right parenthesis */ + 0xfe37, 0xfe37, 'e', /* ︷; presentation form for vertical left curly bracket */ + 0xfe38, 0xfe38, 's', /* ︸; presentation form for vertical right curly bracket */ + 0xfe39, 0xfe39, 'e', /* ︹; presentation form for vertical left tortoise shell bracket */ + 0xfe3a, 0xfe3a, 's', /* ︺; presentation form for vertical right tortoise shell bracket */ + 0xfe3b, 0xfe3b, 'e', /* ︻; presentation form for vertical left black lenticular bracket */ + 0xfe3c, 0xfe3c, 's', /* ︼; presentation form for vertical right black lenticular bracket */ + 0xfe3d, 0xfe3d, 'e', /* ︽; presentation form for vertical left double angle bracket */ + 0xfe3e, 0xfe3e, 's', /* ︾; presentation form for vertical right double angle bracket */ + 0xfe3f, 0xfe3f, 'e', /* ︿; presentation form for vertical left angle bracket */ + 0xfe40, 0xfe40, 's', /* ﹀; presentation form for vertical right angle bracket */ + 0xfe41, 0xfe41, 'e', /* ﹁; presentation form for vertical left corner bracket */ + 0xfe42, 0xfe42, 's', /* ﹂; presentation form for vertical right corner bracket */ + 0xfe43, 0xfe43, 'e', /* ﹃; presentation form for vertical left white corner bracket */ + 0xfe44, 0xfe44, 'o', /* ﹄; presentation form for vertical right white corner bracket */ + 0xfe47, 0xfe47, 'e', /* ﹇; presentation form for vertical left square bracket */ + 0xfe48, 0xfe48, 'o', /* ﹈; presentation form for vertical right square bracket */ + 0xfe58, 0xfe58, 's', /* ﹘; small em dash */ + 0xfe59, 0xfe59, 'e', /* ﹙; small left parenthesis */ + 0xfe5a, 0xfe5a, 's', /* ﹚; small right parenthesis */ + 0xfe5b, 0xfe5b, 'e', /* ﹛; small left curly bracket */ + 0xfe5c, 0xfe5c, 's', /* ﹜; small right curly bracket */ + 0xfe5d, 0xfe5d, 'e', /* ﹝; small left tortoise shell bracket */ + 0xfe5e, 0xfe5e, 'o', /* ﹞; small right tortoise shell bracket */ + 0xfe63, 0xfe63, 'o', /* ﹣; small hyphen-minus */ + 0xfe68, 0xfe68, 'o', /* ﹨; small reverse solidus */ + 0xff08, 
0xff08, 'e', /* (; fullwidth left parenthesis */ + 0xff09, 0xff09, 'o', /* ); fullwidth right parenthesis */ + 0xff0a, 0xff0a, 'o', /* *; fullwidth asterisk */ + 0xff0c, 0xff0c, 'd', /* ,; fullwidth comma */ + 0xff0d, 0xff0d, 'o', /* -; fullwidth hyphen-minus */ + 0xff3b, 0xff3b, 'o', /* [; fullwidth left square bracket */ + 0xff3c, 0xff3c, 'e', /* \; fullwidth reverse solidus */ + 0xff3d, 0xff3d, 'c', /* ]; fullwidth right square bracket */ + 0xff3f, 0xff3f, 's', /* _; fullwidth low line */ + 0xff5b, 0xff5b, 'e', /* {; fullwidth left curly bracket */ + 0xff5d, 0xff5d, 's', /* }; fullwidth right curly bracket */ + 0xff5f, 0xff5f, 'e', /* ⦅; fullwidth left white parenthesis */ + 0xff60, 0xff60, 'o', /* ⦆; fullwidth right white parenthesis */ + 0xff61, 0xff61, 's', /* 。; halfwidth ideographic full stop */ + 0xff62, 0xff62, 'e', /* 「; halfwidth left corner bracket */ + 0xff63, 0xff63, 'o', /* 」; halfwidth right corner bracket */ + 0x1039f, 0x1039f, 'o', /* 𐎟; ugaritic word divider */ + 0x103d0, 0x103d0, 'o', /* 𐏐; old persian word divider */ + 0x10857, 0x10857, 'o', /* 𐡗; imperial aramaic section sign */ + 0x1091f, 0x1091f, 'o', /* 𐤟; phoenician word separator */ + 0x1093f, 0x1093f, 'o', /* 𐤿; lydian triangular mark */ + 0x10a7f, 0x10a7f, 'o', /* 𐩿; old south arabian numeric indicator */ + +}; + --- /sys/src/libc/port/mkfile Mon Nov 25 15:42:33 2013 +++ /sys/src/libc/port/mkfile Mon Nov 25 15:42:33 2013 @@ -65,6 +65,7 @@ runecompose.c\ runeclass.c\ runedecompose.c\ + runepunct.c\ runestrcat.c\ runestrchr.c\ runestrcmp.c\ --- /sys/man/2/isalpharune Mon Nov 25 15:42:34 2013 +++ /sys/man/2/isalpharune Mon Nov 25 15:42:35 2013 @@ -28,6 +28,9 @@ int isdigitrune(Rune c) .PP .B +int ispunctrune(Rune c) +.PP +.B Rune tobaserune(Rune c) .PP .B @@ -51,10 +54,22 @@ these routines test types and modify cases for Unicode characters. The names are self-explanatory. 
+.P +As an extension, +.I ispunctrune +returns the Unicode punctuation type character, +or 0 if the rune is not punctuation. +For example, +.L · +is type +.LR f , +final punctuation. .PP The case-conversion routines return the character unchanged if it has no case. .SH SOURCE .B /sys/src/libc/port/runetype.c +.br +.B /sys/src/libc/port/runepunct.c .SH "SEE ALSO .IR ctype (2) , .IR "The Unicode Standard" . --- /sys/include/libc.h Mon Nov 25 15:42:36 2013 +++ /sys/include/libc.h Mon Nov 25 15:42:37 2013 @@ -83,6 +83,7 @@ extern int isalpharune(Rune); extern int isdigitrune(Rune); extern int islowerrune(Rune); +extern int ispunctrune(Rune); extern int isspacerune(Rune); extern int istitlerune(Rune); extern int isupperrune(Rune); @@ -425,12 +426,25 @@ /* * synchronization */ + typedef struct Lock { int val; } Lock; -extern int _tas(int*); +extern long ainc(long*); +extern long adec(long*); +extern int cas32(u32int *addr, u32int ov, u32int nv); +extern int cas64(u64int *addr, u64int ov, u64int nv); +extern int cas(int *addr, int ov, int nv); +extern int casp(void **addr, void *ov, void *nv); +extern int casl(ulong *addr, ulong ov, ulong nv); +extern u64int fas64(u64int*, u64int); +extern u32int fas32(u32int*, u32int); +extern void* fasp(void**, void*); +extern int _tas(int*); +extern ulong loadlink(ulong*); +extern int storecond(ulong*, ulong); extern void lock(Lock*); extern void unlock(Lock*);