> 6)) . chr(0x80 | ($cp & 0x3f));
    if ($cp <= 0xffff) return $cache[$cp] = chr(0xe0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3f)) . chr(0x80 | ($cp & 0x3f));
    if ($cp <= 0x10ffff) return $cache[$cp] = chr(0xf0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3f)) . chr(0x80 | (($cp >> 6) & 0x3f)) . chr(0x80 | ($cp & 0x3f));
    #U+FFFD REPLACEMENT CHARACTER
    return $cache[$cp] = "\xEF\xBF\xBD";
}

/**
 * Return the table of HTML 4.01 named character entities.
 *
 * Shared by utf8_html_entity_encode() and utf8_html_entity_decode() so that
 * both directions always agree. The four markup-significant entities
 * (&quot; &amp; &lt; &gt;) are deliberately NOT part of this table; each
 * caller decides how to treat them.
 *
 * @return array  map of entity ("&name;") => UTF-8 byte sequence; every value is unique
 * @link   http://www.w3.org/TR/html401/sgml/entities.html
 */
function _utf8_html_entity_table()
{
    static $table = null;
    if ($table !== null) return $table;

    $table = array(
        #Latin-1 Entities:
        '&nbsp;'   => "\xc2\xa0",  #no-break space = non-breaking space
        '&iexcl;'  => "\xc2\xa1",  #inverted exclamation mark
        '&cent;'   => "\xc2\xa2",  #cent sign
        '&pound;'  => "\xc2\xa3",  #pound sign
        '&curren;' => "\xc2\xa4",  #currency sign
        '&yen;'    => "\xc2\xa5",  #yen sign = yuan sign
        '&brvbar;' => "\xc2\xa6",  #broken bar = broken vertical bar
        '&sect;'   => "\xc2\xa7",  #section sign
        '&uml;'    => "\xc2\xa8",  #diaeresis = spacing diaeresis
        '&copy;'   => "\xc2\xa9",  #copyright sign
        '&ordf;'   => "\xc2\xaa",  #feminine ordinal indicator
        '&laquo;'  => "\xc2\xab",  #left-pointing double angle quotation mark = left pointing guillemet
        '&not;'    => "\xc2\xac",  #not sign
        '&shy;'    => "\xc2\xad",  #soft hyphen = discretionary hyphen (the non-breaking hyphen is U+2011, "\xe2\x80\x91")
        '&reg;'    => "\xc2\xae",  #registered sign = registered trade mark sign
        '&macr;'   => "\xc2\xaf",  #macron = spacing macron = overline = APL overbar
        '&deg;'    => "\xc2\xb0",  #degree sign
        '&plusmn;' => "\xc2\xb1",  #plus-minus sign = plus-or-minus sign
        '&sup2;'   => "\xc2\xb2",  #superscript two = superscript digit two = squared
        '&sup3;'   => "\xc2\xb3",  #superscript three = superscript digit three = cubed
        '&acute;'  => "\xc2\xb4",  #acute accent = spacing acute
        '&micro;'  => "\xc2\xb5",  #micro sign
        '&para;'   => "\xc2\xb6",  #pilcrow sign = paragraph sign
        '&middot;' => "\xc2\xb7",  #middle dot = Georgian comma = Greek middle dot
        '&cedil;'  => "\xc2\xb8",  #cedilla = spacing cedilla
        '&sup1;'   => "\xc2\xb9",  #superscript one = superscript digit one
        '&ordm;'   => "\xc2\xba",  #masculine ordinal indicator
        '&raquo;'  => "\xc2\xbb",  #right-pointing double angle quotation mark = right pointing guillemet
        '&frac14;' => "\xc2\xbc",  #vulgar fraction one quarter = fraction one quarter
        '&frac12;' => "\xc2\xbd",  #vulgar fraction one half = fraction one half
        '&frac34;' => "\xc2\xbe",  #vulgar fraction three quarters = fraction three quarters
        '&iquest;' => "\xc2\xbf",  #inverted question mark = turned question mark
        #Latin capital letter
        '&Agrave;' => "\xc3\x80",  #Latin capital letter A with grave = Latin capital letter A grave
        '&Aacute;' => "\xc3\x81",  #Latin capital letter A with acute
        '&Acirc;'  => "\xc3\x82",  #Latin capital letter A with circumflex
        '&Atilde;' => "\xc3\x83",  #Latin capital letter A with tilde
        '&Auml;'   => "\xc3\x84",  #Latin capital letter A with diaeresis
        '&Aring;'  => "\xc3\x85",  #Latin capital letter A with ring above = Latin capital letter A ring
        '&AElig;'  => "\xc3\x86",  #Latin capital letter AE = Latin capital ligature AE
        '&Ccedil;' => "\xc3\x87",  #Latin capital letter C with cedilla
        '&Egrave;' => "\xc3\x88",  #Latin capital letter E with grave
        '&Eacute;' => "\xc3\x89",  #Latin capital letter E with acute
        '&Ecirc;'  => "\xc3\x8a",  #Latin capital letter E with circumflex
        '&Euml;'   => "\xc3\x8b",  #Latin capital letter E with diaeresis
        '&Igrave;' => "\xc3\x8c",  #Latin capital letter I with grave
        '&Iacute;' => "\xc3\x8d",  #Latin capital letter I with acute
        '&Icirc;'  => "\xc3\x8e",  #Latin capital letter I with circumflex
        '&Iuml;'   => "\xc3\x8f",  #Latin capital letter I with diaeresis
        '&ETH;'    => "\xc3\x90",  #Latin capital letter ETH
        '&Ntilde;' => "\xc3\x91",  #Latin capital letter N with tilde
        '&Ograve;' => "\xc3\x92",  #Latin capital letter O with grave
        '&Oacute;' => "\xc3\x93",  #Latin capital letter O with acute
        '&Ocirc;'  => "\xc3\x94",  #Latin capital letter O with circumflex
        '&Otilde;' => "\xc3\x95",  #Latin capital letter O with tilde
        '&Ouml;'   => "\xc3\x96",  #Latin capital letter O with diaeresis
        '&times;'  => "\xc3\x97",  #multiplication sign
        '&Oslash;' => "\xc3\x98",  #Latin capital letter O with stroke = Latin capital letter O slash
        '&Ugrave;' => "\xc3\x99",  #Latin capital letter U with grave
        '&Uacute;' => "\xc3\x9a",  #Latin capital letter U with acute
        '&Ucirc;'  => "\xc3\x9b",  #Latin capital letter U with circumflex
        '&Uuml;'   => "\xc3\x9c",  #Latin capital letter U with diaeresis
        '&Yacute;' => "\xc3\x9d",  #Latin capital letter Y with acute
        '&THORN;'  => "\xc3\x9e",  #Latin capital letter THORN
        #Latin small letter
        '&szlig;'  => "\xc3\x9f",  #Latin small letter sharp s = ess-zed
        '&agrave;' => "\xc3\xa0",  #Latin small letter a with grave = Latin small letter a grave
        '&aacute;' => "\xc3\xa1",  #Latin small letter a with acute
        '&acirc;'  => "\xc3\xa2",  #Latin small letter a with circumflex
        '&atilde;' => "\xc3\xa3",  #Latin small letter a with tilde
        '&auml;'   => "\xc3\xa4",  #Latin small letter a with diaeresis
        '&aring;'  => "\xc3\xa5",  #Latin small letter a with ring above = Latin small letter a ring
        '&aelig;'  => "\xc3\xa6",  #Latin small letter ae = Latin small ligature ae
        '&ccedil;' => "\xc3\xa7",  #Latin small letter c with cedilla
        '&egrave;' => "\xc3\xa8",  #Latin small letter e with grave
        '&eacute;' => "\xc3\xa9",  #Latin small letter e with acute
        '&ecirc;'  => "\xc3\xaa",  #Latin small letter e with circumflex
        '&euml;'   => "\xc3\xab",  #Latin small letter e with diaeresis
        '&igrave;' => "\xc3\xac",  #Latin small letter i with grave
        '&iacute;' => "\xc3\xad",  #Latin small letter i with acute
        '&icirc;'  => "\xc3\xae",  #Latin small letter i with circumflex
        '&iuml;'   => "\xc3\xaf",  #Latin small letter i with diaeresis
        '&eth;'    => "\xc3\xb0",  #Latin small letter eth
        '&ntilde;' => "\xc3\xb1",  #Latin small letter n with tilde
        '&ograve;' => "\xc3\xb2",  #Latin small letter o with grave
        '&oacute;' => "\xc3\xb3",  #Latin small letter o with acute
        '&ocirc;'  => "\xc3\xb4",  #Latin small letter o with circumflex
        '&otilde;' => "\xc3\xb5",  #Latin small letter o with tilde
        '&ouml;'   => "\xc3\xb6",  #Latin small letter o with diaeresis
        '&divide;' => "\xc3\xb7",  #division sign
        '&oslash;' => "\xc3\xb8",  #Latin small letter o with stroke = Latin small letter o slash
        '&ugrave;' => "\xc3\xb9",  #Latin small letter u with grave
        '&uacute;' => "\xc3\xba",  #Latin small letter u with acute
        '&ucirc;'  => "\xc3\xbb",  #Latin small letter u with circumflex
        '&uuml;'   => "\xc3\xbc",  #Latin small letter u with diaeresis
        '&yacute;' => "\xc3\xbd",  #Latin small letter y with acute
        '&thorn;'  => "\xc3\xbe",  #Latin small letter thorn
        '&yuml;'   => "\xc3\xbf",  #Latin small letter y with diaeresis
        #Symbols and Greek Letters:
        '&fnof;'    => "\xc6\x92",  #Latin small f with hook = function = florin
        '&Alpha;'   => "\xce\x91",  #Greek capital letter alpha
        '&Beta;'    => "\xce\x92",  #Greek capital letter beta
        '&Gamma;'   => "\xce\x93",  #Greek capital letter gamma
        '&Delta;'   => "\xce\x94",  #Greek capital letter delta
        '&Epsilon;' => "\xce\x95",  #Greek capital letter epsilon
        '&Zeta;'    => "\xce\x96",  #Greek capital letter zeta
        '&Eta;'     => "\xce\x97",  #Greek capital letter eta
        '&Theta;'   => "\xce\x98",  #Greek capital letter theta
        '&Iota;'    => "\xce\x99",  #Greek capital letter iota
        '&Kappa;'   => "\xce\x9a",  #Greek capital letter kappa
        '&Lambda;'  => "\xce\x9b",  #Greek capital letter lambda
        '&Mu;'      => "\xce\x9c",  #Greek capital letter mu
        '&Nu;'      => "\xce\x9d",  #Greek capital letter nu
        '&Xi;'      => "\xce\x9e",  #Greek capital letter xi
        '&Omicron;' => "\xce\x9f",  #Greek capital letter omicron
        '&Pi;'      => "\xce\xa0",  #Greek capital letter pi
        '&Rho;'     => "\xce\xa1",  #Greek capital letter rho
        '&Sigma;'   => "\xce\xa3",  #Greek capital letter sigma
        '&Tau;'     => "\xce\xa4",  #Greek capital letter tau
        '&Upsilon;' => "\xce\xa5",  #Greek capital letter upsilon
        '&Phi;'     => "\xce\xa6",  #Greek capital letter phi
        '&Chi;'     => "\xce\xa7",  #Greek capital letter chi
        '&Psi;'     => "\xce\xa8",  #Greek capital letter psi
        '&Omega;'   => "\xce\xa9",  #Greek capital letter omega
        '&alpha;'   => "\xce\xb1",  #Greek small letter alpha
        '&beta;'    => "\xce\xb2",  #Greek small letter beta
        '&gamma;'   => "\xce\xb3",  #Greek small letter gamma
        '&delta;'   => "\xce\xb4",  #Greek small letter delta
        '&epsilon;' => "\xce\xb5",  #Greek small letter epsilon
        '&zeta;'    => "\xce\xb6",  #Greek small letter zeta
        '&eta;'     => "\xce\xb7",  #Greek small letter eta
        '&theta;'   => "\xce\xb8",  #Greek small letter theta
        '&iota;'    => "\xce\xb9",  #Greek small letter iota
        '&kappa;'   => "\xce\xba",  #Greek small letter kappa
        '&lambda;'  => "\xce\xbb",  #Greek small letter lambda
        '&mu;'      => "\xce\xbc",  #Greek small letter mu
        '&nu;'      => "\xce\xbd",  #Greek small letter nu
        '&xi;'      => "\xce\xbe",  #Greek small letter xi
        '&omicron;' => "\xce\xbf",  #Greek small letter omicron
        '&pi;'      => "\xcf\x80",  #Greek small letter pi
        '&rho;'     => "\xcf\x81",  #Greek small letter rho
        '&sigmaf;'  => "\xcf\x82",  #Greek small letter final sigma
        '&sigma;'   => "\xcf\x83",  #Greek small letter sigma
        '&tau;'     => "\xcf\x84",  #Greek small letter tau
        '&upsilon;' => "\xcf\x85",  #Greek small letter upsilon
        '&phi;'     => "\xcf\x86",  #Greek small letter phi
        '&chi;'     => "\xcf\x87",  #Greek small letter chi
        '&psi;'     => "\xcf\x88",  #Greek small letter psi
        '&omega;'   => "\xcf\x89",  #Greek small letter omega
        '&thetasym;'=> "\xcf\x91",  #Greek small letter theta symbol
        '&upsih;'   => "\xcf\x92",  #Greek upsilon with hook symbol
        '&piv;'     => "\xcf\x96",  #Greek pi symbol
        '&bull;'    => "\xe2\x80\xa2",  #bullet = black small circle
        '&hellip;'  => "\xe2\x80\xa6",  #horizontal ellipsis = three dot leader
        '&prime;'   => "\xe2\x80\xb2",  #prime = minutes = feet (used to denote minutes and feet)
        '&Prime;'   => "\xe2\x80\xb3",  #double prime = seconds = inches (used to denote seconds and inches)
        '&oline;'   => "\xe2\x80\xbe",  #overline = spacing overscore
        '&frasl;'   => "\xe2\x81\x84",  #fraction slash
        '&weierp;'  => "\xe2\x84\x98",  #script capital P = power set = Weierstrass p
        '&image;'   => "\xe2\x84\x91",  #blackletter capital I = imaginary part
        '&real;'    => "\xe2\x84\x9c",  #blackletter capital R = real part symbol
        '&trade;'   => "\xe2\x84\xa2",  #trade mark sign
        '&alefsym;' => "\xe2\x84\xb5",  #alef symbol = first transfinite cardinal
        '&larr;'    => "\xe2\x86\x90",  #leftwards arrow
        '&uarr;'    => "\xe2\x86\x91",  #upwards arrow
        '&rarr;'    => "\xe2\x86\x92",  #rightwards arrow
        '&darr;'    => "\xe2\x86\x93",  #downwards arrow
        '&harr;'    => "\xe2\x86\x94",  #left right arrow
        '&crarr;'   => "\xe2\x86\xb5",  #downwards arrow with corner leftwards = carriage return
        '&lArr;'    => "\xe2\x87\x90",  #leftwards double arrow
        '&uArr;'    => "\xe2\x87\x91",  #upwards double arrow
        '&rArr;'    => "\xe2\x87\x92",  #rightwards double arrow
        '&dArr;'    => "\xe2\x87\x93",  #downwards double arrow
        '&hArr;'    => "\xe2\x87\x94",  #left right double arrow
        '&forall;'  => "\xe2\x88\x80",  #for all
        '&part;'    => "\xe2\x88\x82",  #partial differential
        '&exist;'   => "\xe2\x88\x83",  #there exists
        '&empty;'   => "\xe2\x88\x85",  #empty set = null set = diameter
        '&nabla;'   => "\xe2\x88\x87",  #nabla = backward difference
        '&isin;'    => "\xe2\x88\x88",  #element of
        '&notin;'   => "\xe2\x88\x89",  #not an element of
        '&ni;'      => "\xe2\x88\x8b",  #contains as member
        '&prod;'    => "\xe2\x88\x8f",  #n-ary product = product sign
        '&sum;'     => "\xe2\x88\x91",  #n-ary summation
        '&minus;'   => "\xe2\x88\x92",  #minus sign
        '&lowast;'  => "\xe2\x88\x97",  #asterisk operator
        '&radic;'   => "\xe2\x88\x9a",  #square root = radical sign
        '&prop;'    => "\xe2\x88\x9d",  #proportional to
        '&infin;'   => "\xe2\x88\x9e",  #infinity
        '&ang;'     => "\xe2\x88\xa0",  #angle
        '&and;'     => "\xe2\x88\xa7",  #logical and = wedge
        '&or;'      => "\xe2\x88\xa8",  #logical or = vee
        '&cap;'     => "\xe2\x88\xa9",  #intersection = cap
        '&cup;'     => "\xe2\x88\xaa",  #union = cup
        '&int;'     => "\xe2\x88\xab",  #integral
        '&there4;'  => "\xe2\x88\xb4",  #therefore
        '&sim;'     => "\xe2\x88\xbc",  #tilde operator = varies with = similar to
        '&cong;'    => "\xe2\x89\x85",  #approximately equal to
        '&asymp;'   => "\xe2\x89\x88",  #almost equal to = asymptotic to
        '&ne;'      => "\xe2\x89\xa0",  #not equal to
        '&equiv;'   => "\xe2\x89\xa1",  #identical to
        '&le;'      => "\xe2\x89\xa4",  #less-than or equal to
        '&ge;'      => "\xe2\x89\xa5",  #greater-than or equal to
        '&sub;'     => "\xe2\x8a\x82",  #subset of
        '&sup;'     => "\xe2\x8a\x83",  #superset of
        '&nsub;'    => "\xe2\x8a\x84",  #not a subset of
        '&sube;'    => "\xe2\x8a\x86",  #subset of or equal to
        '&supe;'    => "\xe2\x8a\x87",  #superset of or equal to
        '&oplus;'   => "\xe2\x8a\x95",  #circled plus = direct sum
        '&otimes;'  => "\xe2\x8a\x97",  #circled times = vector product
        '&perp;'    => "\xe2\x8a\xa5",  #up tack = orthogonal to = perpendicular
        '&sdot;'    => "\xe2\x8b\x85",  #dot operator
        '&lceil;'   => "\xe2\x8c\x88",  #left ceiling = APL upstile
        '&rceil;'   => "\xe2\x8c\x89",  #right ceiling
        '&lfloor;'  => "\xe2\x8c\x8a",  #left floor = APL downstile
        '&rfloor;'  => "\xe2\x8c\x8b",  #right floor
        '&lang;'    => "\xe2\x8c\xa9",  #left-pointing angle bracket = bra
        '&rang;'    => "\xe2\x8c\xaa",  #right-pointing angle bracket = ket
        '&loz;'     => "\xe2\x97\x8a",  #lozenge
        '&spades;'  => "\xe2\x99\xa0",  #black spade suit
        '&clubs;'   => "\xe2\x99\xa3",  #black club suit = shamrock
        '&hearts;'  => "\xe2\x99\xa5",  #black heart suit = valentine
        '&diams;'   => "\xe2\x99\xa6",  #black diamond suit
        #Other Special Characters:
        '&OElig;'   => "\xc5\x92",  #Latin capital ligature OE
        '&oelig;'   => "\xc5\x93",  #Latin small ligature oe
        '&Scaron;'  => "\xc5\xa0",  #Latin capital letter S with caron
        '&scaron;'  => "\xc5\xa1",  #Latin small letter s with caron
        '&Yuml;'    => "\xc5\xb8",  #Latin capital letter Y with diaeresis
        '&circ;'    => "\xcb\x86",  #modifier letter circumflex accent
        '&tilde;'   => "\xcb\x9c",  #small tilde
        '&ensp;'    => "\xe2\x80\x82",  #en space
        '&emsp;'    => "\xe2\x80\x83",  #em space
        '&thinsp;'  => "\xe2\x80\x89",  #thin space
        '&zwnj;'    => "\xe2\x80\x8c",  #zero width non-joiner
        '&zwj;'     => "\xe2\x80\x8d",  #zero width joiner
        '&lrm;'     => "\xe2\x80\x8e",  #left-to-right mark
        '&rlm;'     => "\xe2\x80\x8f",  #right-to-left mark
        '&ndash;'   => "\xe2\x80\x93",  #en dash
        '&mdash;'   => "\xe2\x80\x94",  #em dash
        '&lsquo;'   => "\xe2\x80\x98",  #left single quotation mark
        '&rsquo;'   => "\xe2\x80\x99",  #right single quotation mark (and apostrophe!)
        '&sbquo;'   => "\xe2\x80\x9a",  #single low-9 quotation mark
        '&ldquo;'   => "\xe2\x80\x9c",  #left double quotation mark
        '&rdquo;'   => "\xe2\x80\x9d",  #right double quotation mark
        '&bdquo;'   => "\xe2\x80\x9e",  #double low-9 quotation mark
        '&dagger;'  => "\xe2\x80\xa0",  #dagger
        '&Dagger;'  => "\xe2\x80\xa1",  #double dagger
        '&permil;'  => "\xe2\x80\xb0",  #per mille sign
        '&lsaquo;'  => "\xe2\x80\xb9",  #single left-pointing angle quotation mark
        '&rsaquo;'  => "\xe2\x80\xba",  #single right-pointing angle quotation mark
        '&euro;'    => "\xe2\x82\xac",  #euro sign
    );
    return $table;
}

/**
 * Replace UTF-8 characters that have an HTML 4.01 named entity with that entity.
 *
 * The double quote (") is encoded as &quot;. The characters & < > are left
 * untouched on purpose, so already-built markup can be passed through.
 * Characters without a named entity are returned unchanged.
 *
 * @param   string  $s  UTF-8 text
 * @return  string      text with named entities substituted
 */
function utf8_html_entity_encode($s)
{
    static $map = null;
    if ($map === null)
    {
        #character => entity; the table values are unique, so the flip is lossless
        $map = array_flip(_utf8_html_entity_table());
        $map["\x22"] = '&quot;';
        #"\x3c" => '&lt;' and "\x3e" => '&gt;' are intentionally not encoded
    }
    #strtr() with an array replaces in a single pass, longest key first, and never
    #re-scans its own output. It also covers every table entry, unlike a byte-range
    #pre-filter that can skip valid trailing bytes.
    return strtr($s, $map);
}

/**
 * Convert HTML entities to native UTF-8 characters.
 *
 * Decodes the whole HTML 4.01 named entity set as well as decimal and
 * hexadecimal numeric entities. The text is processed in a single pass, so the
 * output of one replacement is never decoded a second time
 * (with $is_htmlspecialchars = true, '&amp;quot;' becomes '&quot;', not '"').
 *
 * Example: '&#34;' or '&#x22;' or '&quot;' will be converted to '"'
 * when $is_htmlspecialchars is true. When it is false, the markup-significant
 * characters stay escaped: named ones are left as they are and numeric ones
 * are normalised to their named form ('&#34;' becomes '&quot;').
 *
 * @param   string  $s
 * @param   bool    $is_htmlspecialchars  also decode the special HTML entities (&lt; &gt; &amp; &quot;)?
 * @return  string
 * @link    http://www.htmlhelp.com/reference/html40/entities/
 * @link    http://www.alanwood.net/demos/ent4_frame.html (HTML 4.01 Character Entity References)
 * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset1.asp?frame=true
 * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp?frame=true
 * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp?frame=true
 *
 * @license  http://creativecommons.org/licenses/by-sa/3.0/
 * @author   Nasibullin Rinat, http://orangetie.ru/
 * @charset  ANSI
 * @version  2.1.12
 */
function utf8_html_entity_decode($s, $is_htmlspecialchars = false)
{
    #speed optimisation: the shortest possible entity is 4 bytes long (&#d; &xx;)
    #NB: the assignment must be parenthesised, otherwise $pos receives a boolean
    if (strlen($s) < 4
        || ($pos = strpos($s, '&')) === false
        || strpos($s, ';', $pos) === false) return $s;

    $table = _utf8_html_entity_table();
    $htmlspecialchars = array(
        '&quot;' => "\x22",  #quotation mark = APL quote
        '&amp;'  => "\x26",  #ampersand
        '&lt;'   => "\x3c",  #less-than sign
        '&gt;'   => "\x3e",  #greater-than sign
    );
    if ($is_htmlspecialchars) $table += $htmlspecialchars;
    $htmlspecialchars_flip = array_flip($htmlspecialchars);

    #One pass over named, hexadecimal (group 1) and decimal (group 2) entities.
    #A callback replaces the /e modifier, which no longer exists in PHP 7+.
    $decoded = preg_replace_callback(
        '/&(?:#(?:[xX]([\da-fA-F]{1,6}+)|(\d{1,7}+))|[a-zA-Z]++\d*+);/S',
        function (array $m) use ($table, $htmlspecialchars_flip, $is_htmlspecialchars)
        {
            if (isset($m[2]))                        $codepoint = (int) $m[2];
            elseif (isset($m[1]) && $m[1] !== '')    $codepoint = hexdec($m[1]);
            else
            {
                #named entity: unknown names are left untouched
                return array_key_exists($m[0], $table) ? $table[$m[0]] : $m[0];
            }
            #Only real ASCII code points may be treated as special characters;
            #reducing the code point to one byte would confuse e.g. U+0122 with '"'.
            if (! $is_htmlspecialchars && $codepoint < 0x80)
            {
                $char = chr($codepoint);
                if (array_key_exists($char, $htmlspecialchars_flip)) return $htmlspecialchars_flip[$char];
            }
            return utf8_chr($codepoint);
        },
        $s
    );
    #preg_replace_callback() returns null on a PCRE error; never lose the input
    return $decoded === null ? $s : $decoded;
}

if (! function_exists('mb_str_replace'))
{
    /**
     * UTF-8 safe, single-pass replacement of substrings.
     *
     * Unlike str_replace(), all searches are applied in one pass: text produced by
     * one replacement is never searched again, and when several needles match at
     * the same position the longest one wins. Replacement text is always taken
     * literally. Matching is done on bytes, which is safe for well-formed UTF-8
     * because a complete character sequence cannot start inside another character.
     *
     * @param   string|array  $search   needle or list of needles; empty needles are ignored
     * @param   string|array  $replace  replacement, or list of replacements paired with
     *                                  $search by position (needles without a partner are
     *                                  left unchanged); an array is only meaningful
     *                                  together with an array $search
     * @param   string|array  $subject  text, or array of texts processed element by element
     * @return  string|array            same shape as $subject
     */
    function mb_str_replace($search, $replace, $subject)
    {
        if (is_array($subject))
        {
            foreach ($subject as $key => $val)
            {
                #pass $search through unchanged: casting an array to string would yield "Array"
                $subject[$key] = mb_str_replace($search, $replace, $val);
            }
            return $subject;
        }

        $pairs = array();
        if (is_array($search) && is_array($replace))
        {
            $replace = array_values($replace);
            foreach (array_values($search) as $i => $needle)
            {
                if (! array_key_exists($i, $replace)) break;  #unpaired needles stay as they are
                $needle = (string) $needle;
                if ($needle !== '') $pairs[$needle] = (string) $replace[$i];
            }
        }
        else
        {
            $replacement = (string) $replace;
            foreach ((array) $search as $needle)
            {
                $needle = (string) $needle;
                if ($needle !== '') $pairs[$needle] = $replacement;
            }
        }

        if (! $pairs) return $subject;  #nothing to search for
        return strtr((string) $subject, $pairs);
    }
}
?>