653 lines
40 KiB
PHP
653 lines
40 KiB
PHP
|
|
<?
|
|||
|
|
/**
 * Converts a Unicode code point to its UTF-8 encoded character.
 *
 * Values that are not Unicode scalar values -- negative numbers, the UTF-16
 * surrogate range U+D800..U+DFFF and anything above U+10FFFF -- cannot be
 * represented in well-formed UTF-8, so U+FFFD REPLACEMENT CHARACTER is
 * returned for them instead.
 *
 * Results are memoised in a function-static array, so repeated calls with the
 * same code point are answered from the cache.
 *
 * @param   int     $cp  Unicode code point (passed through intval())
 * @return  string       UTF-8 byte sequence, 1 to 4 bytes long
 *
 * @license  http://creativecommons.org/licenses/by-sa/3.0/
 * @author   Nasibullin Rinat, http://orangetie.ru/
 * @charset  ANSI
 * @version  1.0.0
 */
function utf8_chr($cp) # = utf8_from_unicode() or unicode_to_utf8()
{
    static $cache = array();
    $cp = intval($cp);
    if (array_key_exists($cp, $cache)) return $cache[$cp]; #speed improvement

    #Not a Unicode scalar value: a negative number would otherwise reach chr(),
    #which wraps modulo 256 and yields a stray byte; surrogates would yield an
    #ill-formed 3-byte sequence.
    if ($cp < 0 || $cp > 0x10ffff || ($cp >= 0xd800 && $cp <= 0xdfff))
    {
        #U+FFFD REPLACEMENT CHARACTER
        return $cache[$cp] = "\xEF\xBF\xBD";
    }

    #1 byte: 0xxxxxxx
    if ($cp <= 0x7f) return $cache[$cp] = chr($cp);

    #2 bytes: 110xxxxx 10xxxxxx
    if ($cp <= 0x7ff) return $cache[$cp] = chr(0xc0 | ($cp >> 6)) .
                                           chr(0x80 | ($cp & 0x3f));

    #3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($cp <= 0xffff) return $cache[$cp] = chr(0xe0 | ($cp >> 12)) .
                                            chr(0x80 | (($cp >> 6) & 0x3f)) .
                                            chr(0x80 | ($cp & 0x3f));

    #4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return $cache[$cp] = chr(0xf0 | ($cp >> 18)) .
                         chr(0x80 | (($cp >> 12) & 0x3f)) .
                         chr(0x80 | (($cp >> 6) & 0x3f)) .
                         chr(0x80 | ($cp & 0x3f));
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
/**
 * Replaces UTF-8 characters that have an HTML 4 named entity with that entity.
 *
 * The double quote (") is additionally replaced with &quot;. The characters
 * & < > are deliberately left untouched, so existing markup and entities in
 * $s survive unchanged.
 *
 * @param   string  $s  UTF-8 encoded text
 * @return  string      Text with the known characters replaced by named entities
 */
function utf8_html_entity_encode($s)
{
    #The lookup map (UTF-8 bytes => entity) is built once and reused by later calls.
    static $map = null;
    if ($map === null)
    {
        $map = array_flip(array(
            #Latin-1 Entities:
            '&nbsp;'   => "\xc2\xa0",  #no-break space = non-breaking space
            '&iexcl;'  => "\xc2\xa1",  #inverted exclamation mark
            '&cent;'   => "\xc2\xa2",  #cent sign
            '&pound;'  => "\xc2\xa3",  #pound sign
            '&curren;' => "\xc2\xa4",  #currency sign
            '&yen;'    => "\xc2\xa5",  #yen sign = yuan sign
            '&brvbar;' => "\xc2\xa6",  #broken bar = broken vertical bar
            '&sect;'   => "\xc2\xa7",  #section sign
            '&uml;'    => "\xc2\xa8",  #diaeresis = spacing diaeresis
            '&copy;'   => "\xc2\xa9",  #copyright sign
            '&ordf;'   => "\xc2\xaa",  #feminine ordinal indicator
            '&laquo;'  => "\xc2\xab",  #left-pointing double angle quotation mark = left pointing guillemet
            '&not;'    => "\xc2\xac",  #not sign
            '&shy;'    => "\xc2\xad",  #soft hyphen = discretionary hyphen;
                                       #note: the non-breaking hyphen is a different character, "\xe2\x80\x91" (U+2011)
            '&reg;'    => "\xc2\xae",  #registered sign = registered trade mark sign
            '&macr;'   => "\xc2\xaf",  #macron = spacing macron = overline = APL overbar
            '&deg;'    => "\xc2\xb0",  #degree sign
            '&plusmn;' => "\xc2\xb1",  #plus-minus sign = plus-or-minus sign
            '&sup2;'   => "\xc2\xb2",  #superscript two = superscript digit two = squared
            '&sup3;'   => "\xc2\xb3",  #superscript three = superscript digit three = cubed
            '&acute;'  => "\xc2\xb4",  #acute accent = spacing acute
            '&micro;'  => "\xc2\xb5",  #micro sign
            '&para;'   => "\xc2\xb6",  #pilcrow sign = paragraph sign
            '&middot;' => "\xc2\xb7",  #middle dot = Georgian comma = Greek middle dot
            '&cedil;'  => "\xc2\xb8",  #cedilla = spacing cedilla
            '&sup1;'   => "\xc2\xb9",  #superscript one = superscript digit one
            '&ordm;'   => "\xc2\xba",  #masculine ordinal indicator
            '&raquo;'  => "\xc2\xbb",  #right-pointing double angle quotation mark = right pointing guillemet
            '&frac14;' => "\xc2\xbc",  #vulgar fraction one quarter = fraction one quarter
            '&frac12;' => "\xc2\xbd",  #vulgar fraction one half = fraction one half
            '&frac34;' => "\xc2\xbe",  #vulgar fraction three quarters = fraction three quarters
            '&iquest;' => "\xc2\xbf",  #inverted question mark = turned question mark
            #Latin capital letter
            '&Agrave;' => "\xc3\x80",  #Latin capital letter A with grave = Latin capital letter A grave
            '&Aacute;' => "\xc3\x81",  #Latin capital letter A with acute
            '&Acirc;'  => "\xc3\x82",  #Latin capital letter A with circumflex
            '&Atilde;' => "\xc3\x83",  #Latin capital letter A with tilde
            '&Auml;'   => "\xc3\x84",  #Latin capital letter A with diaeresis
            '&Aring;'  => "\xc3\x85",  #Latin capital letter A with ring above = Latin capital letter A ring
            '&AElig;'  => "\xc3\x86",  #Latin capital letter AE = Latin capital ligature AE
            '&Ccedil;' => "\xc3\x87",  #Latin capital letter C with cedilla
            '&Egrave;' => "\xc3\x88",  #Latin capital letter E with grave
            '&Eacute;' => "\xc3\x89",  #Latin capital letter E with acute
            '&Ecirc;'  => "\xc3\x8a",  #Latin capital letter E with circumflex
            '&Euml;'   => "\xc3\x8b",  #Latin capital letter E with diaeresis
            '&Igrave;' => "\xc3\x8c",  #Latin capital letter I with grave
            '&Iacute;' => "\xc3\x8d",  #Latin capital letter I with acute
            '&Icirc;'  => "\xc3\x8e",  #Latin capital letter I with circumflex
            '&Iuml;'   => "\xc3\x8f",  #Latin capital letter I with diaeresis
            '&ETH;'    => "\xc3\x90",  #Latin capital letter ETH
            '&Ntilde;' => "\xc3\x91",  #Latin capital letter N with tilde
            '&Ograve;' => "\xc3\x92",  #Latin capital letter O with grave
            '&Oacute;' => "\xc3\x93",  #Latin capital letter O with acute
            '&Ocirc;'  => "\xc3\x94",  #Latin capital letter O with circumflex
            '&Otilde;' => "\xc3\x95",  #Latin capital letter O with tilde
            '&Ouml;'   => "\xc3\x96",  #Latin capital letter O with diaeresis
            '&times;'  => "\xc3\x97",  #multiplication sign
            '&Oslash;' => "\xc3\x98",  #Latin capital letter O with stroke = Latin capital letter O slash
            '&Ugrave;' => "\xc3\x99",  #Latin capital letter U with grave
            '&Uacute;' => "\xc3\x9a",  #Latin capital letter U with acute
            '&Ucirc;'  => "\xc3\x9b",  #Latin capital letter U with circumflex
            '&Uuml;'   => "\xc3\x9c",  #Latin capital letter U with diaeresis
            '&Yacute;' => "\xc3\x9d",  #Latin capital letter Y with acute
            '&THORN;'  => "\xc3\x9e",  #Latin capital letter THORN
            #Latin small letter
            '&szlig;'  => "\xc3\x9f",  #Latin small letter sharp s = ess-zed
            '&agrave;' => "\xc3\xa0",  #Latin small letter a with grave = Latin small letter a grave
            '&aacute;' => "\xc3\xa1",  #Latin small letter a with acute
            '&acirc;'  => "\xc3\xa2",  #Latin small letter a with circumflex
            '&atilde;' => "\xc3\xa3",  #Latin small letter a with tilde
            '&auml;'   => "\xc3\xa4",  #Latin small letter a with diaeresis
            '&aring;'  => "\xc3\xa5",  #Latin small letter a with ring above = Latin small letter a ring
            '&aelig;'  => "\xc3\xa6",  #Latin small letter ae = Latin small ligature ae
            '&ccedil;' => "\xc3\xa7",  #Latin small letter c with cedilla
            '&egrave;' => "\xc3\xa8",  #Latin small letter e with grave
            '&eacute;' => "\xc3\xa9",  #Latin small letter e with acute
            '&ecirc;'  => "\xc3\xaa",  #Latin small letter e with circumflex
            '&euml;'   => "\xc3\xab",  #Latin small letter e with diaeresis
            '&igrave;' => "\xc3\xac",  #Latin small letter i with grave
            '&iacute;' => "\xc3\xad",  #Latin small letter i with acute
            '&icirc;'  => "\xc3\xae",  #Latin small letter i with circumflex
            '&iuml;'   => "\xc3\xaf",  #Latin small letter i with diaeresis
            '&eth;'    => "\xc3\xb0",  #Latin small letter eth
            '&ntilde;' => "\xc3\xb1",  #Latin small letter n with tilde
            '&ograve;' => "\xc3\xb2",  #Latin small letter o with grave
            '&oacute;' => "\xc3\xb3",  #Latin small letter o with acute
            '&ocirc;'  => "\xc3\xb4",  #Latin small letter o with circumflex
            '&otilde;' => "\xc3\xb5",  #Latin small letter o with tilde
            '&ouml;'   => "\xc3\xb6",  #Latin small letter o with diaeresis
            '&divide;' => "\xc3\xb7",  #division sign
            '&oslash;' => "\xc3\xb8",  #Latin small letter o with stroke = Latin small letter o slash
            '&ugrave;' => "\xc3\xb9",  #Latin small letter u with grave
            '&uacute;' => "\xc3\xba",  #Latin small letter u with acute
            '&ucirc;'  => "\xc3\xbb",  #Latin small letter u with circumflex
            '&uuml;'   => "\xc3\xbc",  #Latin small letter u with diaeresis
            '&yacute;' => "\xc3\xbd",  #Latin small letter y with acute
            '&thorn;'  => "\xc3\xbe",  #Latin small letter thorn
            '&yuml;'   => "\xc3\xbf",  #Latin small letter y with diaeresis
            #Symbols and Greek Letters:
            '&fnof;'    => "\xc6\x92",  #Latin small f with hook = function = florin
            '&Alpha;'   => "\xce\x91",  #Greek capital letter alpha
            '&Beta;'    => "\xce\x92",  #Greek capital letter beta
            '&Gamma;'   => "\xce\x93",  #Greek capital letter gamma
            '&Delta;'   => "\xce\x94",  #Greek capital letter delta
            '&Epsilon;' => "\xce\x95",  #Greek capital letter epsilon
            '&Zeta;'    => "\xce\x96",  #Greek capital letter zeta
            '&Eta;'     => "\xce\x97",  #Greek capital letter eta
            '&Theta;'   => "\xce\x98",  #Greek capital letter theta
            '&Iota;'    => "\xce\x99",  #Greek capital letter iota
            '&Kappa;'   => "\xce\x9a",  #Greek capital letter kappa
            '&Lambda;'  => "\xce\x9b",  #Greek capital letter lambda
            '&Mu;'      => "\xce\x9c",  #Greek capital letter mu
            '&Nu;'      => "\xce\x9d",  #Greek capital letter nu
            '&Xi;'      => "\xce\x9e",  #Greek capital letter xi
            '&Omicron;' => "\xce\x9f",  #Greek capital letter omicron
            '&Pi;'      => "\xce\xa0",  #Greek capital letter pi
            '&Rho;'     => "\xce\xa1",  #Greek capital letter rho
            '&Sigma;'   => "\xce\xa3",  #Greek capital letter sigma
            '&Tau;'     => "\xce\xa4",  #Greek capital letter tau
            '&Upsilon;' => "\xce\xa5",  #Greek capital letter upsilon
            '&Phi;'     => "\xce\xa6",  #Greek capital letter phi
            '&Chi;'     => "\xce\xa7",  #Greek capital letter chi
            '&Psi;'     => "\xce\xa8",  #Greek capital letter psi
            '&Omega;'   => "\xce\xa9",  #Greek capital letter omega
            '&alpha;'   => "\xce\xb1",  #Greek small letter alpha
            '&beta;'    => "\xce\xb2",  #Greek small letter beta
            '&gamma;'   => "\xce\xb3",  #Greek small letter gamma
            '&delta;'   => "\xce\xb4",  #Greek small letter delta
            '&epsilon;' => "\xce\xb5",  #Greek small letter epsilon
            '&zeta;'    => "\xce\xb6",  #Greek small letter zeta
            '&eta;'     => "\xce\xb7",  #Greek small letter eta
            '&theta;'   => "\xce\xb8",  #Greek small letter theta
            '&iota;'    => "\xce\xb9",  #Greek small letter iota
            '&kappa;'   => "\xce\xba",  #Greek small letter kappa
            '&lambda;'  => "\xce\xbb",  #Greek small letter lambda
            '&mu;'      => "\xce\xbc",  #Greek small letter mu
            '&nu;'      => "\xce\xbd",  #Greek small letter nu
            '&xi;'      => "\xce\xbe",  #Greek small letter xi
            '&omicron;' => "\xce\xbf",  #Greek small letter omicron
            '&pi;'      => "\xcf\x80",  #Greek small letter pi
            '&rho;'     => "\xcf\x81",  #Greek small letter rho
            '&sigmaf;'  => "\xcf\x82",  #Greek small letter final sigma
            '&sigma;'   => "\xcf\x83",  #Greek small letter sigma
            '&tau;'     => "\xcf\x84",  #Greek small letter tau
            '&upsilon;' => "\xcf\x85",  #Greek small letter upsilon
            '&phi;'     => "\xcf\x86",  #Greek small letter phi
            '&chi;'     => "\xcf\x87",  #Greek small letter chi
            '&psi;'     => "\xcf\x88",  #Greek small letter psi
            '&omega;'   => "\xcf\x89",  #Greek small letter omega
            '&thetasym;'=> "\xcf\x91",  #Greek small letter theta symbol
            '&upsih;'   => "\xcf\x92",  #Greek upsilon with hook symbol
            '&piv;'     => "\xcf\x96",  #Greek pi symbol

            '&bull;'    => "\xe2\x80\xa2",  #bullet = black small circle
            '&hellip;'  => "\xe2\x80\xa6",  #horizontal ellipsis = three dot leader
            '&prime;'   => "\xe2\x80\xb2",  #prime = minutes = feet
            '&Prime;'   => "\xe2\x80\xb3",  #double prime = seconds = inches
            '&oline;'   => "\xe2\x80\xbe",  #overline = spacing overscore
            '&frasl;'   => "\xe2\x81\x84",  #fraction slash
            '&weierp;'  => "\xe2\x84\x98",  #script capital P = power set = Weierstrass p
            '&image;'   => "\xe2\x84\x91",  #blackletter capital I = imaginary part
            '&real;'    => "\xe2\x84\x9c",  #blackletter capital R = real part symbol
            '&trade;'   => "\xe2\x84\xa2",  #trade mark sign
            '&alefsym;' => "\xe2\x84\xb5",  #alef symbol = first transfinite cardinal
            '&larr;'    => "\xe2\x86\x90",  #leftwards arrow
            '&uarr;'    => "\xe2\x86\x91",  #upwards arrow
            '&rarr;'    => "\xe2\x86\x92",  #rightwards arrow
            '&darr;'    => "\xe2\x86\x93",  #downwards arrow
            '&harr;'    => "\xe2\x86\x94",  #left right arrow
            '&crarr;'   => "\xe2\x86\xb5",  #downwards arrow with corner leftwards = carriage return
            '&lArr;'    => "\xe2\x87\x90",  #leftwards double arrow
            '&uArr;'    => "\xe2\x87\x91",  #upwards double arrow
            '&rArr;'    => "\xe2\x87\x92",  #rightwards double arrow
            '&dArr;'    => "\xe2\x87\x93",  #downwards double arrow
            '&hArr;'    => "\xe2\x87\x94",  #left right double arrow
            '&forall;'  => "\xe2\x88\x80",  #for all
            '&part;'    => "\xe2\x88\x82",  #partial differential
            '&exist;'   => "\xe2\x88\x83",  #there exists
            '&empty;'   => "\xe2\x88\x85",  #empty set = null set = diameter
            '&nabla;'   => "\xe2\x88\x87",  #nabla = backward difference
            '&isin;'    => "\xe2\x88\x88",  #element of
            '&notin;'   => "\xe2\x88\x89",  #not an element of
            '&ni;'      => "\xe2\x88\x8b",  #contains as member
            '&prod;'    => "\xe2\x88\x8f",  #n-ary product = product sign
            '&sum;'     => "\xe2\x88\x91",  #n-ary summation
            '&minus;'   => "\xe2\x88\x92",  #minus sign
            '&lowast;'  => "\xe2\x88\x97",  #asterisk operator
            '&radic;'   => "\xe2\x88\x9a",  #square root = radical sign
            '&prop;'    => "\xe2\x88\x9d",  #proportional to
            '&infin;'   => "\xe2\x88\x9e",  #infinity
            '&ang;'     => "\xe2\x88\xa0",  #angle
            '&and;'     => "\xe2\x88\xa7",  #logical and = wedge
            '&or;'      => "\xe2\x88\xa8",  #logical or = vee
            '&cap;'     => "\xe2\x88\xa9",  #intersection = cap
            '&cup;'     => "\xe2\x88\xaa",  #union = cup
            '&int;'     => "\xe2\x88\xab",  #integral
            '&there4;'  => "\xe2\x88\xb4",  #therefore
            '&sim;'     => "\xe2\x88\xbc",  #tilde operator = varies with = similar to
            '&cong;'    => "\xe2\x89\x85",  #approximately equal to
            '&asymp;'   => "\xe2\x89\x88",  #almost equal to = asymptotic to
            '&ne;'      => "\xe2\x89\xa0",  #not equal to
            '&equiv;'   => "\xe2\x89\xa1",  #identical to
            '&le;'      => "\xe2\x89\xa4",  #less-than or equal to
            '&ge;'      => "\xe2\x89\xa5",  #greater-than or equal to
            '&sub;'     => "\xe2\x8a\x82",  #subset of
            '&sup;'     => "\xe2\x8a\x83",  #superset of
            '&nsub;'    => "\xe2\x8a\x84",  #not a subset of
            '&sube;'    => "\xe2\x8a\x86",  #subset of or equal to
            '&supe;'    => "\xe2\x8a\x87",  #superset of or equal to
            '&oplus;'   => "\xe2\x8a\x95",  #circled plus = direct sum
            '&otimes;'  => "\xe2\x8a\x97",  #circled times = vector product
            '&perp;'    => "\xe2\x8a\xa5",  #up tack = orthogonal to = perpendicular
            '&sdot;'    => "\xe2\x8b\x85",  #dot operator
            '&lceil;'   => "\xe2\x8c\x88",  #left ceiling = APL upstile
            '&rceil;'   => "\xe2\x8c\x89",  #right ceiling
            '&lfloor;'  => "\xe2\x8c\x8a",  #left floor = APL downstile
            '&rfloor;'  => "\xe2\x8c\x8b",  #right floor
            '&lang;'    => "\xe2\x8c\xa9",  #left-pointing angle bracket = bra
            '&rang;'    => "\xe2\x8c\xaa",  #right-pointing angle bracket = ket
            '&loz;'     => "\xe2\x97\x8a",  #lozenge
            '&spades;'  => "\xe2\x99\xa0",  #black spade suit
            '&clubs;'   => "\xe2\x99\xa3",  #black club suit = shamrock
            '&hearts;'  => "\xe2\x99\xa5",  #black heart suit = valentine
            '&diams;'   => "\xe2\x99\xa6",  #black diamond suit
            #Other Special Characters:
            '&OElig;'  => "\xc5\x92",  #Latin capital ligature OE
            '&oelig;'  => "\xc5\x93",  #Latin small ligature oe
            '&Scaron;' => "\xc5\xa0",  #Latin capital letter S with caron
            '&scaron;' => "\xc5\xa1",  #Latin small letter s with caron
            '&Yuml;'   => "\xc5\xb8",  #Latin capital letter Y with diaeresis
            '&circ;'   => "\xcb\x86",  #modifier letter circumflex accent
            '&tilde;'  => "\xcb\x9c",  #small tilde
            '&ensp;'   => "\xe2\x80\x82",  #en space
            '&emsp;'   => "\xe2\x80\x83",  #em space
            '&thinsp;' => "\xe2\x80\x89",  #thin space
            '&zwnj;'   => "\xe2\x80\x8c",  #zero width non-joiner
            '&zwj;'    => "\xe2\x80\x8d",  #zero width joiner
            '&lrm;'    => "\xe2\x80\x8e",  #left-to-right mark
            '&rlm;'    => "\xe2\x80\x8f",  #right-to-left mark
            '&ndash;'  => "\xe2\x80\x93",  #en dash
            '&mdash;'  => "\xe2\x80\x94",  #em dash
            '&lsquo;'  => "\xe2\x80\x98",  #left single quotation mark
            '&rsquo;'  => "\xe2\x80\x99",  #right single quotation mark (and apostrophe!)
            '&sbquo;'  => "\xe2\x80\x9a",  #single low-9 quotation mark
            '&ldquo;'  => "\xe2\x80\x9c",  #left double quotation mark
            '&rdquo;'  => "\xe2\x80\x9d",  #right double quotation mark
            '&bdquo;'  => "\xe2\x80\x9e",  #double low-9 quotation mark
            '&dagger;' => "\xe2\x80\xa0",  #dagger
            '&Dagger;' => "\xe2\x80\xa1",  #double dagger
            '&permil;' => "\xe2\x80\xb0",  #per mille sign
            '&lsaquo;' => "\xe2\x80\xb9",  #single left-pointing angle quotation mark
            '&rsaquo;' => "\xe2\x80\xba",  #single right-pointing angle quotation mark
            '&euro;'   => "\xe2\x82\xac"   #euro sign
        ));

        #The double quote is escaped too; & < > are intentionally NOT escaped.
        $map["\x22"] = '&quot;';
        //$map["\x3c"] = '&lt;';
        //$map["\x3e"] = '&gt;';
    }

    #Replace UTF-8 characters with named entities in a single pass.
    #strtr() never re-scans text it has already substituted, and looking every
    #table key up directly guarantees that no entry can be missed by a
    #too-narrow pre-filter.
    return strtr($s, $map);
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
/**
|
|||
|
|
* Convert all HTML entities to native UTF-8 characters
|
|||
|
|
* Функция декодирует гораздо больше именованных сущностей, чем стандартная html_entity_decode()
|
|||
|
|
* Все dec и hex сущности так же переводятся в UTF-8.
|
|||
|
|
*
|
|||
|
|
* Example: '&quot;' or '&#34;' or '&#x22;' will be converted to '"'.
|
|||
|
|
*
|
|||
|
|
* @param string $s
|
|||
|
|
* @param bool $is_htmlspecialchars обрабатывать специальные html сущности? (< > & ")
|
|||
|
|
* @return string
|
|||
|
|
* @link http://www.htmlhelp.com/reference/html40/entities/
|
|||
|
|
* @link http://www.alanwood.net/demos/ent4_frame.html (HTML 4.01 Character Entity References)
|
|||
|
|
* @link http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset1.asp?frame=true
|
|||
|
|
* @link http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp?frame=true
|
|||
|
|
* @link http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp?frame=true
|
|||
|
|
*
|
|||
|
|
* @license http://creativecommons.org/licenses/by-sa/3.0/
|
|||
|
|
* @author Nasibullin Rinat, http://orangetie.ru/
|
|||
|
|
* @charset ANSI
|
|||
|
|
* @version 2.1.12
|
|||
|
|
*/
|
|||
|
|
function utf8_html_entity_decode($s, $is_htmlspecialchars = false)
|
|||
|
|
{
|
|||
|
|
#оптимизация скорости
|
|||
|
|
if (strlen($s) < 4 #по минимальной длине сущности - 4 байта: &#d; &xx;
|
|||
|
|
|| ($pos = strpos($s, '&') === false) || strpos($s, ';', $pos) === false) return $s;
|
|||
|
|
$table = array(
|
|||
|
|
#Latin-1 Entities:
|
|||
|
|
' ' => "\xc2\xa0", #no-break space = non-breaking space
|
|||
|
|
'¡' => "\xc2\xa1", #inverted exclamation mark
|
|||
|
|
'¢' => "\xc2\xa2", #cent sign
|
|||
|
|
'£' => "\xc2\xa3", #pound sign
|
|||
|
|
'¤' => "\xc2\xa4", #currency sign
|
|||
|
|
'¥' => "\xc2\xa5", #yen sign = yuan sign
|
|||
|
|
'¦' => "\xc2\xa6", #broken bar = broken vertical bar
|
|||
|
|
'§' => "\xc2\xa7", #section sign
|
|||
|
|
'¨' => "\xc2\xa8", #diaeresis = spacing diaeresis
|
|||
|
|
'©' => "\xc2\xa9", #copyright sign
|
|||
|
|
'ª' => "\xc2\xaa", #feminine ordinal indicator
|
|||
|
|
'«' => "\xc2\xab", #left-pointing double angle quotation mark = left pointing guillemet («)
|
|||
|
|
'¬' => "\xc2\xac", #not sign
|
|||
|
|
'­' => "\xc2\xad", #soft hyphen = discretionary hyphen
|
|||
|
|
'®' => "\xc2\xae", #registered sign = registered trade mark sign
|
|||
|
|
'¯' => "\xc2\xaf", #macron = spacing macron = overline = APL overbar
|
|||
|
|
'°' => "\xc2\xb0", #degree sign
|
|||
|
|
'±' => "\xc2\xb1", #plus-minus sign = plus-or-minus sign
|
|||
|
|
'²' => "\xc2\xb2", #superscript two = superscript digit two = squared
|
|||
|
|
'³' => "\xc2\xb3", #superscript three = superscript digit three = cubed
|
|||
|
|
'´' => "\xc2\xb4", #acute accent = spacing acute
|
|||
|
|
'µ' => "\xc2\xb5", #micro sign
|
|||
|
|
'¶' => "\xc2\xb6", #pilcrow sign = paragraph sign
|
|||
|
|
'·' => "\xc2\xb7", #middle dot = Georgian comma = Greek middle dot
|
|||
|
|
'¸' => "\xc2\xb8", #cedilla = spacing cedilla
|
|||
|
|
'¹' => "\xc2\xb9", #superscript one = superscript digit one
|
|||
|
|
'º' => "\xc2\xba", #masculine ordinal indicator
|
|||
|
|
'»' => "\xc2\xbb", #right-pointing double angle quotation mark = right pointing guillemet (»)
|
|||
|
|
'¼' => "\xc2\xbc", #vulgar fraction one quarter = fraction one quarter
|
|||
|
|
'½' => "\xc2\xbd", #vulgar fraction one half = fraction one half
|
|||
|
|
'¾' => "\xc2\xbe", #vulgar fraction three quarters = fraction three quarters
|
|||
|
|
'¿' => "\xc2\xbf", #inverted question mark = turned question mark
|
|||
|
|
#Latin capital letter
|
|||
|
|
'À' => "\xc3\x80", #Latin capital letter A with grave = Latin capital letter A grave
|
|||
|
|
'Á' => "\xc3\x81", #Latin capital letter A with acute
|
|||
|
|
'Â' => "\xc3\x82", #Latin capital letter A with circumflex
|
|||
|
|
'Ã' => "\xc3\x83", #Latin capital letter A with tilde
|
|||
|
|
'Ä' => "\xc3\x84", #Latin capital letter A with diaeresis
|
|||
|
|
'Å' => "\xc3\x85", #Latin capital letter A with ring above = Latin capital letter A ring
|
|||
|
|
'Æ' => "\xc3\x86", #Latin capital letter AE = Latin capital ligature AE
|
|||
|
|
'Ç' => "\xc3\x87", #Latin capital letter C with cedilla
|
|||
|
|
'È' => "\xc3\x88", #Latin capital letter E with grave
|
|||
|
|
'É' => "\xc3\x89", #Latin capital letter E with acute
|
|||
|
|
'Ê' => "\xc3\x8a", #Latin capital letter E with circumflex
|
|||
|
|
'Ë' => "\xc3\x8b", #Latin capital letter E with diaeresis
|
|||
|
|
'Ì' => "\xc3\x8c", #Latin capital letter I with grave
|
|||
|
|
'Í' => "\xc3\x8d", #Latin capital letter I with acute
|
|||
|
|
'Î' => "\xc3\x8e", #Latin capital letter I with circumflex
|
|||
|
|
'Ï' => "\xc3\x8f", #Latin capital letter I with diaeresis
|
|||
|
|
'Ð' => "\xc3\x90", #Latin capital letter ETH
|
|||
|
|
'Ñ' => "\xc3\x91", #Latin capital letter N with tilde
|
|||
|
|
'Ò' => "\xc3\x92", #Latin capital letter O with grave
|
|||
|
|
'Ó' => "\xc3\x93", #Latin capital letter O with acute
|
|||
|
|
'Ô' => "\xc3\x94", #Latin capital letter O with circumflex
|
|||
|
|
'Õ' => "\xc3\x95", #Latin capital letter O with tilde
|
|||
|
|
'Ö' => "\xc3\x96", #Latin capital letter O with diaeresis
|
|||
|
|
'×' => "\xc3\x97", #multiplication sign
|
|||
|
|
'Ø' => "\xc3\x98", #Latin capital letter O with stroke = Latin capital letter O slash
|
|||
|
|
'Ù' => "\xc3\x99", #Latin capital letter U with grave
|
|||
|
|
'Ú' => "\xc3\x9a", #Latin capital letter U with acute
|
|||
|
|
'Û' => "\xc3\x9b", #Latin capital letter U with circumflex
|
|||
|
|
'Ü' => "\xc3\x9c", #Latin capital letter U with diaeresis
|
|||
|
|
'Ý' => "\xc3\x9d", #Latin capital letter Y with acute
|
|||
|
|
'Þ' => "\xc3\x9e", #Latin capital letter THORN
|
|||
|
|
#Latin small letter
|
|||
|
|
'ß' => "\xc3\x9f", #Latin small letter sharp s = ess-zed
|
|||
|
|
'à' => "\xc3\xa0", #Latin small letter a with grave = Latin small letter a grave
|
|||
|
|
'á' => "\xc3\xa1", #Latin small letter a with acute
|
|||
|
|
'â' => "\xc3\xa2", #Latin small letter a with circumflex
|
|||
|
|
'ã' => "\xc3\xa3", #Latin small letter a with tilde
|
|||
|
|
'ä' => "\xc3\xa4", #Latin small letter a with diaeresis
|
|||
|
|
'å' => "\xc3\xa5", #Latin small letter a with ring above = Latin small letter a ring
|
|||
|
|
'æ' => "\xc3\xa6", #Latin small letter ae = Latin small ligature ae
|
|||
|
|
'ç' => "\xc3\xa7", #Latin small letter c with cedilla
|
|||
|
|
'è' => "\xc3\xa8", #Latin small letter e with grave
|
|||
|
|
'é' => "\xc3\xa9", #Latin small letter e with acute
|
|||
|
|
'ê' => "\xc3\xaa", #Latin small letter e with circumflex
|
|||
|
|
'ë' => "\xc3\xab", #Latin small letter e with diaeresis
|
|||
|
|
'ì' => "\xc3\xac", #Latin small letter i with grave
|
|||
|
|
'í' => "\xc3\xad", #Latin small letter i with acute
|
|||
|
|
'î' => "\xc3\xae", #Latin small letter i with circumflex
|
|||
|
|
'ï' => "\xc3\xaf", #Latin small letter i with diaeresis
|
|||
|
|
'ð' => "\xc3\xb0", #Latin small letter eth
|
|||
|
|
'ñ' => "\xc3\xb1", #Latin small letter n with tilde
|
|||
|
|
'ò' => "\xc3\xb2", #Latin small letter o with grave
|
|||
|
|
'ó' => "\xc3\xb3", #Latin small letter o with acute
|
|||
|
|
'ô' => "\xc3\xb4", #Latin small letter o with circumflex
|
|||
|
|
'õ' => "\xc3\xb5", #Latin small letter o with tilde
|
|||
|
|
'ö' => "\xc3\xb6", #Latin small letter o with diaeresis
|
|||
|
|
'÷' => "\xc3\xb7", #division sign
|
|||
|
|
'ø' => "\xc3\xb8", #Latin small letter o with stroke = Latin small letter o slash
|
|||
|
|
'ù' => "\xc3\xb9", #Latin small letter u with grave
|
|||
|
|
'ú' => "\xc3\xba", #Latin small letter u with acute
|
|||
|
|
'û' => "\xc3\xbb", #Latin small letter u with circumflex
|
|||
|
|
'ü' => "\xc3\xbc", #Latin small letter u with diaeresis
|
|||
|
|
'ý' => "\xc3\xbd", #Latin small letter y with acute
|
|||
|
|
'þ' => "\xc3\xbe", #Latin small letter thorn
|
|||
|
|
'ÿ' => "\xc3\xbf", #Latin small letter y with diaeresis
|
|||
|
|
#Symbols and Greek Letters:
|
|||
|
|
'ƒ' => "\xc6\x92", #Latin small f with hook = function = florin
|
|||
|
|
'Α' => "\xce\x91", #Greek capital letter alpha
|
|||
|
|
'Β' => "\xce\x92", #Greek capital letter beta
|
|||
|
|
'Γ' => "\xce\x93", #Greek capital letter gamma
|
|||
|
|
'Δ' => "\xce\x94", #Greek capital letter delta
|
|||
|
|
'Ε' => "\xce\x95", #Greek capital letter epsilon
|
|||
|
|
'Ζ' => "\xce\x96", #Greek capital letter zeta
|
|||
|
|
'Η' => "\xce\x97", #Greek capital letter eta
|
|||
|
|
'Θ' => "\xce\x98", #Greek capital letter theta
|
|||
|
|
'Ι' => "\xce\x99", #Greek capital letter iota
|
|||
|
|
'Κ' => "\xce\x9a", #Greek capital letter kappa
|
|||
|
|
'Λ' => "\xce\x9b", #Greek capital letter lambda
|
|||
|
|
'Μ' => "\xce\x9c", #Greek capital letter mu
|
|||
|
|
'Ν' => "\xce\x9d", #Greek capital letter nu
|
|||
|
|
'Ξ' => "\xce\x9e", #Greek capital letter xi
|
|||
|
|
'Ο' => "\xce\x9f", #Greek capital letter omicron
|
|||
|
|
'Π' => "\xce\xa0", #Greek capital letter pi
|
|||
|
|
'Ρ' => "\xce\xa1", #Greek capital letter rho
|
|||
|
|
'Σ' => "\xce\xa3", #Greek capital letter sigma
|
|||
|
|
'Τ' => "\xce\xa4", #Greek capital letter tau
|
|||
|
|
'Υ' => "\xce\xa5", #Greek capital letter upsilon
|
|||
|
|
'Φ' => "\xce\xa6", #Greek capital letter phi
|
|||
|
|
'Χ' => "\xce\xa7", #Greek capital letter chi
|
|||
|
|
'Ψ' => "\xce\xa8", #Greek capital letter psi
|
|||
|
|
'Ω' => "\xce\xa9", #Greek capital letter omega
|
|||
|
|
'α' => "\xce\xb1", #Greek small letter alpha
|
|||
|
|
'β' => "\xce\xb2", #Greek small letter beta
|
|||
|
|
'γ' => "\xce\xb3", #Greek small letter gamma
|
|||
|
|
'δ' => "\xce\xb4", #Greek small letter delta
|
|||
|
|
'ε' => "\xce\xb5", #Greek small letter epsilon
|
|||
|
|
'ζ' => "\xce\xb6", #Greek small letter zeta
|
|||
|
|
'η' => "\xce\xb7", #Greek small letter eta
|
|||
|
|
'θ' => "\xce\xb8", #Greek small letter theta
|
|||
|
|
'ι' => "\xce\xb9", #Greek small letter iota
|
|||
|
|
'κ' => "\xce\xba", #Greek small letter kappa
|
|||
|
|
'λ' => "\xce\xbb", #Greek small letter lambda
|
|||
|
|
'μ' => "\xce\xbc", #Greek small letter mu
|
|||
|
|
'ν' => "\xce\xbd", #Greek small letter nu
|
|||
|
|
'ξ' => "\xce\xbe", #Greek small letter xi
|
|||
|
|
'ο' => "\xce\xbf", #Greek small letter omicron
|
|||
|
|
'π' => "\xcf\x80", #Greek small letter pi
|
|||
|
|
'ρ' => "\xcf\x81", #Greek small letter rho
|
|||
|
|
'ς' => "\xcf\x82", #Greek small letter final sigma
|
|||
|
|
'σ' => "\xcf\x83", #Greek small letter sigma
|
|||
|
|
'τ' => "\xcf\x84", #Greek small letter tau
|
|||
|
|
'υ' => "\xcf\x85", #Greek small letter upsilon
|
|||
|
|
'φ' => "\xcf\x86", #Greek small letter phi
|
|||
|
|
'χ' => "\xcf\x87", #Greek small letter chi
|
|||
|
|
'ψ' => "\xcf\x88", #Greek small letter psi
|
|||
|
|
'ω' => "\xcf\x89", #Greek small letter omega
|
|||
|
|
'ϑ'=> "\xcf\x91", #Greek small letter theta symbol
|
|||
|
|
'ϒ' => "\xcf\x92", #Greek upsilon with hook symbol
|
|||
|
|
'ϖ' => "\xcf\x96", #Greek pi symbol
|
|||
|
|
|
|||
|
|
'•' => "\xe2\x80\xa2", #bullet = black small circle
|
|||
|
|
'…' => "\xe2\x80\xa6", #horizontal ellipsis = three dot leader
|
|||
|
|
'′' => "\xe2\x80\xb2", #prime = minutes = feet (для обозначения минут и футов)
|
|||
|
|
'″' => "\xe2\x80\xb3", #double prime = seconds = inches (для обозначения секунд и дюймов).
|
|||
|
|
'‾' => "\xe2\x80\xbe", #overline = spacing overscore
|
|||
|
|
'⁄' => "\xe2\x81\x84", #fraction slash
|
|||
|
|
'℘' => "\xe2\x84\x98", #script capital P = power set = Weierstrass p
|
|||
|
|
'ℑ' => "\xe2\x84\x91", #blackletter capital I = imaginary part
|
|||
|
|
'ℜ' => "\xe2\x84\x9c", #blackletter capital R = real part symbol
|
|||
|
|
'™' => "\xe2\x84\xa2", #trade mark sign
|
|||
|
|
'ℵ' => "\xe2\x84\xb5", #alef symbol = first transfinite cardinal
|
|||
|
|
'←' => "\xe2\x86\x90", #leftwards arrow
|
|||
|
|
'↑' => "\xe2\x86\x91", #upwards arrow
|
|||
|
|
'→' => "\xe2\x86\x92", #rightwards arrow
|
|||
|
|
'↓' => "\xe2\x86\x93", #downwards arrow
|
|||
|
|
'↔' => "\xe2\x86\x94", #left right arrow
|
|||
|
|
'↵' => "\xe2\x86\xb5", #downwards arrow with corner leftwards = carriage return
|
|||
|
|
'⇐' => "\xe2\x87\x90", #leftwards double arrow
|
|||
|
|
'⇑' => "\xe2\x87\x91", #upwards double arrow
|
|||
|
|
'⇒' => "\xe2\x87\x92", #rightwards double arrow
|
|||
|
|
'⇓' => "\xe2\x87\x93", #downwards double arrow
|
|||
|
|
'⇔' => "\xe2\x87\x94", #left right double arrow
|
|||
|
|
'∀' => "\xe2\x88\x80", #for all
|
|||
|
|
'∂' => "\xe2\x88\x82", #partial differential
|
|||
|
|
'∃' => "\xe2\x88\x83", #there exists
|
|||
|
|
'∅' => "\xe2\x88\x85", #empty set = null set = diameter
|
|||
|
|
'∇' => "\xe2\x88\x87", #nabla = backward difference
|
|||
|
|
'∈' => "\xe2\x88\x88", #element of
|
|||
|
|
'∉' => "\xe2\x88\x89", #not an element of
|
|||
|
|
'∋' => "\xe2\x88\x8b", #contains as member
|
|||
|
|
'∏' => "\xe2\x88\x8f", #n-ary product = product sign
|
|||
|
|
'∑' => "\xe2\x88\x91", #n-ary sumation
|
|||
|
|
'−' => "\xe2\x88\x92", #minus sign
|
|||
|
|
'∗' => "\xe2\x88\x97", #asterisk operator
|
|||
|
|
'√' => "\xe2\x88\x9a", #square root = radical sign
|
|||
|
|
'∝' => "\xe2\x88\x9d", #proportional to
|
|||
|
|
'∞' => "\xe2\x88\x9e", #infinity
|
|||
|
|
'∠' => "\xe2\x88\xa0", #angle
|
|||
|
|
'∧' => "\xe2\x88\xa7", #logical and = wedge
|
|||
|
|
'∨' => "\xe2\x88\xa8", #logical or = vee
|
|||
|
|
'∩' => "\xe2\x88\xa9", #intersection = cap
|
|||
|
|
'∪' => "\xe2\x88\xaa", #union = cup
|
|||
|
|
'∫' => "\xe2\x88\xab", #integral
|
|||
|
|
'∴' => "\xe2\x88\xb4", #therefore
|
|||
|
|
'∼' => "\xe2\x88\xbc", #tilde operator = varies with = similar to
|
|||
|
|
'≅' => "\xe2\x89\x85", #approximately equal to
|
|||
|
|
'≈' => "\xe2\x89\x88", #almost equal to = asymptotic to
|
|||
|
|
'≠' => "\xe2\x89\xa0", #not equal to
|
|||
|
|
'≡' => "\xe2\x89\xa1", #identical to
|
|||
|
|
'≤' => "\xe2\x89\xa4", #less-than or equal to
|
|||
|
|
'≥' => "\xe2\x89\xa5", #greater-than or equal to
|
|||
|
|
'⊂' => "\xe2\x8a\x82", #subset of
|
|||
|
|
'⊃' => "\xe2\x8a\x83", #superset of
|
|||
|
|
'⊄' => "\xe2\x8a\x84", #not a subset of
|
|||
|
|
'⊆' => "\xe2\x8a\x86", #subset of or equal to
|
|||
|
|
'⊇' => "\xe2\x8a\x87", #superset of or equal to
|
|||
|
|
'⊕' => "\xe2\x8a\x95", #circled plus = direct sum
|
|||
|
|
'⊗' => "\xe2\x8a\x97", #circled times = vector product
|
|||
|
|
'⊥' => "\xe2\x8a\xa5", #up tack = orthogonal to = perpendicular
|
|||
|
|
'⋅' => "\xe2\x8b\x85", #dot operator
|
|||
|
|
'⌈' => "\xe2\x8c\x88", #left ceiling = APL upstile
|
|||
|
|
'⌉' => "\xe2\x8c\x89", #right ceiling
|
|||
|
|
'⌊' => "\xe2\x8c\x8a", #left floor = APL downstile
|
|||
|
|
'⌋' => "\xe2\x8c\x8b", #right floor
|
|||
|
|
'⟨' => "\xe2\x8c\xa9", #left-pointing angle bracket = bra
|
|||
|
|
'⟩' => "\xe2\x8c\xaa", #right-pointing angle bracket = ket
|
|||
|
|
'◊' => "\xe2\x97\x8a", #lozenge
|
|||
|
|
'♠' => "\xe2\x99\xa0", #black spade suit
|
|||
|
|
'♣' => "\xe2\x99\xa3", #black club suit = shamrock
|
|||
|
|
'♥' => "\xe2\x99\xa5", #black heart suit = valentine
|
|||
|
|
'♦' => "\xe2\x99\xa6", #black diamond suit
|
|||
|
|
#Other Special Characters:
|
|||
|
|
'Œ' => "\xc5\x92", #Latin capital ligature OE
|
|||
|
|
'œ' => "\xc5\x93", #Latin small ligature oe
|
|||
|
|
'Š' => "\xc5\xa0", #Latin capital letter S with caron
|
|||
|
|
'š' => "\xc5\xa1", #Latin small letter s with caron
|
|||
|
|
'Ÿ' => "\xc5\xb8", #Latin capital letter Y with diaeresis
|
|||
|
|
'ˆ' => "\xcb\x86", #modifier letter circumflex accent
|
|||
|
|
'˜' => "\xcb\x9c", #small tilde
|
|||
|
|
' ' => "\xe2\x80\x82", #en space
|
|||
|
|
' ' => "\xe2\x80\x83", #em space
|
|||
|
|
' ' => "\xe2\x80\x89", #thin space
|
|||
|
|
'‌' => "\xe2\x80\x8c", #zero width non-joiner
|
|||
|
|
'‍' => "\xe2\x80\x8d", #zero width joiner
|
|||
|
|
'‎' => "\xe2\x80\x8e", #left-to-right mark
|
|||
|
|
'‏' => "\xe2\x80\x8f", #right-to-left mark
|
|||
|
|
'–' => "\xe2\x80\x93", #en dash
|
|||
|
|
'—' => "\xe2\x80\x94", #em dash
|
|||
|
|
'‘' => "\xe2\x80\x98", #left single quotation mark
|
|||
|
|
'’' => "\xe2\x80\x99", #right single quotation mark (and apostrophe!)
|
|||
|
|
'‚' => "\xe2\x80\x9a", #single low-9 quotation mark
|
|||
|
|
'“' => "\xe2\x80\x9c", #left double quotation mark
|
|||
|
|
'”' => "\xe2\x80\x9d", #right double quotation mark
|
|||
|
|
'„' => "\xe2\x80\x9e", #double low-9 quotation mark
|
|||
|
|
'†' => "\xe2\x80\xa0", #dagger
|
|||
|
|
'‡' => "\xe2\x80\xa1", #double dagger
|
|||
|
|
'‰' => "\xe2\x80\xb0", #per mille sign
|
|||
|
|
'‹' => "\xe2\x80\xb9", #single left-pointing angle quotation mark
|
|||
|
|
'›' => "\xe2\x80\xba", #single right-pointing angle quotation mark
|
|||
|
|
'€' => "\xe2\x82\xac", #euro sign
|
|||
|
|
);
|
|||
|
|
$htmlspecialchars = array(
|
|||
|
|
'"' => "\x22", #quotation mark = APL quote (") "
|
|||
|
|
'&' => "\x26", #ampersand (&) &
|
|||
|
|
'<' => "\x3c", #less-than sign (<) <
|
|||
|
|
'>' => "\x3e", #greater-than sign (>) >
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
if ($is_htmlspecialchars) $table += $htmlspecialchars;
|
|||
|
|
|
|||
|
|
#заменяем именованные сущности:
|
|||
|
|
#оптимизация скорости: заменяем только те сущности, которые используются в html коде!
|
|||
|
|
#эта часть кода работает быстрее, чем $s = strtr($s, $table);
|
|||
|
|
preg_match_all('/&[a-zA-Z]++\d*+;/sSX', $s, $m, null, $pos);
|
|||
|
|
foreach (array_unique($m[0]) as $entity)
|
|||
|
|
{
|
|||
|
|
if (array_key_exists($entity, $table)) $s = str_replace($entity, $table[$entity], $s);
|
|||
|
|
}#foreach
|
|||
|
|
|
|||
|
|
if (($pos = strpos($s, '&#')) !== false) #speed optimization
|
|||
|
|
{
|
|||
|
|
#заменяем числовые dec и hex сущности:
|
|||
|
|
$htmlspecialchars_flip = array_flip($htmlspecialchars);
|
|||
|
|
$s = preg_replace('/&#((x)[\da-fA-F]{1,6}+|\d{1,7}+);/seS', #1,114,112 sumbols total in UTF-16
|
|||
|
|
'(array_key_exists($char = pack("C", $codepoint = ("$2") ? hexdec("$1") : "$1"),
|
|||
|
|
$htmlspecialchars_flip
|
|||
|
|
)
|
|||
|
|
&& ! $is_htmlspecialchars
|
|||
|
|
) ? $htmlspecialchars_flip[$char]
|
|||
|
|
: utf8_chr($codepoint)', $s, -1, $pos);
|
|||
|
|
}
|
|||
|
|
return $s;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
if (!function_exists('mb_str_replace')) {
    /**
     * UTF-8 aware, single-pass counterpart of str_replace().
     *
     * Every occurrence of a search string is replaced in one left-to-right
     * scan of the subject, so text produced by a replacement is never
     * searched again (array('a', 'b') => array('b', 'a') really swaps).
     * When several search strings match at the same position, the one listed
     * first wins. Replacements are inserted literally: "$1" or "\\1" in a
     * replacement have no special meaning.
     *
     * @param string|array $search   Search string or list of search strings.
     *                               Empty strings are ignored.
     * @param string|array $replace  Replacement string, or list of replacements
     *                               paired with $search by position. If the
     *                               lists differ in length, only the leading
     *                               pairs are used and the remaining search
     *                               strings are left untouched. An array given
     *                               together with a scalar $search is cast to
     *                               string, as the previous implementation did.
     * @param string|array $subject  String to process, or array of strings
     *                               (processed element by element, keys kept).
     * @return string|array|null     Processed subject; NULL if PCRE reports an
     *                               error (e.g. the input is not valid UTF-8).
     */
    function mb_str_replace($search, $replace, $subject) {
        #array subject: handle each element with the original, uncast arguments
        if (is_array($subject)) {
            foreach ($subject as $key => $val) {
                $subject[$key] = mb_str_replace($search, $replace, $val);
            }
            return $subject;
        }
        $subject = (string)$subject;

        #build the "needle => replacement" map
        $map = array();
        if (is_array($search)) {
            $needles = array_values($search);
            if (is_array($replace)) {
                $replacements = array_values($replace);
                $len = min(count($needles), count($replacements));
                for ($i = 0; $i < $len; $i++) {
                    $map[(string)$needles[$i]] = (string)$replacements[$i];
                }
            } else {
                $replacement = (string)$replace;
                foreach ($needles as $needle) {
                    $map[(string)$needle] = $replacement;
                }
            }
        } else {
            $map[(string)$search] = (string)$replace;
        }
        #an empty needle would match at every position
        unset($map['']);

        #nothing to search for: an empty pattern would be a PCRE error
        if (!$map) {
            return $subject;
        }

        #alternation of quoted needles (not a character class), so that
        #multi-character search strings are matched as whole substrings
        $alternatives = array();
        foreach (array_keys($map) as $needle) {
            #numeric-looking needles come back from array_keys() as integers
            $alternatives[] = preg_quote((string)$needle, '/');
        }
        $pattern = '/' . implode('|', $alternatives) . '/u';

        #a closure replaces create_function(), which was removed in PHP 8.0
        return preg_replace_callback(
            $pattern,
            function ($match) use ($map) {
                return array_key_exists($match[0], $map) ? $map[$match[0]] : $match[0];
            },
            $subject
        );
    }
}?>
|