Allerdings hatte ich Probleme mit Sonderzeichen. Hier die Lösung:
Code: Alles auswählen
/* html_convert_entities($string) -- rewrite the named entities in a string.
 *
 * Every "&name;" sequence (a letter followed by one or more letters or
 * digits) is handed to convert_entity() and replaced by whatever that
 * callback returns; the callback is meant to supply the XML-compatible
 * numeric equivalent. Numeric references such as "&#8364;" do not match
 * the pattern (the "#" is not a letter) and pass through untouched.
 *
 * Returns the result of preg_replace_callback() unchanged.
 */
function html_convert_entities($string) {
    // Capture group 1 is the bare entity name, without "&" and ";".
    $namedEntityPattern = '/&([a-zA-Z][a-zA-Z0-9]+);/S';
    $converted = preg_replace_callback($namedEntityPattern, 'convert_entity', $string);

    return $converted;
}
/* Swap an HTML named entity for its numeric character reference.
 *
 * $matches is the match array supplied by preg_replace_callback():
 * $matches[1] holds the bare entity name (e.g. "uuml" for "&uuml;").
 *
 * Returns "&#N;" where N is the decimal Unicode code point of the entity,
 * e.g. "&#252;" for "uuml". If the entity isn't in the lookup table, this
 * function returns an empty string, which destroys the character in the
 * output - this is probably the desired behaviour when producing XML.
 *
 * The table stores code points instead of literal characters so that
 * (a) "&amp;" and "&lt;" never turn into a raw "&" or "<", which would make
 * the XML ill-formed, and (b) the result does not depend on the encoding
 * this source file happens to be saved in.
 */
function convert_entity($matches) {
    static $table = array(
        // Markup-significant characters. "apos" is not part of HTML 4.01
        // but is predefined by XML, so it is kept rather than destroyed.
        'quot' => 34, 'amp' => 38, 'apos' => 39, 'lt' => 60, 'gt' => 62,

        // Latin Extended, spacing modifiers, general punctuation.
        'OElig' => 338, 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353,
        'Yuml' => 376, 'circ' => 710, 'tilde' => 732,
        'ensp' => 8194, 'emsp' => 8195, 'thinsp' => 8201,
        'zwnj' => 8204, 'zwj' => 8205, 'lrm' => 8206, 'rlm' => 8207,
        'ndash' => 8211, 'mdash' => 8212,
        'lsquo' => 8216, 'rsquo' => 8217, 'sbquo' => 8218,
        'ldquo' => 8220, 'rdquo' => 8221, 'bdquo' => 8222,
        'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240,
        'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364,

        // Latin Extended-B.
        'fnof' => 402,

        // Greek capital letters (930 is unassigned).
        'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916,
        'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920,
        'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924,
        'Nu' => 925, 'Xi' => 926, 'Omicron' => 927, 'Pi' => 928,
        'Rho' => 929, 'Sigma' => 931, 'Tau' => 932, 'Upsilon' => 933,
        'Phi' => 934, 'Chi' => 935, 'Psi' => 936, 'Omega' => 937,

        // Greek small letters and symbol variants.
        'alpha' => 945, 'beta' => 946, 'gamma' => 947, 'delta' => 948,
        'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 'theta' => 952,
        'iota' => 953, 'kappa' => 954, 'lambda' => 955, 'mu' => 956,
        'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960,
        'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964,
        'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968,
        'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982,

        // More general punctuation.
        'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243,
        'oline' => 8254, 'frasl' => 8260,

        // Letterlike symbols.
        'weierp' => 8472, 'image' => 8465, 'real' => 8476,
        'trade' => 8482, 'alefsym' => 8501,

        // Arrows.
        'larr' => 8592, 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595,
        'harr' => 8596, 'crarr' => 8629,
        'lArr' => 8656, 'uArr' => 8657, 'rArr' => 8658, 'dArr' => 8659,
        'hArr' => 8660,

        // Mathematical operators.
        'forall' => 8704, 'part' => 8706, 'exist' => 8707, 'empty' => 8709,
        'nabla' => 8711, 'isin' => 8712, 'notin' => 8713, 'ni' => 8715,
        'prod' => 8719, 'sum' => 8721, 'minus' => 8722, 'lowast' => 8727,
        'radic' => 8730, 'prop' => 8733, 'infin' => 8734, 'ang' => 8736,
        'and' => 8743, 'or' => 8744, 'cap' => 8745, 'cup' => 8746,
        'int' => 8747, 'there4' => 8756, 'sim' => 8764, 'cong' => 8773,
        'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 'le' => 8804,
        'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
        'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
        'perp' => 8869, 'sdot' => 8901,

        // Miscellaneous technical, geometric shapes, card suits.
        'lceil' => 8968, 'rceil' => 8969, 'lfloor' => 8970, 'rfloor' => 8971,
        'lang' => 9001, 'rang' => 9002, 'loz' => 9674,
        'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829, 'diams' => 9830,

        // ISO 8859-1 symbols.
        'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163,
        'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167,
        'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171,
        'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175,
        'deg' => 176, 'plusmn' => 177, 'sup2' => 178, 'sup3' => 179,
        'acute' => 180, 'micro' => 181, 'para' => 182, 'middot' => 183,
        'cedil' => 184, 'sup1' => 185, 'ordm' => 186, 'raquo' => 187,
        'frac14' => 188, 'frac12' => 189, 'frac34' => 190, 'iquest' => 191,

        // ISO 8859-1 capital letters.
        'Agrave' => 192, 'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195,
        'Auml' => 196, 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
        'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203,
        'Igrave' => 204, 'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207,
        'ETH' => 208, 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
        'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214, 'times' => 215,
        'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219,
        'Uuml' => 220, 'Yacute' => 221, 'THORN' => 222, 'szlig' => 223,

        // ISO 8859-1 small letters.
        'agrave' => 224, 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
        'auml' => 228, 'aring' => 229, 'aelig' => 230, 'ccedil' => 231,
        'egrave' => 232, 'eacute' => 233, 'ecirc' => 234, 'euml' => 235,
        'igrave' => 236, 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
        'eth' => 240, 'ntilde' => 241, 'ograve' => 242, 'oacute' => 243,
        'ocirc' => 244, 'otilde' => 245, 'ouml' => 246, 'divide' => 247,
        'oslash' => 248, 'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251,
        'uuml' => 252, 'yacute' => 253, 'thorn' => 254, 'yuml' => 255
    );

    $name = $matches[1];

    // Entity not found? Destroy it.
    if (!isset($table[$name])) {
        return '';
    }

    return '&#' . $table[$name] . ';';
}
Diese Funktionen ersetzen die benannten HTML-Entities, die in RSS-Feeds nicht vorgesehen sind, durch die erlaubten numerischen Entities.