3 * HTML/XHTML filter that only allows some elements and attributes
5 * Added wp_ prefix to avoid conflicts with existing kses users
8 * @copyright (C) 2002, 2003, 2005
9 * @author Ulf Harnhammar <metaur@users.sourceforge.net>
15 * *** CONTACT INFORMATION ***
16 * E-mail: metaur at users dot sourceforge dot net
17 * Web page: http://sourceforge.net/projects/kses
18 * Paper mail: Ulf Harnhammar
23 * [kses strips evil scripts!]
27 * You can override this in a plugin.
31 if ( ! defined( 'CUSTOM_TAGS' ) )
32 define( 'CUSTOM_TAGS', false );
34 if ( ! CUSTOM_TAGS ) {
36 * Kses global for default allowable HTML tags.
38 * Can be overridden by using the CUSTOM_TAGS constant.
40 * @global array $allowedposttags
43 $allowedposttags = array(
65 'xml:lang' => array(),
73 'xml:lang' => array(),
77 'blockquote' => array(
82 'xml:lang' => array()),
86 'disabled' => array (),
103 'charoff' => array (),
107 'valign' => array (),
108 'width' => array ()),
110 'datetime' => array ()),
119 'xml:lang' => array(),
127 'xml:lang' => array()),
131 'fieldset' => array(),
138 'xml:lang' => array(),
140 'figcaption' => array(
146 'xml:lang' => array(),
158 'xml:lang' => array(),
161 'action' => array (),
162 'accept' => array (),
163 'accept-charset' => array (),
164 'enctype' => array (),
165 'method' => array (),
167 'target' => array ()),
172 'style' => array ()),
177 'style' => array ()),
182 'style' => array ()),
187 'style' => array ()),
192 'style' => array ()),
197 'style' => array ()),
204 'xml:lang' => array(),
212 'xml:lang' => array(),
217 'noshade' => array (),
219 'width' => array ()),
224 'border' => array (),
226 'height' => array (),
227 'hspace' => array (),
228 'longdesc' => array (),
229 'vspace' => array (),
232 'width' => array ()),
234 'datetime' => array (),
240 'align' => array ()),
243 'class' => array ()),
254 'xml:lang' => array(),
262 'xml:lang' => array()),
265 'width' => array ()),
276 'xml:lang' => array()),
283 'xml:lang' => array(),
294 'xml:lang' => array(),
299 'bgcolor' => array (),
300 'border' => array (),
301 'cellpadding' => array (),
302 'cellspacing' => array (),
308 'summary' => array (),
309 'width' => array ()),
313 'charoff' => array (),
314 'valign' => array ()),
319 'bgcolor' => array (),
321 'charoff' => array (),
323 'colspan' => array (),
325 'headers' => array (),
326 'height' => array (),
327 'nowrap' => array (),
328 'rowspan' => array (),
331 'valign' => array (),
332 'width' => array ()),
336 'disabled' => array (),
338 'readonly' => array ()),
343 'charoff' => array (),
344 'valign' => array ()),
349 'bgcolor' => array (),
351 'charoff' => array (),
353 'colspan' => array (),
354 'headers' => array (),
355 'height' => array (),
356 'nowrap' => array (),
357 'rowspan' => array (),
359 'valign' => array (),
360 'width' => array ()),
364 'charoff' => array (),
366 'valign' => array ()),
370 'bgcolor' => array (),
372 'charoff' => array (),
375 'valign' => array ()),
390 * Kses allowed HTML elements.
392 * @global array $allowedtags
395 $allowedtags = array(
398 'title' => array ()),
400 'title' => array ()),
402 'title' => array ()),
404 'blockquote' => array(
410 'datetime' => array ()),
414 'em' => array (), 'i' => array (),
415 // 'ins' => array('datetime' => array(), 'cite' => array()),
429 $allowedentitynames = array(
430 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen',
431 'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo',
432 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn',
433 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
434 'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde',
435 'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute',
436 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
437 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
438 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
439 'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute',
440 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
441 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
442 'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute',
443 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
444 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
445 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
446 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
447 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm',
448 'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo',
449 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
450 'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta',
451 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
452 'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi',
453 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
454 'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta',
455 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
456 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi',
457 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
458 'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym',
459 'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime',
460 'oline', 'frasl', 'weierp', 'image', 'real', 'trade',
461 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
462 'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
463 'forall', 'part', 'exist', 'empty', 'nabla', 'isin',
464 'notin', 'ni', 'prod', 'sum', 'minus', 'lowast',
465 'radic', 'prop', 'infin', 'ang', 'and', 'or',
466 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
467 'ne', 'equiv', 'le', 'ge', 'sub', 'sup',
468 'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp',
469 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
470 'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams',
475 * Filters content and keeps only allowable HTML elements.
477 * This function makes sure that only the allowed HTML element names, attribute
478 * names and attribute values plus only sane HTML entities will occur in
479 * $string. You have to remove any slashes from PHP's magic quotes before you
480 * call this function.
482 * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
483 * 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp' and 'svn'. This
484 * covers all common link protocols, except for 'javascript' which should not
485 * be allowed for untrusted users.
489 * @param string $string Content to filter through kses
490 * @param array $allowed_html List of allowed HTML elements
491 * @param array $allowed_protocols Optional. Allowed protocol in links.
492 * @return string Filtered content with only allowed HTML elements
494 function wp_kses($string, $allowed_html, $allowed_protocols = array ()) {
495 $allowed_protocols = wp_parse_args( $allowed_protocols, apply_filters('kses_allowed_protocols', array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn') ));
496 $string = wp_kses_no_null($string);
497 $string = wp_kses_js_entities($string);
498 $string = wp_kses_normalize_entities($string);
499 $allowed_html_fixed = wp_kses_array_lc($allowed_html);
500 $string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook
501 return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols);
505 * You add any kses hooks here.
507 * There is currently only one kses WordPress hook and it is called here. All
508 * parameters are passed to the hooks, which are expected to return a string.
512 * @param string $string Content to filter through kses
513 * @param array $allowed_html List of allowed HTML elements
514 * @param array $allowed_protocols Allowed protocol in links
515 * @return string Filtered content through 'pre_kses' hook
517 function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
518 $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
523 * This function returns kses' version number.
527 * @return string KSES Version Number
529 function wp_kses_version() {
534 * Searches for HTML tags, no matter how malformed.
536 * It also matches stray ">" characters.
540 * @param string $string Content to filter
541 * @param array $allowed_html Allowed HTML elements
542 * @param array $allowed_protocols Allowed protocols to keep
543 * @return string Content with fixed HTML tags
545 function wp_kses_split($string, $allowed_html, $allowed_protocols) {
546 global $pass_allowed_html, $pass_allowed_protocols;
547 $pass_allowed_html = $allowed_html;
548 $pass_allowed_protocols = $allowed_protocols;
549 return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%',
550 create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string);
554 * Callback for wp_kses_split for fixing malformed HTML tags.
556 * This function does a lot of work. It rejects some very malformed things like
557 * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
558 * strip_tags()!). Otherwise it splits the tag into an element and an attribute
561 * After the tag is split into an element and an attribute list, it is run
562 * through another filter which will remove illegal attributes and once that is
563 * completed, will be returned.
567 * @uses wp_kses_attr()
569 * @param string $string Content to filter
570 * @param array $allowed_html Allowed HTML elements
571 * @param array $allowed_protocols Allowed protocols to keep
572 * @return string Fixed HTML element
574 function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
575 $string = wp_kses_stripslashes($string);
577 if (substr($string, 0, 1) != '<')
579 # It matched a ">" character
581 if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches)) {
582 $string = str_replace(array('<!--', '-->'), '', $matches[1]);
583 while ( $string != $newstring = wp_kses($string, $allowed_html, $allowed_protocols) )
584 $string = $newstring;
587 // prevent multiple dashes in comments
588 $string = preg_replace('/--+/', '-', $string);
589 // prevent three dashes closing a comment
590 $string = preg_replace('/-$/', '', $string);
591 return "<!--{$string}-->";
593 # Allow HTML comments
595 if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
597 # It's seriously malformed
599 $slash = trim($matches[1]);
601 $attrlist = $matches[3];
603 if (!@isset($allowed_html[strtolower($elem)]))
605 # They are using a not allowed HTML element
608 return "<$slash$elem>";
609 # No attributes are allowed for closing elements
611 return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols);
615 * Removes all attributes, if none are allowed for this element.
617 * If some are allowed it calls wp_kses_hair() to split them further, and then
618 * it builds up new HTML code from the data that kses_hair() returns. It also
619 * removes "<" and ">" characters, if there are any left. One more thing it does
620 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
621 * in the returned code as well.
625 * @param string $element HTML element/tag
626 * @param string $attr HTML attributes from HTML element to closing HTML element tag
627 * @param array $allowed_html Allowed HTML elements
628 * @param array $allowed_protocols Allowed protocols to keep
629 * @return string Sanitized HTML element
631 function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
632 # Is there a closing XHTML slash at the end of the attributes?
635 if (preg_match('%\s*/\s*$%', $attr))
638 # Are any attributes allowed at all for this element?
640 if (@ count($allowed_html[strtolower($element)]) == 0)
641 return "<$element$xhtml_slash>";
645 $attrarr = wp_kses_hair($attr, $allowed_protocols);
647 # Go through $attrarr, and save the allowed attributes for this element
652 foreach ($attrarr as $arreach) {
653 if (!@ isset ($allowed_html[strtolower($element)][strtolower($arreach['name'])]))
654 continue; # the attribute is not allowed
656 $current = $allowed_html[strtolower($element)][strtolower($arreach['name'])];
658 continue; # the attribute is not allowed
660 if (!is_array($current))
661 $attr2 .= ' '.$arreach['whole'];
662 # there are no checks
665 # there are some checks
667 foreach ($current as $currkey => $currval)
668 if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
673 if ( strtolower($arreach['name']) == 'style' ) {
674 $orig_value = $arreach['value'];
676 $value = safecss_filter_attr($orig_value);
681 $arreach['value'] = $value;
683 $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
687 $attr2 .= ' '.$arreach['whole']; # it passed them
688 } # if !is_array($current)
691 # Remove any "<" or ">" characters
693 $attr2 = preg_replace('/[<>]/', '', $attr2);
695 return "<$element$attr2$xhtml_slash>";
699 * Builds an attribute list from string containing attributes.
701 * This function does a lot of work. It parses an attribute list into an array
702 * with attribute data, and tries to do the right thing even if it gets weird
703 * input. It will add quotes around attribute values that don't have any quotes
704 * or apostrophes around them, to make it easier to produce HTML code that will
705 * conform to W3C's HTML specification. It will also remove bad URL protocols
706 * from attribute values. It also reduces duplicate attributes by using the
707 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
711 * @param string $attr Attribute list from HTML element to closing HTML element tag
712 * @param array $allowed_protocols Allowed protocols to keep
713 * @return array List of attributes after parsing
715 function wp_kses_hair($attr, $allowed_protocols) {
719 $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
721 # Loop through the whole attribute list
723 while (strlen($attr) != 0) {
724 $working = 0; # Was the last operation successful?
727 case 0 : # attribute name, href for instance
729 if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
730 $attrname = $match[1];
731 $working = $mode = 1;
732 $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
737 case 1 : # equals sign or valueless ("selected")
739 if (preg_match('/^\s*=\s*/', $attr)) # equals sign
743 $attr = preg_replace('/^\s*=\s*/', '', $attr);
747 if (preg_match('/^\s+/', $attr)) # valueless
751 if(FALSE === array_key_exists($attrname, $attrarr)) {
752 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
754 $attr = preg_replace('/^\s+/', '', $attr);
759 case 2 : # attribute value, a URL after href= for instance
761 if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match))
764 $thisval = $match[1];
765 if ( in_array(strtolower($attrname), $uris) )
766 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
768 if(FALSE === array_key_exists($attrname, $attrarr)) {
769 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
773 $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
777 if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match))
780 $thisval = $match[1];
781 if ( in_array(strtolower($attrname), $uris) )
782 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
784 if(FALSE === array_key_exists($attrname, $attrarr)) {
785 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
789 $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
793 if (preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match))
796 $thisval = $match[1];
797 if ( in_array(strtolower($attrname), $uris) )
798 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
800 if(FALSE === array_key_exists($attrname, $attrarr)) {
801 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
803 # We add quotes to conform to W3C's HTML spec.
806 $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
812 if ($working == 0) # not well formed, remove and try again
814 $attr = wp_kses_html_error($attr);
819 if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr))
820 # special case, for when the attribute list ends with a valueless
821 # attribute like "selected"
822 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
828 * Performs different checks for attribute values.
830 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
831 * and "valueless" with even more checks to come soon.
835 * @param string $value Attribute value
836 * @param string $vless Whether the value is valueless. Use 'y' or 'n'
837 * @param string $checkname What $checkvalue is checking for.
838 * @param mixed $checkvalue What constraint the value should pass
839 * @return bool Whether check passes
841 function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
844 switch (strtolower($checkname)) {
846 # The maxlen check makes sure that the attribute value has a length not
847 # greater than the given value. This can be used to avoid Buffer Overflows
848 # in WWW clients and various Internet servers.
850 if (strlen($value) > $checkvalue)
855 # The minlen check makes sure that the attribute value has a length not
856 # smaller than the given value.
858 if (strlen($value) < $checkvalue)
863 # The maxval check does two things: it checks that the attribute value is
864 # an integer from 0 and up, without an excessive amount of zeroes or
865 # whitespace (to avoid Buffer Overflows). It also checks that the attribute
866 # value is not greater than the given value.
867 # This check can be used to avoid Denial of Service attacks.
869 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
871 if ($value > $checkvalue)
876 # The minval check checks that the attribute value is a positive integer,
877 # and that it is not smaller than the given value.
879 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
881 if ($value < $checkvalue)
886 # The valueless check checks if the attribute has a value
887 # (like <a href="blah">) or not (<option selected>). If the given value
888 # is a "y" or a "Y", the attribute must not have a value.
889 # If the given value is an "n" or an "N", the attribute must have one.
891 if (strtolower($checkvalue) != $vless)
900 * Sanitize string from bad protocols.
902 * This function removes all non-allowed protocols from the beginning of
903 * $string. It ignores whitespace and the case of the letters, and it does
904 * understand HTML entities. It does its work in a while loop, so it won't be
905 * fooled by a string like "javascript:javascript:alert(57)".
909 * @param string $string Content to filter bad protocols from
910 * @param array $allowed_protocols Allowed protocols to keep
911 * @return string Filtered content
913 function wp_kses_bad_protocol($string, $allowed_protocols) {
914 $string = wp_kses_no_null($string);
915 $string2 = $string.'a';
917 while ($string != $string2) {
919 $string = wp_kses_bad_protocol_once($string, $allowed_protocols);
926 * Removes any NULL characters in $string.
930 * @param string $string
933 function wp_kses_no_null($string) {
934 $string = preg_replace('/\0+/', '', $string);
935 $string = preg_replace('/(\\\\0)+/', '', $string);
941 * Strips slashes from in front of quotes.
943 * This function changes the character sequence \" to just ". It leaves all
944 * other slashes alone. It's really weird, but the quoting from
945 * preg_replace(//e) seems to require this.
949 * @param string $string String to strip slashes
950 * @return string Fixed strings with quoted slashes
952 function wp_kses_stripslashes($string) {
953 return preg_replace('%\\\\"%', '"', $string);
957 * Goes through an array and changes the keys to all lower case.
961 * @param array $inarray Unfiltered array
962 * @return array Fixed array with all lowercase keys
964 function wp_kses_array_lc($inarray) {
965 $outarray = array ();
967 foreach ( (array) $inarray as $inkey => $inval) {
968 $outkey = strtolower($inkey);
969 $outarray[$outkey] = array ();
971 foreach ( (array) $inval as $inkey2 => $inval2) {
972 $outkey2 = strtolower($inkey2);
973 $outarray[$outkey][$outkey2] = $inval2;
981 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
985 * @param string $string
988 function wp_kses_js_entities($string) {
989 return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
993 * Handles parsing errors in wp_kses_hair().
995 * The general plan is to remove everything to and including some whitespace,
996 * but it deals with quotes and apostrophes as well.
1000 * @param string $string
1003 function wp_kses_html_error($string) {
1004 return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
1008 * Sanitizes content from bad protocols and other characters.
1010 * This function searches for URL protocols at the beginning of $string, while
1011 * handling whitespace and HTML entities.
1015 * @param string $string Content to check for bad protocols
1016 * @param array $allowed_protocols Allowed protocols
1017 * @return string Sanitized content
1019 function wp_kses_bad_protocol_once($string, $allowed_protocols) {
1020 $string2 = preg_split( '/:|�*58;|�*3a;/i', $string, 2 );
1021 if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) )
1022 $string = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols ) . trim( $string2[1] );
1028 * Callback for wp_kses_bad_protocol_once() regular expression.
1030 * This function processes URL protocols, checks to see if they're in the
1031 * white-list or not, and returns different data depending on the answer.
1036 * @param string $string URI scheme to check against the whitelist
1037 * @param array $allowed_protocols Allowed protocols
1038 * @return string Sanitized content
1040 function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
1041 $string2 = wp_kses_decode_entities($string);
1042 $string2 = preg_replace('/\s/', '', $string2);
1043 $string2 = wp_kses_no_null($string2);
1044 $string2 = strtolower($string2);
1047 foreach ( (array) $allowed_protocols as $one_protocol )
1048 if ( strtolower($one_protocol) == $string2 ) {
1060 * Converts and fixes HTML entities.
1062 * This function normalizes HTML entities. It will convert "AT&T" to the correct
1063 * "AT&amp;T", "&#00058;" to "&#058;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
1067 * @param string $string Content to normalize entities
1068 * @return string Content with normalized entities
1070 function wp_kses_normalize_entities($string) {
1071 # Disarm all entities by converting & to &amp;
1073 $string = str_replace('&', '&', $string);
1075 # Change back the allowed entities in our entity whitelist
1077 $string = preg_replace_callback('/&([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string);
1078 $string = preg_replace_callback('/&#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string);
1079 $string = preg_replace_callback('/&#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string);
1085 * Callback for wp_kses_normalize_entities() regular expression.
1087 * This function only accepts valid named entity references, which are finite,
1088 * case-sensitive, and highly scrutinized by HTML and XML validators.
1092 * @param array $matches preg_replace_callback() matches array
1093 * @return string Correctly encoded entity
1095 function wp_kses_named_entities($matches) {
1096 global $allowedentitynames;
1098 if ( empty($matches[1]) )
1102 return ( ( ! in_array($i, $allowedentitynames) ) ? "&$i;" : "&$i;" );
1106 * Callback for wp_kses_normalize_entities() regular expression.
1108 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
1109 * code points (as judged by valid_unicode()) for &#number; entities.
1114 * @param array $matches preg_replace_callback() matches array
1115 * @return string Correctly encoded entity
1117 function wp_kses_normalize_entities2($matches) {
1118 if ( empty($matches[1]) )
1122 if (valid_unicode($i)) {
1123 $i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
1133 * Callback for wp_kses_normalize_entities() for regular expression.
1135 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
1136 * numeric entities in hex form.
1140 * @param array $matches preg_replace_callback() matches array
1141 * @return string Correctly encoded entity
1143 function wp_kses_normalize_entities3($matches) {
1144 if ( empty($matches[1]) )
1147 $hexchars = $matches[1];
1148 return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';' );
1152 * Helper function to determine if a Unicode value is valid.
1154 * @param int $i Unicode value
1155 * @return bool true if the value was a valid Unicode number
1157 function valid_unicode($i) {
1158 return ( $i == 0x9 || $i == 0xa || $i == 0xd ||
1159 ($i >= 0x20 && $i <= 0xd7ff) ||
1160 ($i >= 0xe000 && $i <= 0xfffd) ||
1161 ($i >= 0x10000 && $i <= 0x10ffff) );
1165 * Convert all entities to their character counterparts.
1167 * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
1168 * anything with other entities like &auml;, but we don't need them in the URL
1169 * protocol whitelisting system anyway.
1173 * @param string $string Content to change entities
1174 * @return string Content after decoded entities
1176 function wp_kses_decode_entities($string) {
1177 $string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);
1178 $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string);
1184 * Regex callback for wp_kses_decode_entities()
1186 * @param array $match preg match
1189 function _wp_kses_decode_entities_chr( $match ) {
1190 return chr( $match[1] );
1194 * Regex callback for wp_kses_decode_entities()
1196 * @param array $match preg match
1199 function _wp_kses_decode_entities_chr_hexdec( $match ) {
1200 return chr( hexdec( $match[1] ) );
1204 * Sanitize content with allowed HTML Kses rules.
1207 * @uses $allowedtags
1209 * @param string $data Content to filter, expected to be escaped with slashes
1210 * @return string Filtered content
1212 function wp_filter_kses($data) {
1213 global $allowedtags;
1214 return addslashes( wp_kses(stripslashes( $data ), $allowedtags) );
1218 * Sanitize content with allowed HTML Kses rules.
1221 * @uses $allowedtags
1223 * @param string $data Content to filter, expected to not be escaped
1224 * @return string Filtered content
1226 function wp_kses_data($data) {
1227 global $allowedtags;
1228 return wp_kses( $data , $allowedtags );
1232 * Sanitize content for allowed HTML tags for post content.
1234 * Post content refers to the page contents of the 'post' type and not $_POST
1238 * @uses $allowedposttags
1240 * @param string $data Post content to filter, expected to be escaped with slashes
1241 * @return string Filtered post content with allowed HTML tags and attributes intact.
1243 function wp_filter_post_kses($data) {
1244 global $allowedposttags;
1245 return addslashes ( wp_kses(stripslashes( $data ), $allowedposttags) );
1249 * Sanitize content for allowed HTML tags for post content.
1251 * Post content refers to the page contents of the 'post' type and not $_POST
1255 * @uses $allowedposttags
1257 * @param string $data Post content to filter
1258 * @return string Filtered post content with allowed HTML tags and attributes intact.
1260 function wp_kses_post($data) {
1261 global $allowedposttags;
1262 return wp_kses( $data , $allowedposttags );
1266 * Strips all of the HTML in the content.
1270 * @param string $data Content to strip all HTML from
1271 * @return string Filtered content without any HTML
1273 function wp_filter_nohtml_kses($data) {
1274 return addslashes ( wp_kses(stripslashes( $data ), array()) );
1278 * Adds all Kses input form content filters.
1280 * All hooks have default priority. The wp_filter_kses() function is added to
1281 * the 'pre_comment_content' and 'title_save_pre' hooks.
1283 * The wp_filter_post_kses() function is added to the 'content_save_pre',
1284 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
1287 * @uses add_filter() See description for what functions are added to what hooks.
1289 function kses_init_filters() {
1290 // Normal filtering.
1291 add_filter('pre_comment_content', 'wp_filter_kses');
1292 add_filter('title_save_pre', 'wp_filter_kses');
1295 add_filter('content_save_pre', 'wp_filter_post_kses');
1296 add_filter('excerpt_save_pre', 'wp_filter_post_kses');
1297 add_filter('content_filtered_save_pre', 'wp_filter_post_kses');
1301 * Removes all Kses input form content filters.
1303 * A quick procedural method for removing all of the filters that kses uses for
1304 * content in WordPress Loop.
1306 * Does not remove the kses_init() function from 'init' hook (priority is
1307 * default). Also does not remove kses_init() function from 'set_current_user'
1308 * hook (priority is also default).
1312 function kses_remove_filters() {
1313 // Normal filtering.
1314 remove_filter('pre_comment_content', 'wp_filter_kses');
1315 remove_filter('title_save_pre', 'wp_filter_kses');
1318 remove_filter('content_save_pre', 'wp_filter_post_kses');
1319 remove_filter('excerpt_save_pre', 'wp_filter_post_kses');
1320 remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');
1324 * Sets up most of the Kses filters for input form content.
1326 * If you remove the kses_init() function from 'init' hook and
1327 * 'set_current_user' (priority is default), then none of the Kses filter hooks
1330 * First removes all of the Kses filters in case the current user does not need
1331 * to have Kses filter the content. If the user does not have unfiltered html
1332 * capability, then Kses filters are added.
1334 * @uses kses_remove_filters() Removes the Kses filters
1335 * @uses kses_init_filters() Adds the Kses filters back if the user
1336 * does not have unfiltered HTML capability.
1339 function kses_init() {
1340 kses_remove_filters();
1342 if (current_user_can('unfiltered_html') == false)
1343 kses_init_filters();
1346 add_action('init', 'kses_init');
1347 add_action('set_current_user', 'kses_init');
1354 function safecss_filter_attr( $css, $deprecated = '' ) {
1355 if ( !empty( $deprecated ) )
1356 _deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented
1358 $css = wp_kses_no_null($css);
1359 $css = str_replace(array("\n","\r","\t"), '', $css);
1361 if ( preg_match( '%[\\(&=}]|/\*%', $css ) ) // remove any inline css containing \ ( & } = or comments
1364 $css_array = explode( ';', trim( $css ) );
1365 $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
1366 'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
1367 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
1368 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
1369 'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
1370 'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
1371 'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
1372 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
1373 'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
1374 'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
1377 if ( empty($allowed_attr) )
1381 foreach ( $css_array as $css_item ) {
1382 if ( $css_item == '' )
1384 $css_item = trim( $css_item );
1386 if ( strpos( $css_item, ':' ) === false ) {
1389 $parts = split( ':', $css_item );
1390 if ( in_array( trim( $parts[0] ), $allowed_attr ) )