3 * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
4 * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar
6 * This program is free software and open source software; you can redistribute
7 * it and/or modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the License,
9 * or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 * http://www.gnu.org/licenses/gpl.html
21 * [kses strips evil scripts!]
23 * Added wp_ prefix to avoid conflicts with existing kses users
26 * @copyright (C) 2002, 2003, 2005
27 * @author Ulf Harnhammar <http://advogato.org/person/metaur/>
35 * You can override this in a plugin.
37 * The wp_kses_allowed_html filter is more powerful and supplies context.
38 * CUSTOM_TAGS is not recommended and should be considered deprecated.
40 * @see wp_kses_allowed_html()
44 if ( ! defined( 'CUSTOM_TAGS' ) )
45 define( 'CUSTOM_TAGS', false );
47 if ( ! CUSTOM_TAGS ) {
49 * Kses global for default allowable HTML tags.
51 * Can be overridden by using the CUSTOM_TAGS constant.
53 * @global array $allowedposttags
56 $allowedposttags = array(
89 'blockquote' => array(
138 'fieldset' => array(),
145 'figcaption' => array(
165 'accept-charset' => true,
287 'cellpadding' => true,
288 'cellspacing' => true,
373 * Kses allowed HTML elements.
375 * @global array $allowedtags
378 $allowedtags = array(
390 'blockquote' => array(
407 $allowedentitynames = array(
408 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen',
409 'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo',
410 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn',
411 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
412 'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde',
413 'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute',
414 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
415 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
416 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
417 'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute',
418 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
419 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
420 'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute',
421 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
422 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
423 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
424 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
425 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm',
426 'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo',
427 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
428 'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta',
429 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
430 'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi',
431 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
432 'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta',
433 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
434 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi',
435 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
436 'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym',
437 'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime',
438 'oline', 'frasl', 'weierp', 'image', 'real', 'trade',
439 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
440 'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
441 'forall', 'part', 'exist', 'empty', 'nabla', 'isin',
442 'notin', 'ni', 'prod', 'sum', 'minus', 'lowast',
443 'radic', 'prop', 'infin', 'ang', 'and', 'or',
444 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
445 'ne', 'equiv', 'le', 'ge', 'sub', 'sup',
446 'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp',
447 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
448 'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams',
449 'sup1', 'sup2', 'sup3', 'frac14', 'frac12', 'frac34',
453 $allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags );
455 $allowedtags = wp_kses_array_lc( $allowedtags );
456 $allowedposttags = wp_kses_array_lc( $allowedposttags );
460 * Filters content and keeps only allowable HTML elements.
462 * This function makes sure that only the allowed HTML element names, attribute
463 * names and attribute values plus only sane HTML entities will occur in
464 * $string. You have to remove any slashes from PHP's magic quotes before you
465 * call this function.
467 * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news',
468 * 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp' and 'svn'. This
469 * covers all common link protocols, except for 'javascript' which should not
470 * be allowed for untrusted users.
474 * @param string $string Content to filter through kses
475 * @param array $allowed_html List of allowed HTML elements
476 * @param array $allowed_protocols Optional. Allowed protocol in links.
477 * @return string Filtered content with only allowed HTML elements
479 function wp_kses( $string, $allowed_html, $allowed_protocols = array() ) {
480 if ( empty( $allowed_protocols ) )
481 $allowed_protocols = wp_allowed_protocols();
482 $string = wp_kses_no_null($string);
483 $string = wp_kses_js_entities($string);
484 $string = wp_kses_normalize_entities($string);
485 $string = wp_kses_hook($string, $allowed_html, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook
486 return wp_kses_split($string, $allowed_html, $allowed_protocols);
490 * Return a list of allowed tags and attributes for a given context.
494 * @param string $context The context for which to retrieve tags. Allowed values are
495 * post | strip | data | entities or the name of a field filter such as pre_user_description.
496 * @return array List of allowed tags and their allowed attributes.
498 function wp_kses_allowed_html( $context = '' ) {
499 global $allowedposttags, $allowedtags, $allowedentitynames;
501 if ( is_array( $context ) )
502 return apply_filters( 'wp_kses_allowed_html', $context, 'explicit' );
504 switch ( $context ) {
506 return apply_filters( 'wp_kses_allowed_html', $allowedposttags, $context );
508 case 'user_description':
509 case 'pre_user_description':
510 $tags = $allowedtags;
511 $tags['a']['rel'] = true;
512 return apply_filters( 'wp_kses_allowed_html', $tags, $context );
515 return apply_filters( 'wp_kses_allowed_html', array(), $context );
518 return apply_filters( 'wp_kses_allowed_html', $allowedentitynames, $context);
522 return apply_filters( 'wp_kses_allowed_html', $allowedtags, $context );
527 * You add any kses hooks here.
529 * There is currently only one kses WordPress hook and it is called here. All
530 * parameters are passed to the hook, which is expected to return a string.
534 * @param string $string Content to filter through kses
535 * @param array $allowed_html List of allowed HTML elements
536 * @param array $allowed_protocols Allowed protocol in links
537 * @return string Filtered content through 'pre_kses' hook
539 function wp_kses_hook( $string, $allowed_html, $allowed_protocols ) {
540 $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
545 * This function returns kses' version number.
549 * @return string KSES Version Number
551 function wp_kses_version() {
556 * Searches for HTML tags, no matter how malformed.
558 * It also matches stray ">" characters.
562 * @param string $string Content to filter
563 * @param array $allowed_html Allowed HTML elements
564 * @param array $allowed_protocols Allowed protocols to keep
565 * @return string Content with fixed HTML tags
567 function wp_kses_split( $string, $allowed_html, $allowed_protocols ) {
568 global $pass_allowed_html, $pass_allowed_protocols;
569 $pass_allowed_html = $allowed_html;
570 $pass_allowed_protocols = $allowed_protocols;
571 return preg_replace_callback( '%(<!--.*?(-->|$))|(<[^>]*(>|$)|>)%', '_wp_kses_split_callback', $string );
575 * Callback for wp_kses_split.
580 function _wp_kses_split_callback( $match ) {
581 global $pass_allowed_html, $pass_allowed_protocols;
582 return wp_kses_split2( $match[0], $pass_allowed_html, $pass_allowed_protocols );
586 * Callback for wp_kses_split for fixing malformed HTML tags.
588 * This function does a lot of work. It rejects some very malformed things like
589 * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
590 * strip_tags()!). Otherwise it splits the tag into an element and an attribute
593 * After the tag is split into an element and an attribute list, it is run
594 * through another filter which will remove illegal attributes and once that is
595 * completed, will be returned.
599 * @uses wp_kses_attr()
601 * @param string $string Content to filter
602 * @param array $allowed_html Allowed HTML elements
603 * @param array $allowed_protocols Allowed protocols to keep
604 * @return string Fixed HTML element
606 function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
607 $string = wp_kses_stripslashes($string);
609 if (substr($string, 0, 1) != '<')
611 # It matched a ">" character
613 if ( '<!--' == substr( $string, 0, 4 ) ) {
614 $string = str_replace( array('<!--', '-->'), '', $string );
615 while ( $string != ($newstring = wp_kses($string, $allowed_html, $allowed_protocols)) )
616 $string = $newstring;
619 // prevent multiple dashes in comments
620 $string = preg_replace('/--+/', '-', $string);
621 // prevent three dashes closing a comment
622 $string = preg_replace('/-$/', '', $string);
623 return "<!--{$string}-->";
625 # Allow HTML comments
627 if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
629 # It's seriously malformed
631 $slash = trim($matches[1]);
633 $attrlist = $matches[3];
635 if ( ! is_array( $allowed_html ) )
636 $allowed_html = wp_kses_allowed_html( $allowed_html );
638 if ( ! isset($allowed_html[strtolower($elem)]) )
640 # They are using a not allowed HTML element
644 # No attributes are allowed for closing elements
646 return wp_kses_attr( $elem, $attrlist, $allowed_html, $allowed_protocols );
650 * Removes all attributes, if none are allowed for this element.
652 * If some are allowed it calls wp_kses_hair() to split them further, and then
653 * it builds up new HTML code from the data that wp_kses_hair() returns. It also
654 * removes "<" and ">" characters, if there are any left. One more thing it does
655 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
656 * in the returned code as well.
660 * @param string $element HTML element/tag
661 * @param string $attr HTML attributes from HTML element to closing HTML element tag
662 * @param array $allowed_html Allowed HTML elements
663 * @param array $allowed_protocols Allowed protocols to keep
664 * @return string Sanitized HTML element
666 function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
667 # Is there a closing XHTML slash at the end of the attributes?
669 if ( ! is_array( $allowed_html ) )
670 $allowed_html = wp_kses_allowed_html( $allowed_html );
673 if (preg_match('%\s*/\s*$%', $attr))
676 # Are any attributes allowed at all for this element?
677 if ( ! isset($allowed_html[strtolower($element)]) || count($allowed_html[strtolower($element)]) == 0 )
678 return "<$element$xhtml_slash>";
681 $attrarr = wp_kses_hair($attr, $allowed_protocols);
683 # Go through $attrarr, and save the allowed attributes for this element
687 $allowed_attr = $allowed_html[strtolower($element)];
688 foreach ($attrarr as $arreach) {
689 if ( ! isset( $allowed_attr[strtolower($arreach['name'])] ) )
690 continue; # the attribute is not allowed
692 $current = $allowed_attr[strtolower($arreach['name'])];
693 if ( $current == '' )
694 continue; # the attribute is not allowed
696 if ( strtolower( $arreach['name'] ) == 'style' ) {
697 $orig_value = $arreach['value'];
698 $value = safecss_filter_attr( $orig_value );
700 if ( empty( $value ) )
703 $arreach['value'] = $value;
704 $arreach['whole'] = str_replace( $orig_value, $value, $arreach['whole'] );
707 if ( ! is_array($current) ) {
708 $attr2 .= ' '.$arreach['whole'];
709 # there are no checks
712 # there are some checks
714 foreach ($current as $currkey => $currval) {
715 if ( ! wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval) ) {
722 $attr2 .= ' '.$arreach['whole']; # it passed them
723 } # if !is_array($current)
726 # Remove any "<" or ">" characters
727 $attr2 = preg_replace('/[<>]/', '', $attr2);
729 return "<$element$attr2$xhtml_slash>";
733 * Builds an attribute list from string containing attributes.
735 * This function does a lot of work. It parses an attribute list into an array
736 * with attribute data, and tries to do the right thing even if it gets weird
737 * input. It will add quotes around attribute values that don't have any quotes
738 * or apostrophes around them, to make it easier to produce HTML code that will
739 * conform to W3C's HTML specification. It will also remove bad URL protocols
740 * from attribute values. It also reduces duplicate attributes by using the
741 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
745 * @param string $attr Attribute list from HTML element to closing HTML element tag
746 * @param array $allowed_protocols Allowed protocols to keep
747 * @return array List of attributes after parsing
749 function wp_kses_hair($attr, $allowed_protocols) {
753 $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
755 # Loop through the whole attribute list
757 while (strlen($attr) != 0) {
758 $working = 0; # Was the last operation successful?
761 case 0 : # attribute name, href for instance
763 if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
764 $attrname = $match[1];
765 $working = $mode = 1;
766 $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
771 case 1 : # equals sign or valueless ("selected")
773 if (preg_match('/^\s*=\s*/', $attr)) # equals sign
777 $attr = preg_replace('/^\s*=\s*/', '', $attr);
781 if (preg_match('/^\s+/', $attr)) # valueless
785 if(false === array_key_exists($attrname, $attrarr)) {
786 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
788 $attr = preg_replace('/^\s+/', '', $attr);
793 case 2 : # attribute value, a URL after href= for instance
795 if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match))
798 $thisval = $match[1];
799 if ( in_array(strtolower($attrname), $uris) )
800 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
802 if(false === array_key_exists($attrname, $attrarr)) {
803 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
807 $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
811 if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match))
814 $thisval = $match[1];
815 if ( in_array(strtolower($attrname), $uris) )
816 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
818 if(false === array_key_exists($attrname, $attrarr)) {
819 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
823 $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
827 if (preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match))
830 $thisval = $match[1];
831 if ( in_array(strtolower($attrname), $uris) )
832 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
834 if(false === array_key_exists($attrname, $attrarr)) {
835 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
837 # We add quotes to conform to W3C's HTML spec.
840 $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
846 if ($working == 0) # not well formed, remove and try again
848 $attr = wp_kses_html_error($attr);
853 if ($mode == 1 && false === array_key_exists($attrname, $attrarr))
854 # special case, for when the attribute list ends with a valueless
855 # attribute like "selected"
856 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
862 * Performs different checks for attribute values.
864 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
869 * @param string $value Attribute value
870 * @param string $vless Whether the value is valueless. Use 'y' or 'n'
871 * @param string $checkname What $checkvalue is checking for.
872 * @param mixed $checkvalue What constraint the value should pass
873 * @return bool Whether check passes
875 function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
878 switch (strtolower($checkname)) {
880 # The maxlen check makes sure that the attribute value has a length not
881 # greater than the given value. This can be used to avoid Buffer Overflows
882 # in WWW clients and various Internet servers.
884 if (strlen($value) > $checkvalue)
889 # The minlen check makes sure that the attribute value has a length not
890 # smaller than the given value.
892 if (strlen($value) < $checkvalue)
897 # The maxval check does two things: it checks that the attribute value is
898 # an integer from 0 and up, without an excessive amount of zeroes or
899 # whitespace (to avoid Buffer Overflows). It also checks that the attribute
900 # value is not greater than the given value.
901 # This check can be used to avoid Denial of Service attacks.
903 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
905 if ($value > $checkvalue)
910 # The minval check makes sure that the attribute value is a positive integer,
911 # and that it is not smaller than the given value.
913 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
915 if ($value < $checkvalue)
920 # The valueless check makes sure if the attribute has a value
921 # (like <a href="blah">) or not (<option selected>). If the given value
922 # is a "y" or a "Y", the attribute must not have a value.
923 # If the given value is an "n" or an "N", the attribute must have one.
925 if (strtolower($checkvalue) != $vless)
934 * Sanitize string from bad protocols.
936 * This function removes all non-allowed protocols from the beginning of
937 * $string. It ignores whitespace and the case of the letters, and it does
938 * understand HTML entities. It does its work in a while loop, so it won't be
939 * fooled by a string like "javascript:javascript:alert(57)".
943 * @param string $string Content to filter bad protocols from
944 * @param array $allowed_protocols Allowed protocols to keep
945 * @return string Filtered content
947 function wp_kses_bad_protocol($string, $allowed_protocols) {
948 $string = wp_kses_no_null($string);
952 $original_string = $string;
953 $string = wp_kses_bad_protocol_once($string, $allowed_protocols);
954 } while ( $original_string != $string && ++$iterations < 6 );
956 if ( $original_string != $string )
963 * Removes any null characters in $string.
967 * @param string $string
970 function wp_kses_no_null($string) {
971 $string = preg_replace('/\0+/', '', $string);
972 $string = preg_replace('/(\\\\0)+/', '', $string);
978 * Strips slashes from in front of quotes.
980 * This function changes the character sequence \" to just ". It leaves all
981 * other slashes alone. It's really weird, but the quoting from
982 * preg_replace(//e) seems to require this.
986 * @param string $string String to strip slashes
987 * @return string Fixed string with quoted slashes
989 function wp_kses_stripslashes($string) {
990 return preg_replace('%\\\\"%', '"', $string);
994 * Goes through an array and changes the keys to all lower case.
998 * @param array $inarray Unfiltered array
999 * @return array Fixed array with all lowercase keys
1001 function wp_kses_array_lc($inarray) {
1002 $outarray = array ();
1004 foreach ( (array) $inarray as $inkey => $inval) {
1005 $outkey = strtolower($inkey);
1006 $outarray[$outkey] = array ();
1008 foreach ( (array) $inval as $inkey2 => $inval2) {
1009 $outkey2 = strtolower($inkey2);
1010 $outarray[$outkey][$outkey2] = $inval2;
1012 } # foreach $inarray
1018 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
1022 * @param string $string
1025 function wp_kses_js_entities($string) {
1026 return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
1030 * Handles parsing errors in wp_kses_hair().
1032 * The general plan is to remove everything to and including some whitespace,
1033 * but it deals with quotes and apostrophes as well.
1037 * @param string $string
1040 function wp_kses_html_error($string) {
1041 return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
1045 * Sanitizes content from bad protocols and other characters.
1047 * This function searches for URL protocols at the beginning of $string, while
1048 * handling whitespace and HTML entities.
1052 * @param string $string Content to check for bad protocols
1053 * @param array $allowed_protocols Allowed protocols
1054 * @return string Sanitized content
1056 function wp_kses_bad_protocol_once($string, $allowed_protocols, $count = 1 ) {
1057 $string2 = preg_split( '/:|�*58;|�*3a;/i', $string, 2 );
1058 if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) ) {
1059 $string = trim( $string2[1] );
1060 $protocol = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols );
1061 if ( 'feed:' == $protocol ) {
1064 $string = wp_kses_bad_protocol_once( $string, $allowed_protocols, ++$count );
1065 if ( empty( $string ) )
1068 $string = $protocol . $string;
1075 * Callback for wp_kses_bad_protocol_once() regular expression.
1077 * This function processes URL protocols, checks to see if they're in the
1078 * whitelist or not, and returns different data depending on the answer.
1083 * @param string $string URI scheme to check against the whitelist
1084 * @param array $allowed_protocols Allowed protocols
1085 * @return string Sanitized content
1087 function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
1088 $string2 = wp_kses_decode_entities($string);
1089 $string2 = preg_replace('/\s/', '', $string2);
1090 $string2 = wp_kses_no_null($string2);
1091 $string2 = strtolower($string2);
1094 foreach ( (array) $allowed_protocols as $one_protocol )
1095 if ( strtolower($one_protocol) == $string2 ) {
1107 * Converts and fixes HTML entities.
1109 * This function normalizes HTML entities. It will convert "AT&T" to the correct
1110 * "AT&amp;T", "&#00058;" to "&#058;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
1114 * @param string $string Content to normalize entities
1115 * @return string Content with normalized entities
1117 function wp_kses_normalize_entities($string) {
1118 # Disarm all entities by converting & to &amp;
1120 $string = str_replace('&', '&', $string);
1122 # Change back the allowed entities in our entity whitelist
1124 $string = preg_replace_callback('/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string);
1125 $string = preg_replace_callback('/&#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string);
1126 $string = preg_replace_callback('/&#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string);
1132 * Callback for wp_kses_normalize_entities() regular expression.
1134 * This function only accepts valid named entity references, which are finite,
1135 * case-sensitive, and highly scrutinized by HTML and XML validators.
1139 * @param array $matches preg_replace_callback() matches array
1140 * @return string Correctly encoded entity
1142 function wp_kses_named_entities($matches) {
1143 global $allowedentitynames;
1145 if ( empty($matches[1]) )
1149 return ( ( ! in_array($i, $allowedentitynames) ) ? "&$i;" : "&$i;" );
1153 * Callback for wp_kses_normalize_entities() regular expression.
1155 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
1156 * code points and nothing more for &#number; entities.
1161 * @param array $matches preg_replace_callback() matches array
1162 * @return string Correctly encoded entity
1164 function wp_kses_normalize_entities2($matches) {
1165 if ( empty($matches[1]) )
1169 if (valid_unicode($i)) {
1170 $i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
1180 * Callback for wp_kses_normalize_entities() for regular expression.
1182 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
1183 * numeric entities in hex form.
1187 * @param array $matches preg_replace_callback() matches array
1188 * @return string Correctly encoded entity
1190 function wp_kses_normalize_entities3($matches) {
1191 if ( empty($matches[1]) )
1194 $hexchars = $matches[1];
1195 return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';' );
1199 * Helper function to determine if a Unicode value is valid.
1201 * @param int $i Unicode value
1202 * @return bool True if the value was a valid Unicode number
1204 function valid_unicode($i) {
1205 return ( $i == 0x9 || $i == 0xa || $i == 0xd ||
1206 ($i >= 0x20 && $i <= 0xd7ff) ||
1207 ($i >= 0xe000 && $i <= 0xfffd) ||
1208 ($i >= 0x10000 && $i <= 0x10ffff) );
1212 * Convert all entities to their character counterparts.
1214 * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
1215 * anything with other entities like &auml;, but we don't need them in the URL
1216 * protocol whitelisting system anyway.
1220 * @param string $string Content to change entities
1221 * @return string Content after decoded entities
1223 function wp_kses_decode_entities($string) {
1224 $string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);
1225 $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string);
1231 * Regex callback for wp_kses_decode_entities()
1233 * @param array $match preg match
1236 function _wp_kses_decode_entities_chr( $match ) {
1237 return chr( $match[1] );
1241 * Regex callback for wp_kses_decode_entities()
1243 * @param array $match preg match
1246 function _wp_kses_decode_entities_chr_hexdec( $match ) {
1247 return chr( hexdec( $match[1] ) );
1251 * Sanitize content with allowed HTML Kses rules.
1254 * @uses $allowedtags
1256 * @param string $data Content to filter, expected to be escaped with slashes
1257 * @return string Filtered content
1259 function wp_filter_kses( $data ) {
1260 return addslashes( wp_kses( stripslashes( $data ), current_filter() ) );
1264 * Sanitize content with allowed HTML Kses rules.
1267 * @uses $allowedtags
1269 * @param string $data Content to filter, expected to not be escaped
1270 * @return string Filtered content
1272 function wp_kses_data( $data ) {
1273 return wp_kses( $data , current_filter() );
1277 * Sanitize content for allowed HTML tags for post content.
1279 * Post content refers to the page contents of the 'post' type and not $_POST
1284 * @param string $data Post content to filter, expected to be escaped with slashes
1285 * @return string Filtered post content with allowed HTML tags and attributes intact.
1287 function wp_filter_post_kses($data) {
1288 return addslashes ( wp_kses( stripslashes( $data ), 'post' ) );
1292 * Sanitize content for allowed HTML tags for post content.
1294 * Post content refers to the page contents of the 'post' type and not $_POST
1299 * @param string $data Post content to filter
1300 * @return string Filtered post content with allowed HTML tags and attributes intact.
1302 function wp_kses_post($data) {
1303 return wp_kses( $data , 'post' );
1307 * Strips all of the HTML in the content.
1311 * @param string $data Content to strip all HTML from
1312 * @return string Filtered content without any HTML
1314 function wp_filter_nohtml_kses( $data ) {
1315 return addslashes ( wp_kses( stripslashes( $data ), 'strip' ) );
1319 * Adds all Kses input form content filters.
1321 * All hooks have default priority. The wp_filter_kses() function is added to
1322 * the 'pre_comment_content' and 'title_save_pre' hooks.
1324 * The wp_filter_post_kses() function is added to the 'content_save_pre',
1325 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
1328 * @uses add_filter() See description for what functions are added to what hooks.
1330 function kses_init_filters() {
1332 add_filter('title_save_pre', 'wp_filter_kses');
1334 // Comment filtering
1335 if ( current_user_can( 'unfiltered_html' ) )
1336 add_filter( 'pre_comment_content', 'wp_filter_post_kses' );
1338 add_filter( 'pre_comment_content', 'wp_filter_kses' );
1341 add_filter('content_save_pre', 'wp_filter_post_kses');
1342 add_filter('excerpt_save_pre', 'wp_filter_post_kses');
1343 add_filter('content_filtered_save_pre', 'wp_filter_post_kses');
1347 * Removes all Kses input form content filters.
1349 * A quick procedural method for removing all of the filters that kses uses for
1350 * content in the WordPress Loop.
1352 * Does not remove the kses_init() function from 'init' hook (priority is
1353 * default). Also does not remove kses_init() function from 'set_current_user'
1354 * hook (priority is also default).
1358 function kses_remove_filters() {
1360 remove_filter('title_save_pre', 'wp_filter_kses');
1362 // Comment filtering
1363 remove_filter( 'pre_comment_content', 'wp_filter_post_kses' );
1364 remove_filter( 'pre_comment_content', 'wp_filter_kses' );
1367 remove_filter('content_save_pre', 'wp_filter_post_kses');
1368 remove_filter('excerpt_save_pre', 'wp_filter_post_kses');
1369 remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');
1373 * Sets up most of the Kses filters for input form content.
1375 * If you remove the kses_init() function from 'init' hook and
1376 * 'set_current_user' (priority is default), then none of the Kses filter hooks
1379 * First removes all of the Kses filters in case the current user does not need
1380 * to have Kses filter the content. If the user does not have unfiltered_html
1381 * capability, then Kses filters are added.
1383 * @uses kses_remove_filters() Removes the Kses filters
1384 * @uses kses_init_filters() Adds the Kses filters back if the user
1385 * does not have unfiltered HTML capability.
1388 function kses_init() {
1389 kses_remove_filters();
1391 if (current_user_can('unfiltered_html') == false)
1392 kses_init_filters();
1395 add_action('init', 'kses_init');
1396 add_action('set_current_user', 'kses_init');
1403 function safecss_filter_attr( $css, $deprecated = '' ) {
1404 if ( !empty( $deprecated ) )
1405 _deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented
1407 $css = wp_kses_no_null($css);
1408 $css = str_replace(array("\n","\r","\t"), '', $css);
1410 if ( preg_match( '%[\\(&=}]|/\*%', $css ) ) // remove any inline css containing \ ( & } = or comments
1413 $css_array = explode( ';', trim( $css ) );
1414 $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
1415 'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
1416 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
1417 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
1418 'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
1419 'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
1420 'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
1421 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
1422 'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
1423 'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
1426 if ( empty($allowed_attr) )
1430 foreach ( $css_array as $css_item ) {
1431 if ( $css_item == '' )
1433 $css_item = trim( $css_item );
1435 if ( strpos( $css_item, ':' ) === false ) {
1438 $parts = explode( ':', $css_item );
1439 if ( in_array( trim( $parts[0] ), $allowed_attr ) )
1453 * Helper function to add global attributes to a tag in the allowed html list.
1458 * @param array $value An array of attributes.
1459 * @return array The array of attributes with global attributes added.
1461 function _wp_add_global_attributes( $value ) {
1462 $global_attributes = array(
1469 if ( true === $value )
1472 if ( is_array( $value ) )
1473 return array_merge( $value, $global_attributes );