3 * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
4 * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar
6 * This program is free software and open source software; you can redistribute
7 * it and/or modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the License,
9 * or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 * http://www.gnu.org/licenses/gpl.html
21 * [kses strips evil scripts!]
23 * Added wp_ prefix to avoid conflicts with existing kses users
26 * @copyright (C) 2002, 2003, 2005
27 * @author Ulf Harnhammar <http://advogato.org/person/metaur/>
35 * You can override this in a plugin.
37 * The wp_kses_allowed_html filter is more powerful and supplies context.
38 * CUSTOM_TAGS is not recommended and should be considered deprecated.
40 * @see wp_kses_allowed_html()
44 if ( ! defined( 'CUSTOM_TAGS' ) )
45 define( 'CUSTOM_TAGS', false );
47 // Ensure that these variables are added to the global namespace
48 // (e.g. if using namespaces / autoload in the current PHP environment).
49 global $allowedposttags, $allowedtags, $allowedentitynames;
51 if ( ! CUSTOM_TAGS ) {
53 * Kses global for default allowable HTML tags.
55 * Can be overridden by using the CUSTOM_TAGS constant.
57 * @global array $allowedposttags
60 $allowedposttags = array(
101 'blockquote' => array(
159 'fieldset' => array(),
166 'figcaption' => array(
186 'accept-charset' => true,
310 'cellpadding' => true,
311 'cellspacing' => true,
414 * Kses allowed HTML elements.
416 * @global array $allowedtags
419 $allowedtags = array(
431 'blockquote' => array(
448 $allowedentitynames = array(
449 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen',
450 'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo',
451 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn',
452 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
453 'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde',
454 'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute',
455 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
456 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
457 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
458 'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute',
459 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
460 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
461 'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute',
462 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
463 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
464 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
465 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
466 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm',
467 'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo',
468 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
469 'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta',
470 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
471 'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi',
472 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
473 'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta',
474 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
475 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi',
476 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
477 'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym',
478 'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime',
479 'oline', 'frasl', 'weierp', 'image', 'real', 'trade',
480 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
481 'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
482 'forall', 'part', 'exist', 'empty', 'nabla', 'isin',
483 'notin', 'ni', 'prod', 'sum', 'minus', 'lowast',
484 'radic', 'prop', 'infin', 'ang', 'and', 'or',
485 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
486 'ne', 'equiv', 'le', 'ge', 'sub', 'sup',
487 'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp',
488 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
489 'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams',
490 'sup1', 'sup2', 'sup3', 'frac14', 'frac12', 'frac34',
494 $allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags );
496 $allowedtags = wp_kses_array_lc( $allowedtags );
497 $allowedposttags = wp_kses_array_lc( $allowedposttags );
501 * Filters content and keeps only allowable HTML elements.
503 * This function makes sure that only the allowed HTML element names, attribute
504 * names and attribute values plus only sane HTML entities will occur in
505 * $string. You have to remove any slashes from PHP's magic quotes before you
506 * call this function.
508 * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news',
509 * 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp' and 'svn'. This
510 * covers all common link protocols, except for 'javascript' which should not
511 * be allowed for untrusted users.
515 * @param string $string Content to filter through kses
516 * @param array $allowed_html List of allowed HTML elements
517 * @param array $allowed_protocols Optional. Allowed protocol in links.
518 * @return string Filtered content with only allowed HTML elements
520 function wp_kses( $string, $allowed_html, $allowed_protocols = array() ) {
521 if ( empty( $allowed_protocols ) )
522 $allowed_protocols = wp_allowed_protocols();
523 $string = wp_kses_no_null($string);
524 $string = wp_kses_js_entities($string);
525 $string = wp_kses_normalize_entities($string);
526 $string = wp_kses_hook($string, $allowed_html, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook
527 return wp_kses_split($string, $allowed_html, $allowed_protocols);
531 * Return a list of allowed tags and attributes for a given context.
535 * @param string|array $context The context for which to retrieve tags. Allowed values are
536 * post | strip | data | entities, the name of a field filter such as pre_user_description, or an explicit array of allowed HTML.
537 * @return array List of allowed tags and their allowed attributes.
539 function wp_kses_allowed_html( $context = '' ) {
540 global $allowedposttags, $allowedtags, $allowedentitynames;
542 if ( is_array( $context ) ) {
544 * Filter HTML elements allowed for a given context.
548 * @param array $tags Allowed tags, attributes, and/or entities.
549 * @param string $context Context to judge allowed tags by. Allowed values are 'post',
550 * 'data', 'strip', 'entities', 'explicit', or the name of a filter.
552 return apply_filters( 'wp_kses_allowed_html', $context, 'explicit' );
555 switch ( $context ) {
557 /** This filter is documented in wp-includes/kses.php */
558 return apply_filters( 'wp_kses_allowed_html', $allowedposttags, $context );
560 case 'user_description':
561 case 'pre_user_description':
562 $tags = $allowedtags;
563 $tags['a']['rel'] = true;
564 /** This filter is documented in wp-includes/kses.php */
565 return apply_filters( 'wp_kses_allowed_html', $tags, $context );
568 /** This filter is documented in wp-includes/kses.php */
569 return apply_filters( 'wp_kses_allowed_html', array(), $context );
572 /** This filter is documented in wp-includes/kses.php */
573 return apply_filters( 'wp_kses_allowed_html', $allowedentitynames, $context);
577 /** This filter is documented in wp-includes/kses.php */
578 return apply_filters( 'wp_kses_allowed_html', $allowedtags, $context );
583 * You add any kses hooks here.
585 * There is currently only one kses WordPress hook and it is called here. All
586 * parameters are passed to the hook, which is expected to return a string.
590 * @param string $string Content to filter through kses
591 * @param array $allowed_html List of allowed HTML elements
592 * @param array $allowed_protocols Allowed protocol in links
593 * @return string Filtered content through 'pre_kses' hook
595 function wp_kses_hook( $string, $allowed_html, $allowed_protocols ) {
597 * Filter content to be run through kses.
601 * @param string $string Content to run through kses.
602 * @param array $allowed_html Allowed HTML elements.
603 * @param array $allowed_protocols Allowed protocol in links.
605 $string = apply_filters( 'pre_kses', $string, $allowed_html, $allowed_protocols );
610 * This function returns kses' version number.
614 * @return string KSES Version Number
616 function wp_kses_version() {
621 * Searches for HTML tags, no matter how malformed.
623 * It also matches stray ">" characters.
627 * @param string $string Content to filter
628 * @param array $allowed_html Allowed HTML elements
629 * @param array $allowed_protocols Allowed protocols to keep
630 * @return string Content with fixed HTML tags
632 function wp_kses_split( $string, $allowed_html, $allowed_protocols ) {
633 global $pass_allowed_html, $pass_allowed_protocols;
634 $pass_allowed_html = $allowed_html;
635 $pass_allowed_protocols = $allowed_protocols;
636 return preg_replace_callback( '%(<!--.*?(-->|$))|(<[^>]*(>|$)|>)%', '_wp_kses_split_callback', $string );
640 * Callback for wp_kses_split.
645 function _wp_kses_split_callback( $match ) {
646 global $pass_allowed_html, $pass_allowed_protocols;
647 return wp_kses_split2( $match[0], $pass_allowed_html, $pass_allowed_protocols );
651 * Callback for wp_kses_split for fixing malformed HTML tags.
653 * This function does a lot of work. It rejects some very malformed things like
654 * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
655 * strip_tags()!). Otherwise it splits the tag into an element and an attribute
658 * After the tag is split into an element and an attribute list, it is run
659 * through another filter which will remove illegal attributes and once that is
660 * completed, will be returned.
665 * @param string $string Content to filter
666 * @param array $allowed_html Allowed HTML elements
667 * @param array $allowed_protocols Allowed protocols to keep
668 * @return string Fixed HTML element
670 function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
671 $string = wp_kses_stripslashes($string);
673 if (substr($string, 0, 1) != '<')
675 # It matched a ">" character
677 if ( '<!--' == substr( $string, 0, 4 ) ) {
678 $string = str_replace( array('<!--', '-->'), '', $string );
679 while ( $string != ($newstring = wp_kses($string, $allowed_html, $allowed_protocols)) )
680 $string = $newstring;
683 // prevent multiple dashes in comments
684 $string = preg_replace('/--+/', '-', $string);
685 // prevent three dashes closing a comment
686 $string = preg_replace('/-$/', '', $string);
687 return "<!--{$string}-->";
689 # Allow HTML comments
691 if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
693 # It's seriously malformed
695 $slash = trim($matches[1]);
697 $attrlist = $matches[3];
699 if ( ! is_array( $allowed_html ) )
700 $allowed_html = wp_kses_allowed_html( $allowed_html );
702 if ( ! isset($allowed_html[strtolower($elem)]) )
704 # They are using a not allowed HTML element
708 # No attributes are allowed for closing elements
710 return wp_kses_attr( $elem, $attrlist, $allowed_html, $allowed_protocols );
714 * Removes all attributes, if none are allowed for this element.
716 * If some are allowed it calls wp_kses_hair() to split them further, and then
717 * it builds up new HTML code from the data that wp_kses_hair() returns. It also
718 * removes "<" and ">" characters, if there are any left. One more thing it does
719 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
720 * in the returned code as well.
724 * @param string $element HTML element/tag
725 * @param string $attr HTML attributes from HTML element to closing HTML element tag
726 * @param array $allowed_html Allowed HTML elements
727 * @param array $allowed_protocols Allowed protocols to keep
728 * @return string Sanitized HTML element
730 function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
731 # Is there a closing XHTML slash at the end of the attributes?
733 if ( ! is_array( $allowed_html ) )
734 $allowed_html = wp_kses_allowed_html( $allowed_html );
737 if (preg_match('%\s*/\s*$%', $attr))
740 # Are any attributes allowed at all for this element?
741 if ( ! isset($allowed_html[strtolower($element)]) || count($allowed_html[strtolower($element)]) == 0 )
742 return "<$element$xhtml_slash>";
745 $attrarr = wp_kses_hair($attr, $allowed_protocols);
747 # Go through $attrarr, and save the allowed attributes for this element
751 $allowed_attr = $allowed_html[strtolower($element)];
752 foreach ($attrarr as $arreach) {
753 if ( ! isset( $allowed_attr[strtolower($arreach['name'])] ) )
754 continue; # the attribute is not allowed
756 $current = $allowed_attr[strtolower($arreach['name'])];
757 if ( $current == '' )
758 continue; # the attribute is not allowed
760 if ( strtolower( $arreach['name'] ) == 'style' ) {
761 $orig_value = $arreach['value'];
762 $value = safecss_filter_attr( $orig_value );
764 if ( empty( $value ) )
767 $arreach['value'] = $value;
768 $arreach['whole'] = str_replace( $orig_value, $value, $arreach['whole'] );
771 if ( ! is_array($current) ) {
772 $attr2 .= ' '.$arreach['whole'];
773 # there are no checks
776 # there are some checks
778 foreach ($current as $currkey => $currval) {
779 if ( ! wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval) ) {
786 $attr2 .= ' '.$arreach['whole']; # it passed them
787 } # if !is_array($current)
790 # Remove any "<" or ">" characters
791 $attr2 = preg_replace('/[<>]/', '', $attr2);
793 return "<$element$attr2$xhtml_slash>";
797 * Builds an attribute list from string containing attributes.
799 * This function does a lot of work. It parses an attribute list into an array
800 * with attribute data, and tries to do the right thing even if it gets weird
801 * input. It will add quotes around attribute values that don't have any quotes
802 * or apostrophes around them, to make it easier to produce HTML code that will
803 * conform to W3C's HTML specification. It will also remove bad URL protocols
804 * from attribute values. It also reduces duplicate attributes by using the
805 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
809 * @param string $attr Attribute list from HTML element to closing HTML element tag
810 * @param array $allowed_protocols Allowed protocols to keep
811 * @return array List of attributes after parsing
813 function wp_kses_hair($attr, $allowed_protocols) {
817 $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
819 # Loop through the whole attribute list
821 while (strlen($attr) != 0) {
822 $working = 0; # Was the last operation successful?
825 case 0 : # attribute name, href for instance
827 if ( preg_match('/^([-a-zA-Z:]+)/', $attr, $match ) ) {
828 $attrname = $match[1];
829 $working = $mode = 1;
830 $attr = preg_replace( '/^[-a-zA-Z:]+/', '', $attr );
835 case 1 : # equals sign or valueless ("selected")
837 if (preg_match('/^\s*=\s*/', $attr)) # equals sign
841 $attr = preg_replace('/^\s*=\s*/', '', $attr);
845 if (preg_match('/^\s+/', $attr)) # valueless
849 if(false === array_key_exists($attrname, $attrarr)) {
850 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
852 $attr = preg_replace('/^\s+/', '', $attr);
857 case 2 : # attribute value, a URL after href= for instance
859 if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match))
862 $thisval = $match[1];
863 if ( in_array(strtolower($attrname), $uris) )
864 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
866 if(false === array_key_exists($attrname, $attrarr)) {
867 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
871 $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
875 if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match))
878 $thisval = $match[1];
879 if ( in_array(strtolower($attrname), $uris) )
880 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
882 if(false === array_key_exists($attrname, $attrarr)) {
883 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
887 $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
891 if (preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match))
894 $thisval = $match[1];
895 if ( in_array(strtolower($attrname), $uris) )
896 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
898 if(false === array_key_exists($attrname, $attrarr)) {
899 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
901 # We add quotes to conform to W3C's HTML spec.
904 $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
910 if ($working == 0) # not well formed, remove and try again
912 $attr = wp_kses_html_error($attr);
917 if ($mode == 1 && false === array_key_exists($attrname, $attrarr))
918 # special case, for when the attribute list ends with a valueless
919 # attribute like "selected"
920 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
926 * Performs different checks for attribute values.
928 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
933 * @param string $value Attribute value
934 * @param string $vless Whether the value is valueless. Use 'y' or 'n'
935 * @param string $checkname What $checkvalue is checking for.
936 * @param mixed $checkvalue What constraint the value should pass
937 * @return bool Whether check passes
939 function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
942 switch (strtolower($checkname)) {
944 # The maxlen check makes sure that the attribute value has a length not
945 # greater than the given value. This can be used to avoid Buffer Overflows
946 # in WWW clients and various Internet servers.
948 if (strlen($value) > $checkvalue)
953 # The minlen check makes sure that the attribute value has a length not
954 # smaller than the given value.
956 if (strlen($value) < $checkvalue)
961 # The maxval check does two things: it checks that the attribute value is
962 # an integer from 0 and up, without an excessive amount of zeroes or
963 # whitespace (to avoid Buffer Overflows). It also checks that the attribute
964 # value is not greater than the given value.
965 # This check can be used to avoid Denial of Service attacks.
967 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
969 if ($value > $checkvalue)
974 # The minval check makes sure that the attribute value is a positive integer,
975 # and that it is not smaller than the given value.
977 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
979 if ($value < $checkvalue)
984 # The valueless check makes sure if the attribute has a value
985 # (like <a href="blah">) or not (<option selected>). If the given value
986 # is a "y" or a "Y", the attribute must not have a value.
987 # If the given value is an "n" or an "N", the attribute must have one.
989 if (strtolower($checkvalue) != $vless)
998 * Sanitize string from bad protocols.
1000 * This function removes all non-allowed protocols from the beginning of
1001 * $string. It ignores whitespace and the case of the letters, and it does
1002 * understand HTML entities. It does its work in a while loop, so it won't be
1003 * fooled by a string like "javascript:javascript:alert(57)".
1007 * @param string $string Content to filter bad protocols from
1008 * @param array $allowed_protocols Allowed protocols to keep
1009 * @return string Filtered content
1011 function wp_kses_bad_protocol($string, $allowed_protocols) {
1012 $string = wp_kses_no_null($string);
1016 $original_string = $string;
1017 $string = wp_kses_bad_protocol_once($string, $allowed_protocols);
1018 } while ( $original_string != $string && ++$iterations < 6 );
1020 if ( $original_string != $string )
1027 * Removes any invalid control characters in $string.
1029 * Also removes any instance of the '\0' string.
1033 * @param string $string
1036 function wp_kses_no_null($string) {
1037 $string = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $string);
1038 $string = preg_replace('/(\\\\0)+/', '', $string);
1044 * Strips slashes from in front of quotes.
1046 * This function changes the character sequence \" to just ". It leaves all
1047 * other slashes alone. It's really weird, but the quoting from
1048 * preg_replace(//e) seems to require this.
1052 * @param string $string String to strip slashes
1053 * @return string Fixed string with quoted slashes
1055 function wp_kses_stripslashes($string) {
1056 return preg_replace('%\\\\"%', '"', $string);
1060 * Goes through an array and changes the keys to all lower case.
1064 * @param array $inarray Unfiltered array
1065 * @return array Fixed array with all lowercase keys
1067 function wp_kses_array_lc($inarray) {
1068 $outarray = array ();
1070 foreach ( (array) $inarray as $inkey => $inval) {
1071 $outkey = strtolower($inkey);
1072 $outarray[$outkey] = array ();
1074 foreach ( (array) $inval as $inkey2 => $inval2) {
1075 $outkey2 = strtolower($inkey2);
1076 $outarray[$outkey][$outkey2] = $inval2;
1078 } # foreach $inarray
1084 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
1088 * @param string $string
1091 function wp_kses_js_entities($string) {
1092 return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
1096 * Handles parsing errors in wp_kses_hair().
1098 * The general plan is to remove everything to and including some whitespace,
1099 * but it deals with quotes and apostrophes as well.
1103 * @param string $string
1106 function wp_kses_html_error($string) {
1107 return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
1111 * Sanitizes content from bad protocols and other characters.
1113 * This function searches for URL protocols at the beginning of $string, while
1114 * handling whitespace and HTML entities.
1118 * @param string $string Content to check for bad protocols
1119 * @param array $allowed_protocols Allowed protocols
1120 * @return string Sanitized content
1122 function wp_kses_bad_protocol_once($string, $allowed_protocols, $count = 1 ) {
1123 $string2 = preg_split( '/:|�*58;|�*3a;/i', $string, 2 );
1124 if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) ) {
1125 $string = trim( $string2[1] );
1126 $protocol = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols );
1127 if ( 'feed:' == $protocol ) {
1130 $string = wp_kses_bad_protocol_once( $string, $allowed_protocols, ++$count );
1131 if ( empty( $string ) )
1134 $string = $protocol . $string;
1141 * Callback for wp_kses_bad_protocol_once() regular expression.
1143 * This function processes URL protocols, checks to see if they're in the
1144 * whitelist or not, and returns different data depending on the answer.
1149 * @param string $string URI scheme to check against the whitelist
1150 * @param array $allowed_protocols Allowed protocols
1151 * @return string Sanitized content
1153 function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
1154 $string2 = wp_kses_decode_entities($string);
1155 $string2 = preg_replace('/\s/', '', $string2);
1156 $string2 = wp_kses_no_null($string2);
1157 $string2 = strtolower($string2);
1160 foreach ( (array) $allowed_protocols as $one_protocol )
1161 if ( strtolower($one_protocol) == $string2 ) {
1173 * Converts and fixes HTML entities.
1175 * This function normalizes HTML entities. It will convert `AT&T` to the correct
1176 * `AT&amp;T`, `&#00058;` to `&#058;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on.
1180 * @param string $string Content to normalize entities
1181 * @return string Content with normalized entities
1183 function wp_kses_normalize_entities($string) {
1184 # Disarm all entities by converting & to &amp;
1186 $string = str_replace('&', '&', $string);
1188 # Change back the allowed entities in our entity whitelist
1190 $string = preg_replace_callback('/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string);
1191 $string = preg_replace_callback('/&#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string);
1192 $string = preg_replace_callback('/&#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string);
1198 * Callback for wp_kses_normalize_entities() regular expression.
1200 * This function only accepts valid named entity references, which are finite,
1201 * case-sensitive, and highly scrutinized by HTML and XML validators.
1205 * @param array $matches preg_replace_callback() matches array
1206 * @return string Correctly encoded entity
1208 function wp_kses_named_entities($matches) {
1209 global $allowedentitynames;
1211 if ( empty($matches[1]) )
1215 return ( ( ! in_array($i, $allowedentitynames) ) ? "&$i;" : "&$i;" );
1219 * Callback for wp_kses_normalize_entities() regular expression.
1221 * This function helps {@see wp_kses_normalize_entities()} to only accept valid
1222 * Unicode values and nothing more for `&#number;` entities.
1227 * @param array $matches preg_replace_callback() matches array
1228 * @return string Correctly encoded entity
1230 function wp_kses_normalize_entities2($matches) {
1231 if ( empty($matches[1]) )
1235 if (valid_unicode($i)) {
1236 $i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
1246 * Callback for wp_kses_normalize_entities() regular expression.
1248 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
1249 * numeric entities in hex form.
1253 * @param array $matches preg_replace_callback() matches array
1254 * @return string Correctly encoded entity
1256 function wp_kses_normalize_entities3($matches) {
1257 if ( empty($matches[1]) )
1260 $hexchars = $matches[1];
1261 return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';' );
1265 * Helper function to determine if a Unicode value is valid.
1267 * @param int $i Unicode value
1268 * @return bool True if the value was a valid Unicode number
1270 function valid_unicode($i) {
1271 return ( $i == 0x9 || $i == 0xa || $i == 0xd ||
1272 ($i >= 0x20 && $i <= 0xd7ff) ||
1273 ($i >= 0xe000 && $i <= 0xfffd) ||
1274 ($i >= 0x10000 && $i <= 0x10ffff) );
1278 * Convert all entities to their character counterparts.
1280 * This function decodes numeric HTML entities (`&#65;` and `&#x41;`).
1281 * It doesn't do anything with other entities like `&auml;`, but we don't
1282 * need them in the URL protocol whitelisting system anyway.
1286 * @param string $string Content to change entities
1287 * @return string Content after decoded entities
1289 function wp_kses_decode_entities($string) {
1290 $string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);
1291 $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string);
1297 * Regex callback for wp_kses_decode_entities()
1299 * @param array $match preg match
1302 function _wp_kses_decode_entities_chr( $match ) {
1303 return chr( $match[1] );
1307 * Regex callback for wp_kses_decode_entities()
1309 * @param array $match preg match
1312 function _wp_kses_decode_entities_chr_hexdec( $match ) {
1313 return chr( hexdec( $match[1] ) );
1317 * Sanitize content with allowed HTML Kses rules.
1321 * @param string $data Content to filter, expected to be escaped with slashes
1322 * @return string Filtered content
1324 function wp_filter_kses( $data ) {
1325 return addslashes( wp_kses( stripslashes( $data ), current_filter() ) );
1329 * Sanitize content with allowed HTML Kses rules.
1333 * @param string $data Content to filter, expected to not be escaped
1334 * @return string Filtered content
1336 function wp_kses_data( $data ) {
1337 return wp_kses( $data , current_filter() );
1341 * Sanitize content for allowed HTML tags for post content.
1343 * Post content refers to the page contents of the 'post' type and not $_POST
1348 * @param string $data Post content to filter, expected to be escaped with slashes
1349 * @return string Filtered post content with allowed HTML tags and attributes intact.
1351 function wp_filter_post_kses($data) {
1352 return addslashes ( wp_kses( stripslashes( $data ), 'post' ) );
1356 * Sanitize content for allowed HTML tags for post content.
1358 * Post content refers to the page contents of the 'post' type and not $_POST
1363 * @param string $data Post content to filter
1364 * @return string Filtered post content with allowed HTML tags and attributes intact.
1366 function wp_kses_post($data) {
1367 return wp_kses( $data , 'post' );
1371 * Strips all of the HTML in the content.
1375 * @param string $data Content to strip all HTML from
1376 * @return string Filtered content without any HTML
1378 function wp_filter_nohtml_kses( $data ) {
1379 return addslashes ( wp_kses( stripslashes( $data ), 'strip' ) );
1383 * Adds all Kses input form content filters.
1385 * All hooks have default priority. The wp_filter_kses() function is added to
1386 * the 'pre_comment_content' and 'title_save_pre' hooks.
1388 * The wp_filter_post_kses() function is added to the 'content_save_pre',
1389 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
1393 function kses_init_filters() {
1395 add_filter('title_save_pre', 'wp_filter_kses');
1397 // Comment filtering
1398 if ( current_user_can( 'unfiltered_html' ) )
1399 add_filter( 'pre_comment_content', 'wp_filter_post_kses' );
1401 add_filter( 'pre_comment_content', 'wp_filter_kses' );
1404 add_filter('content_save_pre', 'wp_filter_post_kses');
1405 add_filter('excerpt_save_pre', 'wp_filter_post_kses');
1406 add_filter('content_filtered_save_pre', 'wp_filter_post_kses');
1410 * Removes all Kses input form content filters.
1412 * A quick procedural method for removing all of the filters that kses uses for
1413 * content in WordPress Loop.
1415 * Does not remove the kses_init() function from 'init' hook (priority is
1416 * default). Also does not remove kses_init() function from 'set_current_user'
1417 * hook (priority is also default).
1421 function kses_remove_filters() {
1423 remove_filter('title_save_pre', 'wp_filter_kses');
1425 // Comment filtering
1426 remove_filter( 'pre_comment_content', 'wp_filter_post_kses' );
1427 remove_filter( 'pre_comment_content', 'wp_filter_kses' );
1430 remove_filter('content_save_pre', 'wp_filter_post_kses');
1431 remove_filter('excerpt_save_pre', 'wp_filter_post_kses');
1432 remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');
1436 * Sets up most of the Kses filters for input form content.
1438 * If you remove the kses_init() function from 'init' hook and
1439 * 'set_current_user' (priority is default), then none of the Kses filter hooks
1442 * First removes all of the Kses filters in case the current user does not need
1443 * to have Kses filter the content. If the user does not have unfiltered_html
1444 * capability, then Kses filters are added.
1448 function kses_init() {
1449 kses_remove_filters();
1451 if (current_user_can('unfiltered_html') == false)
1452 kses_init_filters();
1455 add_action('init', 'kses_init');
1456 add_action('set_current_user', 'kses_init');
1463 function safecss_filter_attr( $css, $deprecated = '' ) {
1464 if ( !empty( $deprecated ) )
1465 _deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented
1467 $css = wp_kses_no_null($css);
1468 $css = str_replace(array("\n","\r","\t"), '', $css);
1470 if ( preg_match( '%[\\\\(&=}]|/\*%', $css ) ) // remove any inline css containing \ ( & } = or comments
1473 $css_array = explode( ';', trim( $css ) );
1476 * Filter list of allowed CSS attributes.
1480 * @param array $attr List of allowed CSS attributes.
1482 $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
1483 'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
1484 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
1485 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
1486 'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
1487 'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
1488 'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
1489 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
1490 'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
1491 'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
1494 if ( empty($allowed_attr) )
1498 foreach ( $css_array as $css_item ) {
1499 if ( $css_item == '' )
1501 $css_item = trim( $css_item );
1503 if ( strpos( $css_item, ':' ) === false ) {
1506 $parts = explode( ':', $css_item );
1507 if ( in_array( trim( $parts[0] ), $allowed_attr ) )
1521 * Helper function to add global attributes to a tag in the allowed html list.
1526 * @param array $value An array of attributes.
1527 * @return array The array of attributes with global attributes added.
1529 function _wp_add_global_attributes( $value ) {
1530 $global_attributes = array(
1538 if ( true === $value )
1541 if ( is_array( $value ) )
1542 return array_merge( $value, $global_attributes );