3 * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
4 * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar
6 * This program is free software and open source software; you can redistribute
7 * it and/or modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the License,
9 * or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 * http://www.gnu.org/licenses/gpl.html
21 * [kses strips evil scripts!]
23 * Added wp_ prefix to avoid conflicts with existing kses users
26 * @copyright (C) 2002, 2003, 2005
27 * @author Ulf Harnhammar <http://advogato.org/person/metaur/>
35 * You can override this in a plugin.
37 * The wp_kses_allowed_html filter is more powerful and supplies context.
38 * CUSTOM_TAGS is not recommended and should be considered deprecated.
40 * @see wp_kses_allowed_html()
44 if ( ! defined( 'CUSTOM_TAGS' ) )
45 define( 'CUSTOM_TAGS', false );
47 // Ensure that these variables are added to the global namespace
48 // (e.g. if using namespaces / autoload in the current PHP environment).
49 global $allowedposttags, $allowedtags, $allowedentitynames;
51 if ( ! CUSTOM_TAGS ) {
53 * Kses global for default allowable HTML tags.
55 * Can be overridden by using the CUSTOM_TAGS constant.
57 * @global array $allowedposttags
60 $allowedposttags = array(
104 'blockquote' => array(
162 'fieldset' => array(),
169 'figcaption' => array(
189 'accept-charset' => true,
313 'cellpadding' => true,
314 'cellspacing' => true,
418 * Kses allowed HTML elements.
420 * @global array $allowedtags
423 $allowedtags = array(
435 'blockquote' => array(
453 $allowedentitynames = array(
454 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen',
455 'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo',
456 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn',
457 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
458 'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde',
459 'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute',
460 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
461 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
462 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
463 'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute',
464 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
465 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
466 'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute',
467 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
468 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
469 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
470 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
471 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm',
472 'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo',
473 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
474 'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta',
475 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
476 'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi',
477 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
478 'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta',
479 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
480 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi',
481 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
482 'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym',
483 'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime',
484 'oline', 'frasl', 'weierp', 'image', 'real', 'trade',
485 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
486 'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
487 'forall', 'part', 'exist', 'empty', 'nabla', 'isin',
488 'notin', 'ni', 'prod', 'sum', 'minus', 'lowast',
489 'radic', 'prop', 'infin', 'ang', 'and', 'or',
490 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
491 'ne', 'equiv', 'le', 'ge', 'sub', 'sup',
492 'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp',
493 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
494 'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams',
495 'sup1', 'sup2', 'sup3', 'frac14', 'frac12', 'frac34',
499 $allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags );
501 $allowedtags = wp_kses_array_lc( $allowedtags );
502 $allowedposttags = wp_kses_array_lc( $allowedposttags );
/**
 * Filters content and keeps only allowable HTML elements.
 *
 * This function makes sure that only the allowed HTML element names, attribute
 * names and attribute values plus only sane HTML entities will occur in
 * $string. You have to remove any slashes from PHP's magic quotes before you
 * call this function.
 *
 * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news',
 * 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp' and 'svn'. This
 * covers all common link protocols, except for 'javascript' which should not
 * be allowed for untrusted users.
 *
 * @param string       $string            Content to filter through kses
 * @param array|string $allowed_html      List of allowed HTML elements, or a context
 *                                        name that wp_kses_allowed_html() resolves later.
 * @param array        $allowed_protocols Optional. Allowed protocol in links.
 * @return string Filtered content with only allowed HTML elements
 */
function wp_kses( $string, $allowed_html, $allowed_protocols = array() ) {
	if ( empty( $allowed_protocols ) ) {
		$allowed_protocols = wp_allowed_protocols();
	}

	// Order matters: control characters and Netscape JS entities go first,
	// then entities are normalized before the tag splitter sees the content.
	$string = wp_kses_no_null( $string, array( 'slash_zero' => 'keep' ) );
	$string = wp_kses_js_entities( $string );
	$string = wp_kses_normalize_entities( $string );
	$string = wp_kses_hook( $string, $allowed_html, $allowed_protocols ); // WP changed the order of these funcs and added args to wp_kses_hook

	return wp_kses_split( $string, $allowed_html, $allowed_protocols );
}
/**
 * Filters one attribute only and ensures its value is allowed.
 *
 * This function has the advantage of being more secure than esc_attr() and can
 * escape data in some situations where wp_kses() must strip the whole attribute.
 *
 * @param string $string  The 'whole' attribute, including name and value.
 * @param string $element The element name to which the attribute belongs.
 * @return string Filtered attribute. Empty when the value is opened with a quote
 *                that is never closed.
 */
function wp_kses_one_attr( $string, $element ) {
	$uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
	$allowed_html = wp_kses_allowed_html( 'post' );
	$allowed_protocols = wp_allowed_protocols();
	$string = wp_kses_no_null( $string, array( 'slash_zero' => 'keep' ) );
	$string = wp_kses_js_entities( $string );

	// Preserve leading and trailing whitespace.
	$matches = array();
	preg_match('/^\s*/', $string, $matches);
	$lead = $matches[0];
	preg_match('/\s*$/', $string, $matches);
	$trail = $matches[0];
	if ( empty( $trail ) ) {
		$string = substr( $string, strlen( $lead ) );
	} else {
		$string = substr( $string, strlen( $lead ), -strlen( $trail ) );
	}

	// Parse attribute name and value from input.
	$split = preg_split( '/\s*=\s*/', $string, 2 );
	$name = $split[0];
	if ( count( $split ) == 2 ) {
		$value = $split[1];

		// Remove quotes surrounding $value.
		// Also guarantee correct quoting in $string for this one attribute.
		if ( '' == $value ) {
			$quote = '';
		} else {
			$quote = $value[0];
		}
		if ( '"' == $quote || "'" == $quote ) {
			if ( substr( $value, -1 ) != $quote ) {
				// Opening quote without a matching closing quote.
				return '';
			}
			$value = substr( $value, 1, -1 );
		} else {
			// Unquoted (or empty) values are re-emitted double-quoted.
			$quote = '"';
		}

		// Sanitize quotes, angle braces, and entities.
		$value = esc_attr( $value );

		// Sanitize URI values.
		if ( in_array( strtolower( $name ), $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		$string = "$name=$quote$value$quote";
		$vless = 'n';
	} else {
		$value = '';
		$vless = 'y';
	}

	// Sanitize attribute by name. The check blanks $string by reference
	// when the attribute is not allowed for $element.
	wp_kses_attr_check( $name, $value, $string, $vless, $element, $allowed_html );

	// Restore whitespace.
	return $lead . $string . $trail;
}
/**
 * Return a list of allowed tags and attributes for a given context.
 *
 * @global array $allowedposttags
 * @global array $allowedtags
 * @global array $allowedentitynames
 *
 * @param string|array $context The context for which to retrieve tags.
 *                              Allowed values are post, strip, data, entities, or
 *                              the name of a field filter such as pre_user_description.
 *                              An array is treated as an explicit allow-list and is
 *                              only passed through the filter.
 * @return array List of allowed tags and their allowed attributes.
 */
function wp_kses_allowed_html( $context = '' ) {
	global $allowedposttags, $allowedtags, $allowedentitynames;

	if ( is_array( $context ) ) {
		/**
		 * Filter HTML elements allowed for a given context.
		 *
		 * @param string $tags    Allowed tags, attributes, and/or entities.
		 * @param string $context Context to judge allowed tags by. Allowed values are 'post',
		 *                        'data', 'strip', 'entities', 'explicit', or the name of a filter.
		 */
		return apply_filters( 'wp_kses_allowed_html', $context, 'explicit' );
	}

	switch ( $context ) {
		case 'post':
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $allowedposttags, $context );

		case 'user_description':
		case 'pre_user_description':
			// Same as the default set, plus rel="" on links.
			$tags = $allowedtags;
			$tags['a']['rel'] = true;
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $tags, $context );

		case 'strip':
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', array(), $context );

		case 'entities':
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $allowedentitynames, $context);

		case 'data':
		default:
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $allowedtags, $context );
	}
}
/**
 * You add any kses hooks here.
 *
 * There is currently only one kses WordPress hook and it is called here. All
 * parameters are passed to the hooks and expected to receive a string.
 *
 * @param string $string            Content to filter through kses
 * @param array  $allowed_html      List of allowed HTML elements
 * @param array  $allowed_protocols Allowed protocol in links
 * @return string Filtered content through 'pre_kses' hook
 */
function wp_kses_hook( $string, $allowed_html, $allowed_protocols ) {
	/**
	 * Filter content to be run through kses.
	 *
	 * @param string $string            Content to run through kses.
	 * @param array  $allowed_html      Allowed HTML elements.
	 * @param array  $allowed_protocols Allowed protocol in links.
	 */
	return apply_filters( 'pre_kses', $string, $allowed_html, $allowed_protocols );
}
/**
 * This function returns kses' version number.
 *
 * @return string KSES Version Number
 */
function wp_kses_version() {
	// Matches the kses release named in this file's header.
	return '0.2.2';
}
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * It also matches stray ">" characters.
 *
 * @global array $pass_allowed_html
 * @global array $pass_allowed_protocols
 *
 * @param string $string            Content to filter
 * @param array  $allowed_html      Allowed HTML elements
 * @param array  $allowed_protocols Allowed protocols to keep
 * @return string Content with fixed HTML tags
 */
function wp_kses_split( $string, $allowed_html, $allowed_protocols ) {
	global $pass_allowed_html, $pass_allowed_protocols;

	// The callback is referenced by name and cannot take extra arguments,
	// so the allow-lists are handed over through globals.
	$pass_allowed_html = $allowed_html;
	$pass_allowed_protocols = $allowed_protocols;

	// Alternatives: an HTML comment (possibly unterminated), a tag
	// (possibly unterminated), or a lone ">".
	return preg_replace_callback( '%(<!--.*?(-->|$))|(<[^>]*(>|$)|>)%', '_wp_kses_split_callback', $string );
}
/**
 * Callback for wp_kses_split.
 *
 * @global array $pass_allowed_html
 * @global array $pass_allowed_protocols
 *
 * @param array $match preg_replace_callback() matches array; only the full match is used.
 * @return string Result of wp_kses_split2() for the matched fragment.
 */
function _wp_kses_split_callback( $match ) {
	global $pass_allowed_html, $pass_allowed_protocols;

	return wp_kses_split2( $match[0], $pass_allowed_html, $pass_allowed_protocols );
}
/**
 * Callback for wp_kses_split for fixing malformed HTML tags.
 *
 * This function does a lot of work. It rejects some very malformed things like
 * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
 * strip_tags()!). Otherwise it splits the tag into an element and an attribute
 * list.
 *
 * After the tag is split into an element and an attribute list, it is run
 * through another filter which will remove illegal attributes and once that is
 * completed, will be returned.
 *
 * @param string       $string            Content to filter
 * @param array|string $allowed_html      Allowed HTML elements, or a context name
 *                                        resolved through wp_kses_allowed_html().
 * @param array        $allowed_protocols Allowed protocols to keep
 * @return string Fixed HTML element
 */
function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
	$string = wp_kses_stripslashes($string);

	// It matched a ">" character
	if (substr($string, 0, 1) != '<') {
		return '&gt;';
	}

	// Allow HTML comments
	if ( '<!--' == substr( $string, 0, 4 ) ) {
		$string = str_replace( array('<!--', '-->'), '', $string );
		// Re-filter the comment body until it stops changing.
		while ( $string != ($newstring = wp_kses($string, $allowed_html, $allowed_protocols)) ) {
			$string = $newstring;
		}
		if ( $string == '' ) {
			return '';
		}
		// prevent multiple dashes in comments
		$string = preg_replace('/--+/', '-', $string);
		// prevent three dashes closing a comment
		$string = preg_replace('/-$/', '', $string);
		return "<!--{$string}-->";
	}

	// It's seriously malformed
	if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
		return '';
	}

	$slash = trim($matches[1]);
	$elem = $matches[2];
	$attrlist = $matches[3];

	if ( ! is_array( $allowed_html ) ) {
		$allowed_html = wp_kses_allowed_html( $allowed_html );
	}

	// They are using a not allowed HTML element
	if ( ! isset($allowed_html[strtolower($elem)]) ) {
		return '';
	}

	// No attributes are allowed for closing elements
	if ($slash != '') {
		return "</$elem>";
	}

	return wp_kses_attr( $elem, $attrlist, $allowed_html, $allowed_protocols );
}
/**
 * Removes all attributes, if none are allowed for this element.
 *
 * If some are allowed it calls wp_kses_hair() to split them further, and then
 * it builds up new HTML code from the data that kses_hair() returns. It also
 * removes "<" and ">" characters, if there are any left. One more thing it does
 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
 * in the returned code as well.
 *
 * @param string       $element           HTML element/tag
 * @param string       $attr              HTML attributes from HTML element to closing HTML element tag
 * @param array|string $allowed_html      Allowed HTML elements, or a context name
 *                                        resolved through wp_kses_allowed_html().
 * @param array        $allowed_protocols Allowed protocols to keep
 * @return string Sanitized HTML element
 */
function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
	if ( ! is_array( $allowed_html ) ) {
		$allowed_html = wp_kses_allowed_html( $allowed_html );
	}

	// Is there a closing XHTML slash at the end of the attributes?
	$xhtml_slash = '';
	if (preg_match('%\s*/\s*$%', $attr)) {
		$xhtml_slash = ' /';
	}

	// Are any attributes allowed at all for this element?
	// An allow-list entry may be a bare `true` instead of an array; calling
	// count() on that is an error on PHP 8, and no attribute could pass the
	// per-attribute check anyway, so it is handled like an empty list.
	$element_low = strtolower( $element );
	if ( empty( $allowed_html[ $element_low ] ) || true === $allowed_html[ $element_low ] ) {
		return "<$element$xhtml_slash>";
	}

	// Split it
	$attrarr = wp_kses_hair($attr, $allowed_protocols);

	// Go through $attrarr, and save the allowed attributes for this element
	$attr2 = '';
	foreach ( $attrarr as $arreach ) {
		if ( wp_kses_attr_check( $arreach['name'], $arreach['value'], $arreach['whole'], $arreach['vless'], $element, $allowed_html ) ) {
			$attr2 .= ' '.$arreach['whole'];
		}
	}

	// Remove any "<" or ">" characters
	$attr2 = preg_replace('/[<>]/', '', $attr2);

	return "<$element$attr2$xhtml_slash>";
}
/**
 * Determine whether an attribute is allowed.
 *
 * The first three arguments are taken by reference: they are all set to an
 * empty string when the attribute is rejected, and $value / $whole are
 * rewritten when a style attribute is filtered.
 *
 * @param string $name         The attribute name. Returns empty string when not allowed.
 * @param string $value        The attribute value. Returns a filtered value.
 * @param string $whole        The name=value input. Returns filtered input.
 * @param string $vless        'y' when attribute like "enabled", otherwise 'n'.
 * @param string $element      The name of the element to which this attribute belongs.
 * @param array  $allowed_html The full list of allowed elements and attributes.
 * @return bool Is the attribute allowed?
 */
function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowed_html ) {
	$element_low = strtolower( $element );

	// An element that is not in the allow-list cannot have allowed attributes.
	// Checked explicitly so the lookup below never reads an undefined index.
	if ( ! isset( $allowed_html[ $element_low ] ) ) {
		$name = $value = $whole = '';
		return false;
	}

	$allowed_attr = $allowed_html[ $element_low ];

	$name_low = strtolower( $name );
	if ( ! isset( $allowed_attr[$name_low] ) || '' == $allowed_attr[$name_low] ) {
		$name = $value = $whole = '';
		return false;
	}

	if ( 'style' == $name_low ) {
		$new_value = safecss_filter_attr( $value );

		if ( empty( $new_value ) ) {
			$name = $value = $whole = '';
			return false;
		}

		$whole = str_replace( $value, $new_value, $whole );
		$value = $new_value;
	}

	if ( is_array( $allowed_attr[$name_low] ) ) {
		// there are some checks
		foreach ( $allowed_attr[$name_low] as $currkey => $currval ) {
			if ( ! wp_kses_check_attr_val( $value, $vless, $currkey, $currval ) ) {
				$name = $value = $whole = '';
				return false;
			}
		}
	}

	return true;
}
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from attribute values. It also reduces duplicate attributes by using the
 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * @param string $attr              Attribute list from HTML element to closing HTML element tag
 * @param array  $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing, keyed by attribute name. Each
 *               entry has the keys 'name', 'value', 'whole' and 'vless'.
 */
function wp_kses_hair($attr, $allowed_protocols) {
	$attrarr = array();
	$mode = 0; // 0 = expecting a name, 1 = expecting "=" or nothing, 2 = expecting a value
	$attrname = '';
	$uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

	// Loop through the whole attribute list

	while (strlen($attr) != 0) {
		$working = 0; // Was the last operation successful?

		switch ($mode) {
			case 0 : // attribute name, href for instance

				if ( preg_match('/^([-a-zA-Z:]+)/', $attr, $match ) ) {
					$attrname = $match[1];
					$working = $mode = 1;
					$attr = preg_replace( '/^[-a-zA-Z:]+/', '', $attr );
				}

				break;

			case 1 : // equals sign or valueless ("selected")

				if (preg_match('/^\s*=\s*/', $attr)) { // equals sign
					$working = 1;
					$mode = 2;
					$attr = preg_replace('/^\s*=\s*/', '', $attr);
					break;
				}

				if (preg_match('/^\s+/', $attr)) { // valueless
					$working = 1;
					$mode = 0;
					if(false === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
					}
					$attr = preg_replace('/^\s+/', '', $attr);
				}

				break;

			case 2 : // attribute value, a URL after href= for instance

				if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match)) {
					// "value"
					$thisval = $match[1];
					if ( in_array(strtolower($attrname), $uris, true) ) {
						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
					}

					if(false === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
					}
					$working = 1;
					$mode = 0;
					$attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
					break;
				}

				if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match)) {
					// 'value'
					$thisval = $match[1];
					if ( in_array(strtolower($attrname), $uris, true) ) {
						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
					}

					if(false === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
					}
					$working = 1;
					$mode = 0;
					$attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
					break;
				}

				if (preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match)) {
					// value
					$thisval = $match[1];
					if ( in_array(strtolower($attrname), $uris, true) ) {
						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
					}

					if(false === array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
					}
					// We add quotes to conform to W3C's HTML spec.
					$working = 1;
					$mode = 0;
					$attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
				}

				break;
		} // switch

		if ($working == 0) { // not well formed, remove and try again
			$attr = wp_kses_html_error($attr);
			$mode = 0;
		}
	} // while

	if ($mode == 1 && false === array_key_exists($attrname, $attrarr)) {
		// special case, for when the attribute list ends with a valueless
		// attribute like "selected"
		$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
	}

	return $attrarr;
}
/**
 * Finds all attributes of an HTML element.
 *
 * Does not modify input. May return "evil" output.
 *
 * Based on wp_kses_split2() and wp_kses_attr()
 *
 * @param string $element HTML element/tag
 * @return array|bool List of attributes found in $element, with the opening
 *                    "<name" fragment first and the closing fragment last.
 *                    Returns false on failure.
 */
function wp_kses_attr_parse( $element ) {
	$valid = preg_match('%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches);
	if ( 1 !== $valid ) {
		return false;
	}

	$begin  = $matches[1];
	$slash  = $matches[2];
	$elname = $matches[3];
	$attr   = $matches[4];
	$end    = $matches[5];

	if ( '' !== $slash ) {
		// Closing elements do not get parsed.
		return false;
	}

	// Is there a closing XHTML slash at the end of the attributes?
	if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) {
		$xhtml_slash = $matches[0];
		$attr = substr( $attr, 0, -strlen( $xhtml_slash ) );
	} else {
		$xhtml_slash = '';
	}

	// Split it
	$attrarr = wp_kses_hair_parse( $attr );
	if ( false === $attrarr ) {
		return false;
	}

	// Make sure all input is returned by adding front and back matter.
	array_unshift( $attrarr, $begin . $slash . $elname );
	array_push( $attrarr, $xhtml_slash . $end );

	return $attrarr;
}
/**
 * Builds an attribute list from string containing attributes.
 *
 * Does not modify input. May return "evil" output.
 * In case of unexpected input, returns false instead of stripping things.
 *
 * Based on wp_kses_hair() but does not return a multi-dimensional array.
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @return array|bool List of attributes found in $attr. Returns false on failure.
 */
function wp_kses_hair_parse( $attr ) {
	if ( '' === $attr ) {
		return array();
	}

	$regex =
	  '(?:'
	.     '[-a-zA-Z:]+'   // Attribute name.
	. '|'
	.     '\[\[?[^\[\]]+\]\]?' // Shortcode in the name position implies unfiltered_html.
	. ')'
	. '(?:'               // Attribute value.
	.     '\s*=\s*'       // All values begin with '='
	.     '(?:'
	.         '"[^"]*"'   // Double-quoted
	.     '|'
	.         "'[^']*'"   // Single-quoted
	.     '|'
	.         '[^\s"\']+' // Non-quoted
	.         '(?:\s|$)'  // Must have a space
	.     ')'
	. '|'
	.     '(?:\s|$)'      // If attribute has no value, space is required.
	. ')'
	. '\s*';              // Trailing space is optional except as mentioned above.

	// Although it is possible to reduce this procedure to a single regexp,
	// we must run that regexp twice to get exactly the expected result.

	$validation = "%^($regex)+$%";
	$extraction = "%$regex%";

	if ( 1 === preg_match( $validation, $attr ) ) {
		preg_match_all( $extraction, $attr, $attrarr );
		return $attrarr[0];
	}

	return false;
}
/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
 * and "valueless". An unrecognized check name passes.
 *
 * @param string $value      Attribute value
 * @param string $vless      Whether the value is valueless. Use 'y' or 'n'
 * @param string $checkname  What $checkvalue is checking for.
 * @param mixed  $checkvalue What constraint the value should pass
 * @return bool Whether check passes
 */
function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
	$ok = true;

	switch (strtolower($checkname)) {
		case 'maxlen' :
			// The maxlen check makes sure that the attribute value has a length not
			// greater than the given value. This can be used to avoid Buffer Overflows
			// in WWW clients and various Internet servers.

			if (strlen($value) > $checkvalue) {
				$ok = false;
			}
			break;

		case 'minlen' :
			// The minlen check makes sure that the attribute value has a length not
			// smaller than the given value.

			if (strlen($value) < $checkvalue) {
				$ok = false;
			}
			break;

		case 'maxval' :
			// The maxval check does two things: it checks that the attribute value is
			// an integer from 0 and up, without an excessive amount of zeroes or
			// whitespace (to avoid Buffer Overflows). It also checks that the attribute
			// value is not greater than the given value.
			// This check can be used to avoid Denial of Service attacks.

			if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value)) {
				$ok = false;
			} elseif ( (int) $value > $checkvalue ) {
				// Cast so padded digits are compared as a number, never as text.
				$ok = false;
			}
			break;

		case 'minval' :
			// The minval check makes sure that the attribute value is a positive integer,
			// and that it is not smaller than the given value.

			if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value)) {
				$ok = false;
			} elseif ( (int) $value < $checkvalue ) {
				$ok = false;
			}
			break;

		case 'valueless' :
			// The valueless check makes sure if the attribute has a value
			// (like <a href="blah">) or not (<option selected>). If the given value
			// is a "y" or a "Y", the attribute must not have a value.
			// If the given value is an "n" or an "N", the attribute must have one.

			if (strtolower($checkvalue) != $vless) {
				$ok = false;
			}
			break;
	} // switch

	return $ok;
}
/**
 * Sanitize string from bad protocols.
 *
 * This function removes all non-allowed protocols from the beginning of
 * $string. It ignores whitespace and the case of the letters, and it does
 * understand HTML entities. It does its work in a while loop, so it won't be
 * fooled by a string like "javascript:javascript:alert(57)".
 *
 * @param string $string            Content to filter bad protocols from
 * @param array  $allowed_protocols Allowed protocols to keep
 * @return string Filtered content. Empty when the string is still changing
 *                after the maximum number of passes.
 */
function wp_kses_bad_protocol($string, $allowed_protocols) {
	$string = wp_kses_no_null($string);
	$iterations = 0;

	// Strip one protocol per pass until the string is stable, giving up
	// after six passes.
	do {
		$original_string = $string;
		$string = wp_kses_bad_protocol_once($string, $allowed_protocols);
	} while ( $original_string != $string && ++$iterations < 6 );

	// Still changing after the last pass: treat the whole value as hostile.
	if ( $original_string != $string ) {
		return '';
	}

	return $string;
}
/**
 * Removes any invalid control characters in $string.
 *
 * Also removes any instance of the '\0' string.
 *
 * @param string $string
 * @param array  $options Set 'slash_zero' => 'keep' when '\0' is allowed. Default is 'remove'.
 * @return string
 */
function wp_kses_no_null( $string, $options = null ) {
	if ( ! isset( $options['slash_zero'] ) ) {
		$options = array( 'slash_zero' => 'remove' );
	}

	// Control characters other than tab (\x09), line feed (\x0A) and
	// carriage return (\x0D).
	$string = preg_replace( '/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $string );
	if ( 'remove' == $options['slash_zero'] ) {
		// One or more backslashes followed by one or more zeroes.
		$string = preg_replace( '/\\\\+0+/', '', $string );
	}

	return $string;
}
/**
 * Strips slashes from in front of quotes.
 *
 * This function changes the character sequence \" to just ". It leaves all
 * other slashes alone. It's really weird, but the quoting from
 * preg_replace(//e) seems to require this.
 *
 * @param string $string String to strip slashes
 * @return string Fixed string with quoted slashes
 */
function wp_kses_stripslashes($string) {
	return preg_replace('%\\\\"%', '"', $string);
}
/**
 * Goes through an array and changes the keys to all lower case.
 *
 * Only the first two levels of keys are lower-cased; values on the second
 * level are copied unchanged.
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function wp_kses_array_lc($inarray) {
	$outarray = array ();

	foreach ( (array) $inarray as $inkey => $inval) {
		$outkey = strtolower($inkey);
		$outarray[$outkey] = array ();

		foreach ( (array) $inval as $inkey2 => $inval2) {
			$outkey2 = strtolower($inkey2);
			$outarray[$outkey][$outkey2] = $inval2;
		} // foreach $inval
	} // foreach $inarray

	return $outarray;
}
/**
 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 *
 * These have the form &{...}; and are removed even when the closing brace
 * or semicolon is missing.
 *
 * @param string $string
 * @return string
 */
function wp_kses_js_entities($string) {
	return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
}
/**
 * Handles parsing errors in wp_kses_hair().
 *
 * The general plan is to remove everything to and including some whitespace,
 * but it deals with quotes and apostrophes as well.
 *
 * @param string $string
 * @return string
 */
function wp_kses_html_error($string) {
	return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
}
/**
 * Sanitizes content from bad protocols and other characters.
 *
 * This function searches for URL protocols at the beginning of $string, while
 * handling whitespace and HTML entities.
 *
 * @param string $string            Content to check for bad protocols
 * @param string $allowed_protocols Allowed protocols
 * @param int    $count             Optional. Nesting depth, incremented each time the
 *                                  function recurses past a "feed:" prefix.
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once($string, $allowed_protocols, $count = 1 ) {
	// Split on the first colon, whether literal or written as a decimal
	// (&#58;) or hexadecimal (&#x3a;) entity with any number of leading zeroes.
	$string2 = preg_split( '/:|&#0*58;|&#x0*3a;/i', $string, 2 );
	if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) ) {
		$string = trim( $string2[1] );
		$protocol = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols );
		if ( 'feed:' == $protocol ) {
			// "feed:" may wrap another protocol; limit how deep that can nest.
			if ( $count > 2 ) {
				return '';
			}
			$string = wp_kses_bad_protocol_once( $string, $allowed_protocols, ++$count );
			if ( empty( $string ) ) {
				return $string;
			}
		}
		$string = $protocol . $string;
	}

	return $string;
}
/**
 * Callback for wp_kses_bad_protocol_once() regular expression.
 *
 * This function processes URL protocols, checks to see if they're in the
 * whitelist or not, and returns different data depending on the answer.
 *
 * @param string $string            URI scheme to check against the whitelist
 * @param string $allowed_protocols Allowed protocols
 * @return string The normalized scheme followed by a colon when it is allowed,
 *                otherwise an empty string.
 */
function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
	// Normalize before comparing: decode numeric entities, drop whitespace
	// and control characters, lower-case.
	$string2 = wp_kses_decode_entities($string);
	$string2 = preg_replace('/\s/', '', $string2);
	$string2 = wp_kses_no_null($string2);
	$string2 = strtolower($string2);

	$allowed = false;
	foreach ( (array) $allowed_protocols as $one_protocol ) {
		if ( strtolower($one_protocol) == $string2 ) {
			$allowed = true;
			break;
		}
	}

	if ($allowed) {
		return "$string2:";
	}

	return '';
}
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert `AT&T` to the correct
 * `AT&amp;T`, `&#00058;` to `&#58;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on.
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function wp_kses_normalize_entities($string) {
	// Disarm all entities by converting & to &amp;
	$string = str_replace('&', '&amp;', $string);

	// Change back the allowed entities in our entity whitelist.
	// Every ampersand is "&amp;" at this point, so the patterns match that form.
	$string = preg_replace_callback('/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string);
	$string = preg_replace_callback('/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string);
	$string = preg_replace_callback('/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string);

	return $string;
}
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * This function only accepts valid named entity references, which are finite,
 * case-sensitive, and highly scrutinized by HTML and XML validators.
 *
 * @global array $allowedentitynames
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_named_entities($matches) {
	global $allowedentitynames;

	if ( empty($matches[1]) ) {
		return '';
	}

	$i = $matches[1];

	// Known names are re-armed as real entities; unknown names keep the
	// escaped ampersand so they render as literal text.
	return ( ! in_array( $i, $allowedentitynames, true ) ) ? "&amp;$i;" : "&$i;";
}
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * This function helps {@see wp_kses_normalize_entities()} to only accept
 * `&#number;` entities whose value passes valid_unicode().
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities2($matches) {
	if ( empty($matches[1]) ) {
		return '';
	}

	$i = $matches[1];
	if (valid_unicode($i)) {
		// Drop redundant leading zeroes but keep at least three digits.
		$i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
		$i = "&#$i;";
	} else {
		// Not a valid code point: leave the ampersand escaped.
		$i = "&amp;#$i;";
	}

	return $i;
}
/**
 * Callback for wp_kses_normalize_entities() for regular expression.
 *
 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
 * numeric entities in hex form.
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities3($matches) {
	if ( empty($matches[1]) ) {
		return '';
	}

	$hexchars = $matches[1];

	// Invalid code points keep the escaped ampersand; valid ones are
	// re-armed with leading zeroes removed.
	return ( ! valid_unicode( hexdec( $hexchars ) ) ) ? "&amp;#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';';
}
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepts tab, line feed, carriage return, and the ranges 0x20-0xD7FF,
 * 0xE000-0xFFFD and 0x10000-0x10FFFF.
 *
 * @param int $i Unicode value
 * @return bool True if the value was a valid Unicode number
 */
function valid_unicode($i) {
	return ( $i == 0x9 || $i == 0xa || $i == 0xd ||
			($i >= 0x20 && $i <= 0xd7ff) ||
			($i >= 0xe000 && $i <= 0xfffd) ||
			($i >= 0x10000 && $i <= 0x10ffff) );
}
/**
 * Convert all entities to their character counterparts.
 *
 * This function decodes numeric HTML entities (`&#65;` and `&#x41;`).
 * It doesn't do anything with other entities like `&auml;`, but we don't
 * need them in the URL protocol whitelisting system anyway.
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities( $string ) {
	// Decimal references first, then hexadecimal ones (either "x" or "X").
	$string = preg_replace_callback( '/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string );
	$string = preg_replace_callback( '/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string );

	return $string;
}
/**
 * Regex callback for wp_kses_decode_entities().
 *
 * @param array $match preg match; index 1 holds the decimal digits of the entity
 * @return string The single byte chr() yields for that number
 */
function _wp_kses_decode_entities_chr( $match ) {
	// The digits come from a pattern with no length limit. Cast explicitly so
	// chr() always receives an integer, rather than relying on implicit
	// string-to-int coercion, which is not dependable for oversized digit runs.
	return chr( (int) $match[1] );
}
/**
 * Regex callback for wp_kses_decode_entities().
 *
 * @param array $match preg match; index 1 holds the hexadecimal digits of the entity
 * @return string The single byte chr() yields for that number
 */
function _wp_kses_decode_entities_chr_hexdec( $match ) {
	$code = hexdec( $match[1] );

	// hexdec() hands back a float once the value no longer fits in an integer,
	// and the digits come from a pattern with no length limit. Reduce such a
	// value to a byte first so chr() is always given an integer.
	if ( ! is_int( $code ) ) {
		$code = (int) fmod( $code, 256 );
	}

	return chr( $code );
}
/**
 * Sanitize content with allowed HTML Kses rules.
 *
 * Slashes are removed before filtering and added back afterwards. The value
 * returned by current_filter() is passed to wp_kses() as the allowed-HTML
 * argument.
 *
 * @param string $data Content to filter, expected to be escaped with slashes
 * @return string Filtered content
 */
function wp_filter_kses( $data ) {
	return addslashes( wp_kses( stripslashes( $data ), current_filter() ) );
}
/**
 * Sanitize content with allowed HTML Kses rules.
 *
 * The value returned by current_filter() is passed to wp_kses() as the
 * allowed-HTML argument. No slashes are removed or added.
 *
 * @param string $data Content to filter, expected to not be escaped
 * @return string Filtered content
 */
function wp_kses_data( $data ) {
	return wp_kses( $data, current_filter() );
}
/**
 * Sanitize content for allowed HTML tags for post content.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms.
 *
 * Slashes are removed before filtering and added back afterwards.
 *
 * @param string $data Post content to filter, expected to be escaped with slashes
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses( $data ) {
	return addslashes( wp_kses( stripslashes( $data ), 'post' ) );
}
/**
 * Sanitize content for allowed HTML tags for post content.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms.
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_kses_post( $data ) {
	return wp_kses( $data, 'post' );
}
/**
 * Navigates through an array, object, or scalar, and sanitizes content for
 * allowed HTML tags for post content.
 *
 * @see map_deep()
 *
 * @param mixed $data The array, object, or scalar value to inspect.
 * @return mixed The filtered content.
 */
function wp_kses_post_deep( $data ) {
	return map_deep( $data, 'wp_kses_post' );
}
/**
 * Strips all of the HTML in the content.
 *
 * Slashes are removed before filtering and added back afterwards; wp_kses()
 * is called with 'strip' as its allowed-HTML argument.
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses( $data ) {
	return addslashes( wp_kses( stripslashes( $data ), 'strip' ) );
}
/**
 * Adds all Kses input form content filters.
 *
 * All hooks have default priority. The wp_filter_kses() function is added to
 * the 'title_save_pre' hook. On 'pre_comment_content', wp_filter_post_kses()
 * is added for users with the 'unfiltered_html' capability and
 * wp_filter_kses() for everyone else.
 *
 * The wp_filter_post_kses() function is added to the 'content_save_pre',
 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
 */
function kses_init_filters() {
	// Normal filtering
	add_filter( 'title_save_pre', 'wp_filter_kses' );

	// Comment filtering: exactly one of the two callbacks is registered.
	if ( current_user_can( 'unfiltered_html' ) ) {
		add_filter( 'pre_comment_content', 'wp_filter_post_kses' );
	} else {
		add_filter( 'pre_comment_content', 'wp_filter_kses' );
	}

	// Post filtering
	add_filter( 'content_save_pre', 'wp_filter_post_kses' );
	add_filter( 'excerpt_save_pre', 'wp_filter_post_kses' );
	add_filter( 'content_filtered_save_pre', 'wp_filter_post_kses' );
}
/**
 * Removes all Kses input form content filters.
 *
 * A quick procedural way of removing every filter that kses_init_filters()
 * may have added, including both possible 'pre_comment_content' callbacks.
 *
 * Does not remove the kses_init() function from 'init' hook (priority is
 * default). Also does not remove kses_init() function from 'set_current_user'
 * hook (priority is also default).
 */
function kses_remove_filters() {
	// Normal filtering
	remove_filter( 'title_save_pre', 'wp_filter_kses' );

	// Comment filtering
	remove_filter( 'pre_comment_content', 'wp_filter_post_kses' );
	remove_filter( 'pre_comment_content', 'wp_filter_kses' );

	// Post filtering
	remove_filter( 'content_save_pre', 'wp_filter_post_kses' );
	remove_filter( 'excerpt_save_pre', 'wp_filter_post_kses' );
	remove_filter( 'content_filtered_save_pre', 'wp_filter_post_kses' );
}
/**
 * Sets up most of the Kses filters for input form content.
 *
 * If you remove the kses_init() function from 'init' hook and
 * 'set_current_user' (priority is default), then none of the Kses filter hooks
 * will be added.
 *
 * First removes all of the Kses filters in case the current user does not need
 * to have Kses filter the content. If the user does not have unfiltered_html
 * capability, then Kses filters are added.
 */
function kses_init() {
	// Start from a clean slate so repeated calls never stack duplicate filters.
	kses_remove_filters();

	if ( ! current_user_can( 'unfiltered_html' ) ) {
		kses_init_filters();
	}
}
/**
 * Filters an inline style attribute and removes disallowed rules.
 *
 * The whole value is rejected (empty string returned) when it contains a
 * backslash, `(`, `&`, `=`, `}` or a comment opener. Otherwise the value is
 * split on `;` and only declarations whose property name is in the allowed
 * list are kept. Items without a colon are passed through unchanged.
 *
 * @param string $css        A string of CSS rules.
 * @param string $deprecated Not used.
 * @return string Filtered string of CSS rules.
 */
function safecss_filter_attr( $css, $deprecated = '' ) {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented
	}

	$css = wp_kses_no_null( $css );
	$css = str_replace( array( "\n", "\r", "\t" ), '', $css );

	// Remove any inline css containing \ ( & } = or comments.
	if ( preg_match( '%[\\\\(&=}]|/\*%', $css ) ) {
		return '';
	}

	$css_array = explode( ';', trim( $css ) );

	/**
	 * Filter list of allowed CSS attributes.
	 *
	 * @param array $attr List of allowed CSS attributes.
	 */
	$allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
	'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
	'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
	'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
	'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
	'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
	'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
	'font-variant', 'font-weight', 'height', 'min-height','max-height' , 'letter-spacing', 'line-height', 'margin-bottom',
	'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
	'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
	'width', 'min-width', 'max-width' ) );

	// An empty allow list means "no per-property filtering": return the value
	// as it stands after the checks above.
	if ( empty( $allowed_attr ) ) {
		return $css;
	}

	$css = '';
	foreach ( $css_array as $css_item ) {
		if ( $css_item == '' ) {
			continue;
		}

		$css_item = trim( $css_item );
		$found    = false;

		if ( strpos( $css_item, ':' ) === false ) {
			// No property/value separator, so there is nothing to look up.
			$found = true;
		} else {
			$parts = explode( ':', $css_item );
			if ( in_array( trim( $parts[0] ), $allowed_attr, true ) ) {
				$found = true;
			}
		}

		if ( $found ) {
			// Re-join the surviving declarations with single semicolons.
			if ( $css != '' ) {
				$css .= ';';
			}
			$css .= $css_item;
		}
	}

	return $css;
}
1752 * Helper function to add global attributes to a tag in the allowed html list.
1757 * @param array $value An array of attributes.
1758 * @return array The array of attributes with global attributes added.
1760 function _wp_add_global_attributes( $value ) {
1761 $global_attributes = array(
1769 if ( true === $value )
1772 if ( is_array( $value ) )
1773 return array_merge( $value, $global_attributes );