X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/7688c6ba71852cd89123b62b2d57683535e4702a..refs/tags/wordpress-2.9:/wp-includes/kses.php diff --git a/wp-includes/kses.php b/wp-includes/kses.php index 8c448958..56176b46 100644 --- a/wp-includes/kses.php +++ b/wp-includes/kses.php @@ -1,215 +1,331 @@ + * + * @package External + * @subpackage KSES + * + * @internal + * *** CONTACT INFORMATION *** + * E-mail: metaur at users dot sourceforge dot net + * Web page: http://sourceforge.net/projects/kses + * Paper mail: Ulf Harnhammar + * Ymergatan 17 C + * 753 25 Uppsala + * SWEDEN + * + * [kses strips evil scripts!] + */ + +/** + * You can override this in your my-hacks.php file You can also override this + * in a plugin file. The my-hacks.php is deprecated in its usage. + * + * @since 1.2.0 + */ if (!defined('CUSTOM_TAGS')) define('CUSTOM_TAGS', false); -// You can override this in your my-hacks.php file if (!CUSTOM_TAGS) { + /** + * Kses global for default allowable HTML tags. + * + * Can be override by using CUSTOM_TAGS constant. + * + * @global array $allowedposttags + * @since 2.0.0 + */ $allowedposttags = array( 'address' => array(), 'a' => array( - 'href' => array(), 'title' => array(), - 'rel' => array(), 'rev' => array(), - 'name' => array() - ), + 'class' => array (), + 'href' => array (), + 'id' => array (), + 'title' => array (), + 'rel' => array (), + 'rev' => array (), + 'name' => array (), + 'target' => array()), 'abbr' => array( - 'title' => array(), 'class' => array() - ), + 'class' => array (), + 'title' => array ()), 'acronym' => array( - 'title' => array() - ), + 'title' => array ()), 'b' => array(), 'big' => array(), 'blockquote' => array( - 'cite' => array(), 'xml:lang' => array(), - 'lang' => array() - ), - 'br' => array(), + 'id' => array (), + 'cite' => array (), + 'class' => array(), + 'lang' => array(), + 'xml:lang' => array()), + 'br' => array ( + 'class' => array ()), 'button' => array( - 'disabled' => array(), 'name' => array(), - 'type' => array(), 'value' => array() - ), + 'disabled' => array (), + 'name' => array (), + 'type' => array (), + 'value' => array ()), 'caption' => array( - 'align' => array() - ), - 'code' => array(), + 'align' => array (), + 'class' => array ()), + 'cite' => array ( + 'class' => array(), + 'dir' => array(), + 'lang' => array(), + 'title' => array ()), + 'code' => array ( + 'style' => array()), 'col' => array( - 'align' => array(), 'char' => array(), - 'charoff' => array(), 'span' => array(), - 'valign' => array(), 'width' => array() - ), + 'align' => array (), + 'char' => array (), + 'charoff' => array (), + 'span' => array (), + 'dir' => array(), + 'style' => array (), + 'valign' => array (), + 'width' => array ()), 'del' => array( - 'datetime' => array() - ), + 'datetime' => array ()), 'dd' => array(), 'div' => array( - 'align' => array(), 'xml:lang' => array(), - 'lang' => array() - ), + 'align' => array (), + 'class' => array (), + 'dir' => array (), + 'lang' => array(), + 'style' => array (), + 'xml:lang' => array()), 'dl' => array(), 'dt' => array(), 'em' => array(), 'fieldset' => array(), 'font' => array( - 'color' => array(), 'face' => array(), - 'size' => array() - ), + 'color' => array (), + 'face' => array (), + 'size' => array ()), 'form' => array( - 'action' => array(), 'accept' => array(), - 'accept-charset' => array(), 'enctype' => array(), - 'method' => array(), 'name' => array(), - 'target' => array() - ), + 'action' => array (), + 'accept' => array (), + 'accept-charset' => array (), + 'enctype' => array (), + 'method' => array (), + 'name' => array (), + 'target' => array ()), 'h1' => array( - 'align' => array() - ), - 'h2' => array( - 'align' => array() - ), - 'h3' => array( - 'align' => array() - ), - 'h4' => array( - 'align' => array() - ), - 'h5' => array( - 'align' => array() - ), - 'h6' => array( - 'align' => array() - ), - 'hr' => array( - 'align' => array(), 'noshade' => array(), - 'size' => array(), 'width' => array() - ), + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h2' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h3' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h4' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h5' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'h6' => array ( + 'align' => array (), + 'class' => array (), + 'id' => array (), + 'style' => array ()), + 'hr' => array ( + 'align' => array (), + 'class' => array (), + 'noshade' => array (), + 'size' => array (), + 'width' => array ()), 'i' => array(), 'img' => array( - 'alt' => array(), 'align' => array(), - 'border' => array(), 'height' => array(), - 'hspace' => array(), 'longdesc' => array(), - 'vspace' => array(), 'src' => array(), - 'width' => array() - ), + 'alt' => array (), + 'align' => array (), + 'border' => array (), + 'class' => array (), + 'height' => array (), + 'hspace' => array (), + 'longdesc' => array (), + 'vspace' => array (), + 'src' => array (), + 'style' => array (), + 'width' => array ()), 'ins' => array( - 'datetime' => array(), 'cite' => array() - ), + 'datetime' => array (), + 'cite' => array ()), 'kbd' => array(), 'label' => array( - 'for' => array() - ), + 'for' => array ()), 'legend' => array( - 'align' => array() - ), - 'li' => array(), + 'align' => array ()), + 'li' => array ( + 'align' => array (), + 'class' => array ()), 'p' => array( - 'align' => array(), 'xml:lang' => array(), - 'lang' => array() - ), + 'class' => array (), + 'align' => array (), + 'dir' => array(), + 'lang' => array(), + 'style' => array (), + 'xml:lang' => array()), 'pre' => array( - 'width' => array() - ), + 'style' => array(), + 'width' => array ()), 'q' => array( - 'cite' => array() - ), + 'cite' => array ()), 's' => array(), + 'span' => array ( + 'class' => array (), + 'dir' => array (), + 'align' => array (), + 'lang' => array (), + 'style' => array (), + 'title' => array (), + 'xml:lang' => array()), 'strike' => array(), 'strong' => array(), 'sub' => array(), 'sup' => array(), 'table' => array( - 'align' => array(), 'bgcolor' => array(), - 'border' => array(), 'cellpadding' => array(), - 'cellspacing' => array(), 'rules' => array(), - 'summary' => array(), 'width' => array() - ), + 'align' => array (), + 'bgcolor' => array (), + 'border' => array (), + 'cellpadding' => array (), + 'cellspacing' => array (), + 'class' => array (), + 'dir' => array(), + 'id' => array(), + 'rules' => array (), + 'style' => array (), + 'summary' => array (), + 'width' => array ()), 'tbody' => array( - 'align' => array(), 'char' => array(), - 'charoff' => array(), 'valign' => array() - ), + 'align' => array (), + 'char' => array (), + 'charoff' => array (), + 'valign' => array ()), 'td' => array( - 'abbr' => array(), 'align' => array(), - 'axis' => array(), 'bgcolor' => array(), - 'char' => array(), 'charoff' => array(), - 'colspan' => array(), 'headers' => array(), - 'height' => array(), 'nowrap' => array(), - 'rowspan' => array(), 'scope' => array(), - 'valign' => array(), 'width' => array() - ), + 'abbr' => array (), + 'align' => array (), + 'axis' => array (), + 'bgcolor' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'colspan' => array (), + 'dir' => array(), + 'headers' => array (), + 'height' => array (), + 'nowrap' => array (), + 'rowspan' => array (), + 'scope' => array (), + 'style' => array (), + 'valign' => array (), + 'width' => array ()), 'textarea' => array( - 'cols' => array(), 'rows' => array(), - 'disabled' => array(), 'name' => array(), - 'readonly' => array() - ), + 'cols' => array (), + 'rows' => array (), + 'disabled' => array (), + 'name' => array (), + 'readonly' => array ()), 'tfoot' => array( - 'align' => array(), 'char' => array(), - 'charoff' => array(), 'valign' => array() - ), + 'align' => array (), + 'char' => array (), + 'class' => array (), + 'charoff' => array (), + 'valign' => array ()), 'th' => array( - 'abbr' => array(), 'align' => array(), - 'axis' => array(), 'bgcolor' => array(), - 'char' => array(), 'charoff' => array(), - 'colspan' => array(), 'headers' => array(), - 'height' => array(), 'nowrap' => array(), - 'rowspan' => array(), 'scope' => array(), - 'valign' => array(), 'width' => array() - ), + 'abbr' => array (), + 'align' => array (), + 'axis' => array (), + 'bgcolor' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'colspan' => array (), + 'headers' => array (), + 'height' => array (), + 'nowrap' => array (), + 'rowspan' => array (), + 'scope' => array (), + 'valign' => array (), + 'width' => array ()), 'thead' => array( - 'align' => array(), 'char' => array(), - 'charoff' => array(), 'valign' => array() - ), + 'align' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'valign' => array ()), 'title' => array(), 'tr' => array( - 'align' => array(), 'bgcolor' => array(), - 'char' => array(), 'charoff' => array(), - 'valign' => array() - ), + 'align' => array (), + 'bgcolor' => array (), + 'char' => array (), + 'charoff' => array (), + 'class' => array (), + 'style' => array (), + 'valign' => array ()), 'tt' => array(), 'u' => array(), - 'ul' => array(), - 'ol' => array(), - 'var' => array() - ); - + 'ul' => array ( + 'class' => array (), + 'style' => array (), + 'type' => array ()), + 'ol' => array ( + 'class' => array (), + 'start' => array (), + 'style' => array (), + 'type' => array ()), + 'var' => array ()); + + /** + * Kses allowed HTML elements. + * + * @global array $allowedtags + * @since 1.0.0 + */ $allowedtags = array( 'a' => array( - 'href' => array(), 'title' => array() - ), + 'href' => array (), + 'title' => array ()), 'abbr' => array( - 'title' => array() - ), + 'title' => array ()), 'acronym' => array( - 'title' => array() - ), + 'title' => array ()), 'b' => array(), 'blockquote' => array( - 'cite' => array() - ), + 'cite' => array ()), // 'br' => array(), + 'cite' => array (), 'code' => array(), - // 'del' => array('datetime' => array()), + 'del' => array( + 'datetime' => array ()), // 'dd' => array(), // 'dl' => array(), // 'dt' => array(), - 'em' => array(), - 'i' => array(), + 'em' => array (), 'i' => array (), // 'ins' => array('datetime' => array(), 'cite' => array()), // 'li' => array(), // 'ol' => array(), // 'p' => array(), - // 'q' => array(), + 'q' => array( + 'cite' => array ()), 'strike' => array(), 'strong' => array(), // 'sub' => array(), @@ -219,57 +335,106 @@ if (!CUSTOM_TAGS) { ); } -function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) - ############################################################################### - # This function makes sure that only the allowed HTML element names, attribute - # names and attribute values plus only sane HTML entities will occur in - # $string. You have to remove any slashes from PHP's magic quotes before you - # call this function. - ############################################################################### - { +/** + * Filters content and keeps only allowable HTML elements. + * + * This function makes sure that only the allowed HTML element names, attribute + * names and attribute values plus only sane HTML entities will occur in + * $string. You have to remove any slashes from PHP's magic quotes before you + * call this function. + * + * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news', + * 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet. This covers all common + * link protocols, except for 'javascript' which should not be allowed for + * untrusted users. + * + * @since 1.0.0 + * + * @param string $string Content to filter through kses + * @param array $allowed_html List of allowed HTML elements + * @param array $allowed_protocols Optional. Allowed protocol in links. + * @return string Filtered content with only allowed HTML elements + */ +function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { $string = wp_kses_no_null($string); $string = wp_kses_js_entities($string); $string = wp_kses_normalize_entities($string); $allowed_html_fixed = wp_kses_array_lc($allowed_html); $string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols); -} # function wp_kses +} -function wp_kses_hook($string, $allowed_html, $allowed_protocols) -############################################################################### -# You add any kses hooks here. -############################################################################### -{ +/** + * You add any kses hooks here. + * + * There is currently only one kses WordPress hook and it is called here. All + * parameters are passed to the hooks and expected to recieve a string. + * + * @since 1.0.0 + * + * @param string $string Content to filter through kses + * @param array $allowed_html List of allowed HTML elements + * @param array $allowed_protocols Allowed protocol in links + * @return string Filtered content through 'pre_kses' hook + */ +function wp_kses_hook($string, $allowed_html, $allowed_protocols) { $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols); return $string; -} # function wp_kses_hook +} -function wp_kses_version() -############################################################################### -# This function returns kses' version number. -############################################################################### -{ +/** + * This function returns kses' version number. + * + * @since 1.0.0 + * + * @return string KSES Version Number + */ +function wp_kses_version() { return '0.2.2'; -} # function wp_kses_version - -function wp_kses_split($string, $allowed_html, $allowed_protocols) -############################################################################### -# This function searches for HTML tags, no matter how malformed. It also -# matches stray ">" characters. -############################################################################### -{ - return preg_replace('%((|$))|(<[^>]*(>|$)|>))%e', - "wp_kses_split2('\\1', \$allowed_html, ".'$allowed_protocols)', $string); -} # function wp_kses_split - -function wp_kses_split2($string, $allowed_html, $allowed_protocols) -############################################################################### -# This function does a lot of work. It rejects some very malformed things -# like <:::>. It returns an empty string, if the element isn't allowed (look -# ma, no strip_tags()!). Otherwise it splits the tag into an element and an -# attribute list. -############################################################################### -{ +} + +/** + * Searches for HTML tags, no matter how malformed. + * + * It also matches stray ">" characters. + * + * @since 1.0.0 + * + * @param string $string Content to filter + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Content with fixed HTML tags + */ +function wp_kses_split($string, $allowed_html, $allowed_protocols) { + global $pass_allowed_html, $pass_allowed_protocols; + $pass_allowed_html = $allowed_html; + $pass_allowed_protocols = $allowed_protocols; + return preg_replace_callback('%((|$))|(<[^>]*(>|$)|>))%', + create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string); +} + +/** + * Callback for wp_kses_split for fixing malformed HTML tags. + * + * This function does a lot of work. It rejects some very malformed things like + * <:::>. It returns an empty string, if the element isn't allowed (look ma, no + * strip_tags()!). Otherwise it splits the tag into an element and an attribute + * list. + * + * After the tag is split into an element and an attribute list, it is run + * through another filter which will remove illegal attributes and once that is + * completed, will be returned. + * + * @access private + * @since 1.0.0 + * @uses wp_kses_attr() + * + * @param string $string Content to filter + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Fixed HTML element + */ +function wp_kses_split2($string, $allowed_html, $allowed_protocols) { $string = wp_kses_stripslashes($string); if (substr($string, 0, 1) != '<') @@ -282,6 +447,10 @@ function wp_kses_split2($string, $allowed_html, $allowed_protocols) $string = $newstring; if ( $string == '' ) return ''; + // prevent multiple dashes in comments + $string = preg_replace('/--+/', '-', $string); + // prevent three dashes closing a comment + $string = preg_replace('/-$/', '', $string); return ""; } # Allow HTML comments @@ -303,18 +472,26 @@ function wp_kses_split2($string, $allowed_html, $allowed_protocols) # No attributes are allowed for closing elements return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols); -} # function wp_kses_split2 - -function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) -############################################################################### -# This function removes all attributes, if none are allowed for this element. -# If some are allowed it calls wp_kses_hair() to split them further, and then it -# builds up new HTML code from the data that kses_hair() returns. It also -# removes "<" and ">" characters, if there are any left. One more thing it -# does is to check if the tag has a closing XHTML slash, and if it does, -# it puts one in the returned code as well. -############################################################################### -{ +} + +/** + * Removes all attributes, if none are allowed for this element. + * + * If some are allowed it calls wp_kses_hair() to split them further, and then + * it builds up new HTML code from the data that kses_hair() returns. It also + * removes "<" and ">" characters, if there are any left. One more thing it does + * is to check if the tag has a closing XHTML slash, and if it does, it puts one + * in the returned code as well. + * + * @since 1.0.0 + * + * @param string $element HTML element/tag + * @param string $attr HTML attributes from HTML element to closing HTML element tag + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Sanitized HTML element + */ +function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) { # Is there a closing XHTML slash at the end of the attributes? $xhtml_slash = ''; @@ -356,6 +533,19 @@ function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) break; } + if ( $arreach['name'] == 'style' ) { + $orig_value = $arreach['value']; + + $value = safecss_filter_attr($orig_value); + + if ( empty($value) ) + continue; + + $arreach['value'] = $value; + + $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']); + } + if ($ok) $attr2 .= ' '.$arreach['whole']; # it passed them } # if !is_array($current) @@ -366,21 +556,30 @@ function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) $attr2 = preg_replace('/[<>]/', '', $attr2); return "<$element$attr2$xhtml_slash>"; -} # function wp_kses_attr - -function wp_kses_hair($attr, $allowed_protocols) -############################################################################### -# This function does a lot of work. It parses an attribute list into an array -# with attribute data, and tries to do the right thing even if it gets weird -# input. It will add quotes around attribute values that don't have any quotes -# or apostrophes around them, to make it easier to produce HTML code that will -# conform to W3C's HTML specification. It will also remove bad URL protocols -# from attribute values. -############################################################################### -{ +} + +/** + * Builds an attribute list from string containing attributes. + * + * This function does a lot of work. It parses an attribute list into an array + * with attribute data, and tries to do the right thing even if it gets weird + * input. It will add quotes around attribute values that don't have any quotes + * or apostrophes around them, to make it easier to produce HTML code that will + * conform to W3C's HTML specification. It will also remove bad URL protocols + * from attribute values. It also reduces duplicate attributes by using the + * attribute defined first (foo='bar' foo='baz' will result in foo='bar'). + * + * @since 1.0.0 + * + * @param string $attr Attribute list from HTML element to closing HTML element tag + * @param array $allowed_protocols Allowed protocols to keep + * @return array List of attributes after parsing + */ +function wp_kses_hair($attr, $allowed_protocols) { $attrarr = array (); $mode = 0; $attrname = ''; + $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action'); # Loop through the whole attribute list @@ -412,7 +611,9 @@ function wp_kses_hair($attr, $allowed_protocols) { $working = 1; $mode = 0; - $attrarr[] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); + } $attr = preg_replace('/^\s+/', '', $attr); } @@ -423,9 +624,13 @@ function wp_kses_hair($attr, $allowed_protocols) if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) # "value" { - $thisval = wp_kses_bad_protocol($match[1], $allowed_protocols); + $thisval = $match[1]; + if ( in_array($attrname, $uris) ) + $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); - $attrarr[] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n'); + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n'); + } $working = 1; $mode = 0; $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr); @@ -435,9 +640,13 @@ function wp_kses_hair($attr, $allowed_protocols) if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) # 'value' { - $thisval = wp_kses_bad_protocol($match[1], $allowed_protocols); + $thisval = $match[1]; + if ( in_array($attrname, $uris) ) + $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); - $attrarr[] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n'); + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n'); + } $working = 1; $mode = 0; $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr); @@ -447,9 +656,13 @@ function wp_kses_hair($attr, $allowed_protocols) if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) # value { - $thisval = wp_kses_bad_protocol($match[1], $allowed_protocols); + $thisval = $match[1]; + if ( in_array($attrname, $uris) ) + $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); - $attrarr[] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n'); + if(FALSE === array_key_exists($attrname, $attrarr)) { + $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n'); + } # We add quotes to conform to W3C's HTML spec. $working = 1; $mode = 0; @@ -460,27 +673,35 @@ function wp_kses_hair($attr, $allowed_protocols) } # switch if ($working == 0) # not well formed, remove and try again - { + { $attr = wp_kses_html_error($attr); $mode = 0; } } # while - if ($mode == 1) + if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr)) # special case, for when the attribute list ends with a valueless # attribute like "selected" - $attrarr[] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); + $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); return $attrarr; -} # function wp_kses_hair - -function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) -############################################################################### -# This function performs different checks for attribute values. The currently -# implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless" -# with even more checks to come soon. -############################################################################### -{ +} + +/** + * Performs different checks for attribute values. + * + * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" + * and "valueless" with even more checks to come soon. + * + * @since 1.0.0 + * + * @param string $value Attribute value + * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' + * @param string $checkname What $checkvalue is checking for. + * @param mixed $checkvalue What constraint the value should pass + * @return bool Whether check passes (true) or not (false) + */ +function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) { $ok = true; switch (strtolower($checkname)) { @@ -536,18 +757,24 @@ function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) } # switch return $ok; -} # function wp_kses_check_attr_val - -function wp_kses_bad_protocol($string, $allowed_protocols) -############################################################################### -# This function removes all non-allowed protocols from the beginning of -# $string. It ignores whitespace and the case of the letters, and it does -# understand HTML entities. It does its work in a while loop, so it won't be -# fooled by a string like "javascript:javascript:alert(57)". -############################################################################### -{ +} + +/** + * Sanitize string from bad protocols. + * + * This function removes all non-allowed protocols from the beginning of + * $string. It ignores whitespace and the case of the letters, and it does + * understand HTML entities. It does its work in a while loop, so it won't be + * fooled by a string like "javascript:javascript:alert(57)". + * + * @since 1.0.0 + * + * @param string $string Content to filter bad protocols from + * @param array $allowed_protocols Allowed protocols to keep + * @return string Filtered content + */ +function wp_kses_bad_protocol($string, $allowed_protocols) { $string = wp_kses_no_null($string); - $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature" $string2 = $string.'a'; while ($string != $string2) { @@ -556,92 +783,146 @@ function wp_kses_bad_protocol($string, $allowed_protocols) } # while return $string; -} # function wp_kses_bad_protocol +} -function wp_kses_no_null($string) -############################################################################### -# This function removes any NULL characters in $string. -############################################################################### -{ +/** + * Removes any NULL characters in $string. + * + * @since 1.0.0 + * + * @param string $string + * @return string + */ +function wp_kses_no_null($string) { $string = preg_replace('/\0+/', '', $string); $string = preg_replace('/(\\\\0)+/', '', $string); return $string; -} # function wp_kses_no_null - -function wp_kses_stripslashes($string) -############################################################################### -# This function changes the character sequence \" to just " -# It leaves all other slashes alone. It's really weird, but the quoting from -# preg_replace(//e) seems to require this. -############################################################################### -{ +} + +/** + * Strips slashes from in front of quotes. + * + * This function changes the character sequence \" to just ". It leaves all + * other slashes alone. It's really weird, but the quoting from + * preg_replace(//e) seems to require this. + * + * @since 1.0.0 + * + * @param string $string String to strip slashes + * @return string Fixed strings with quoted slashes + */ +function wp_kses_stripslashes($string) { return preg_replace('%\\\\"%', '"', $string); -} # function wp_kses_stripslashes +} -function wp_kses_array_lc($inarray) -############################################################################### -# This function goes through an array, and changes the keys to all lower case. -############################################################################### -{ +/** + * Goes through an array and changes the keys to all lower case. + * + * @since 1.0.0 + * + * @param array $inarray Unfiltered array + * @return array Fixed array with all lowercase keys + */ +function wp_kses_array_lc($inarray) { $outarray = array (); - foreach ($inarray as $inkey => $inval) { + foreach ( (array) $inarray as $inkey => $inval) { $outkey = strtolower($inkey); $outarray[$outkey] = array (); - foreach ($inval as $inkey2 => $inval2) { + foreach ( (array) $inval as $inkey2 => $inval2) { $outkey2 = strtolower($inkey2); $outarray[$outkey][$outkey2] = $inval2; } # foreach $inval } # foreach $inarray return $outarray; -} # function wp_kses_array_lc - -function wp_kses_js_entities($string) -############################################################################### -# This function removes the HTML JavaScript entities found in early versions of -# Netscape 4. -############################################################################### -{ +} + +/** + * Removes the HTML JavaScript entities found in early versions of Netscape 4. + * + * @since 1.0.0 + * + * @param string $string + * @return string + */ +function wp_kses_js_entities($string) { return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string); -} # function wp_kses_js_entities - -function wp_kses_html_error($string) -############################################################################### -# This function deals with parsing errors in wp_kses_hair(). The general plan is -# to remove everything to and including some whitespace, but it deals with -# quotes and apostrophes as well. -############################################################################### -{ +} + +/** + * Handles parsing errors in wp_kses_hair(). + * + * The general plan is to remove everything to and including some whitespace, + * but it deals with quotes and apostrophes as well. + * + * @since 1.0.0 + * + * @param string $string + * @return string + */ +function wp_kses_html_error($string) { return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string); -} # function wp_kses_html_error - -function wp_kses_bad_protocol_once($string, $allowed_protocols) -############################################################################### -# This function searches for URL protocols at the beginning of $string, while -# handling whitespace and HTML entities. -############################################################################### -{ - return preg_replace('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|:|&#[Xx]3[Aa];)\s*/e', 'wp_kses_bad_protocol_once2("\\1", $allowed_protocols)', $string); -} # function wp_kses_bad_protocol_once - -function wp_kses_bad_protocol_once2($string, $allowed_protocols) -############################################################################### -# This function processes URL protocols, checks to see if they're in the white- -# list or not, and returns different data depending on the answer. -############################################################################### -{ +} + +/** + * Sanitizes content from bad protocols and other characters. + * + * This function searches for URL protocols at the beginning of $string, while + * handling whitespace and HTML entities. + * + * @since 1.0.0 + * + * @param string $string Content to check for bad protocols + * @param string $allowed_protocols Allowed protocols + * @return string Sanitized content + */ +function wp_kses_bad_protocol_once($string, $allowed_protocols) { + global $_kses_allowed_protocols; + $_kses_allowed_protocols = $allowed_protocols; + + $string2 = preg_split('/:|:|:/i', $string, 2); + if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) ) + $string = wp_kses_bad_protocol_once2($string2[0]) . trim($string2[1]); + else + $string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|:|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string); + + return $string; +} + +/** + * Callback for wp_kses_bad_protocol_once() regular expression. + * + * This function processes URL protocols, checks to see if they're in the + * white-list or not, and returns different data depending on the answer. + * + * @access private + * @since 1.0.0 + * + * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols + * @return string Sanitized content + */ +function wp_kses_bad_protocol_once2($matches) { + global $_kses_allowed_protocols; + + if ( is_array($matches) ) { + if ( ! isset($matches[1]) || empty($matches[1]) ) + return ''; + + $string = $matches[1]; + } else { + $string = $matches; + } + $string2 = wp_kses_decode_entities($string); $string2 = preg_replace('/\s/', '', $string2); $string2 = wp_kses_no_null($string2); - $string2 = preg_replace('/\xad+/', '', $string2); - # deals with Opera "feature" $string2 = strtolower($string2); $allowed = false; - foreach ($allowed_protocols as $one_protocol) + foreach ( (array) $_kses_allowed_protocols as $one_protocol) if (strtolower($one_protocol) == $string2) { $allowed = true; break; @@ -651,14 +932,20 @@ function wp_kses_bad_protocol_once2($string, $allowed_protocols) return "$string2:"; else return ''; -} # function wp_kses_bad_protocol_once2 - -function wp_kses_normalize_entities($string) -############################################################################### -# This function normalizes HTML entities. It will convert "AT&T" to the correct -# "AT&T", ":" to ":", "&#XYZZY;" to "&#XYZZY;" and so on. -############################################################################### -{ +} + +/** + * Converts and fixes HTML entities. + * + * This function normalizes HTML entities. It will convert "AT&T" to the correct + * "AT&T", ":" to ":", "&#XYZZY;" to "&#XYZZY;" and so on. + * + * @since 1.0.0 + * + * @param string $string Content to normalize entities + * @return string Content with normalized entities + */ +function wp_kses_normalize_entities($string) { # Disarm all entities by converting & to & $string = str_replace('&', '&', $string); @@ -666,48 +953,189 @@ function wp_kses_normalize_entities($string) # Change back the allowed entities in our entity whitelist $string = preg_replace('/&([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string); - $string = preg_replace('/&#0*([0-9]{1,5});/e', 'wp_kses_normalize_entities2("\\1")', $string); - $string = preg_replace('/&#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string); + $string = preg_replace_callback('/&#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string); + $string = preg_replace_callback('/&#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string); return $string; -} # function wp_kses_normalize_entities - -function wp_kses_normalize_entities2($i) -############################################################################### -# This function helps wp_kses_normalize_entities() to only accept 16 bit values -# and nothing more for &#number; entities. -############################################################################### -{ - return (($i > 65535) ? "&#$i;" : "&#$i;"); -} # function wp_kses_normalize_entities2 - -function wp_kses_decode_entities($string) -############################################################################### -# This function decodes numeric HTML entities (A and A). It doesn't -# do anything with other entities like ä, but we don't need them in the -# URL protocol whitelisting system anyway. -############################################################################### -{ - $string = preg_replace('/&#([0-9]+);/e', 'chr("\\1")', $string); - $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+);/e', 'chr(hexdec("\\1"))', $string); +} + +/** + * Callback for wp_kses_normalize_entities() regular expression. + * + * This function helps wp_kses_normalize_entities() to only accept 16 bit values + * and nothing more for &#number; entities. + * + * @access private + * @since 1.0.0 + * + * @param array $matches preg_replace_callback() matches array + * @return string Correctly encoded entity + */ +function wp_kses_normalize_entities2($matches) { + if ( ! isset($matches[1]) || empty($matches[1]) ) + return ''; + + $i = $matches[1]; + return ( ( ! valid_unicode($i) ) || ($i > 65535) ? "&#$i;" : "&#$i;" ); +} + +/** + * Callback for wp_kses_normalize_entities() for regular expression. + * + * This function helps wp_kses_normalize_entities() to only accept valid Unicode + * numeric entities in hex form. + * + * @access private + * + * @param array $matches preg_replace_callback() matches array + * @return string Correctly encoded entity + */ +function wp_kses_normalize_entities3($matches) { + if ( ! isset($matches[2]) || empty($matches[2]) ) + return ''; + + $hexchars = $matches[2]; + return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&#x$hexchars;" : "&#x$hexchars;" ); +} + +/** + * Helper function to determine if a Unicode value is valid. + * + * @param int $i Unicode value + * @return bool true if the value was a valid Unicode number + */ +function valid_unicode($i) { + return ( $i == 0x9 || $i == 0xa || $i == 0xd || + ($i >= 0x20 && $i <= 0xd7ff) || + ($i >= 0xe000 && $i <= 0xfffd) || + ($i >= 0x10000 && $i <= 0x10ffff) ); +} + +/** + * Convert all entities to their character counterparts. + * + * This function decodes numeric HTML entities (A and A). It doesn't do + * anything with other entities like ä, but we don't need them in the URL + * protocol whitelisting system anyway. + * + * @since 1.0.0 + * + * @param string $string Content to change entities + * @return string Content after decoded entities + */ +function wp_kses_decode_entities($string) { + $string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string); + $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string); return $string; -} # function wp_kses_decode_entities +} + +/** + * Regex callback for wp_kses_decode_entities() + * + * @param array $match preg match + * @return string + */ +function _wp_kses_decode_entities_chr( $match ) { + return chr( $match[1] ); +} + +/** + * Regex callback for wp_kses_decode_entities() + * + * @param array $match preg match + * @return string + */ +function _wp_kses_decode_entities_chr_hexdec( $match ) { + return chr( hexdec( $match[1] ) ); +} +/** + * Sanitize content with allowed HTML Kses rules. + * + * @since 1.0.0 + * @uses $allowedtags + * + * @param string $data Content to filter, expected to be escaped with slashes + * @return string Filtered content + */ function wp_filter_kses($data) { global $allowedtags; return addslashes( wp_kses(stripslashes( $data ), $allowedtags) ); } +/** + * Sanitize content with allowed HTML Kses rules. + * + * @since 2.9.0 + * @uses $allowedtags + * + * @param string $data Content to filter, expected to not be escaped + * @return string Filtered content + */ +function wp_kses_data($data) { + global $allowedtags; + return wp_kses( $data , $allowedtags ); +} + +/** + * Sanitize content for allowed HTML tags for post content. + * + * Post content refers to the page contents of the 'post' type and not $_POST + * data from forms. + * + * @since 2.0.0 + * @uses $allowedposttags + * + * @param string $data Post content to filter, expected to be escaped with slashes + * @return string Filtered post content with allowed HTML tags and attributes intact. + */ function wp_filter_post_kses($data) { global $allowedposttags; return addslashes ( wp_kses(stripslashes( $data ), $allowedposttags) ); } +/** + * Sanitize content for allowed HTML tags for post content. + * + * Post content refers to the page contents of the 'post' type and not $_POST + * data from forms. + * + * @since 2.9.0 + * @uses $allowedposttags + * + * @param string $data Post content to filter + * @return string Filtered post content with allowed HTML tags and attributes intact. + */ +function wp_kses_post($data) { + global $allowedposttags; + return wp_kses( $data , $allowedposttags ); +} + +/** + * Strips all of the HTML in the content. + * + * @since 2.1.0 + * + * @param string $data Content to strip all HTML from + * @return string Filtered content without any HTML + */ function wp_filter_nohtml_kses($data) { return addslashes ( wp_kses(stripslashes( $data ), array()) ); } +/** + * Adds all Kses input form content filters. + * + * All hooks have default priority. The wp_filter_kses() function is added to + * the 'pre_comment_content' and 'title_save_pre' hooks. + * + * The wp_filter_post_kses() function is added to the 'content_save_pre', + * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks. + * + * @since 2.0.0 + * @uses add_filter() See description for what functions are added to what hooks. + */ function kses_init_filters() { // Normal filtering. add_filter('pre_comment_content', 'wp_filter_kses'); @@ -719,6 +1147,18 @@ function kses_init_filters() { add_filter('content_filtered_save_pre', 'wp_filter_post_kses'); } +/** + * Removes all Kses input form content filters. + * + * A quick procedural method to removing all of the filters that kses uses for + * content in WordPress Loop. + * + * Does not remove the kses_init() function from 'init' hook (priority is + * default). Also does not remove kses_init() function from 'set_current_user' + * hook (priority is also default). + * + * @since 2.0.6 + */ function kses_remove_filters() { // Normal filtering. remove_filter('pre_comment_content', 'wp_filter_kses'); @@ -730,6 +1170,22 @@ function kses_remove_filters() { remove_filter('content_filtered_save_pre', 'wp_filter_post_kses'); } +/** + * Sets up most of the Kses filters for input form content. + * + * If you remove the kses_init() function from 'init' hook and + * 'set_current_user' (priority is default), then none of the Kses filter hooks + * will be added. + * + * First removes all of the Kses filters in case the current user does not need + * to have Kses filter the content. If the user does not have unfiltered html + * capability, then Kses filters are added. + * + * @uses kses_remove_filters() Removes the Kses filters + * @uses kses_init_filters() Adds the Kses filters back if the user + * does not have unfiltered HTML capability. + * @since 2.0.0 + */ function kses_init() { kses_remove_filters(); @@ -739,4 +1195,49 @@ function kses_init() { add_action('init', 'kses_init'); add_action('set_current_user', 'kses_init'); -?> + +function safecss_filter_attr( $css, $deprecated = '' ) { + $css = wp_kses_no_null($css); + $css = str_replace(array("\n","\r","\t"), '', $css); + + if ( preg_match( '%[\\(&]|/\*%', $css ) ) // remove any inline css containing \ ( & or comments + return ''; + + $css_array = split( ';', trim( $css ) ); + $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float', + 'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color', + 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left', + 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color', + 'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top', + 'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side', + 'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style', + 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom', + 'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom', + 'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align', + 'width' ) ); + + if ( empty($allowed_attr) ) + return $css; + + $css = ''; + foreach ( $css_array as $css_item ) { + if ( $css_item == '' ) + continue; + $css_item = trim( $css_item ); + $found = false; + if ( strpos( $css_item, ':' ) === false ) { + $found = true; + } else { + $parts = split( ':', $css_item ); + if ( in_array( trim( $parts[0] ), $allowed_attr ) ) + $found = true; + } + if ( $found ) { + if( $css != '' ) + $css .= ';'; + $css .= $css_item; + } + } + + return $css; +}