3 * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
4 * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar
6 * This program is free software and open source software; you can redistribute
7 * it and/or modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the License,
9 * or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
19 * http://www.gnu.org/licenses/gpl.html
21 * [kses strips evil scripts!]
23 * Added wp_ prefix to avoid conflicts with existing kses users
26 * @copyright (C) 2002, 2003, 2005
27 * @author Ulf Harnhammar <http://advogato.org/person/metaur/>
34 * You can override this in a plugin.
38 if ( ! defined( 'CUSTOM_TAGS' ) )
39 define( 'CUSTOM_TAGS', false );
41 if ( ! CUSTOM_TAGS ) {
43 * Kses global for default allowable HTML tags.
45 * Can be overridden by using the CUSTOM_TAGS constant.
47 * @global array $allowedposttags
50 $allowedposttags = array(
72 'xml:lang' => array(),
80 'xml:lang' => array(),
84 'blockquote' => array(
89 'xml:lang' => array()),
93 'disabled' => array (),
104 'title' => array ()),
110 'charoff' => array (),
114 'valign' => array (),
115 'width' => array ()),
117 'datetime' => array ()),
126 'xml:lang' => array(),
134 'xml:lang' => array()),
138 'fieldset' => array(),
145 'xml:lang' => array(),
147 'figcaption' => array(
153 'xml:lang' => array(),
165 'xml:lang' => array(),
168 'action' => array (),
169 'accept' => array (),
170 'accept-charset' => array (),
171 'enctype' => array (),
172 'method' => array (),
174 'target' => array ()),
179 'style' => array ()),
184 'style' => array ()),
189 'style' => array ()),
194 'style' => array ()),
199 'style' => array ()),
204 'style' => array ()),
211 'xml:lang' => array(),
219 'xml:lang' => array(),
224 'noshade' => array (),
226 'width' => array ()),
231 'border' => array (),
233 'height' => array (),
234 'hspace' => array (),
235 'longdesc' => array (),
236 'vspace' => array (),
239 'width' => array ()),
241 'datetime' => array (),
247 'align' => array ()),
250 'class' => array ()),
261 'xml:lang' => array(),
269 'xml:lang' => array()),
272 'width' => array ()),
283 'xml:lang' => array()),
290 'xml:lang' => array(),
301 'xml:lang' => array(),
306 'bgcolor' => array (),
307 'border' => array (),
308 'cellpadding' => array (),
309 'cellspacing' => array (),
315 'summary' => array (),
316 'width' => array ()),
320 'charoff' => array (),
321 'valign' => array ()),
326 'bgcolor' => array (),
328 'charoff' => array (),
330 'colspan' => array (),
332 'headers' => array (),
333 'height' => array (),
334 'nowrap' => array (),
335 'rowspan' => array (),
338 'valign' => array (),
339 'width' => array ()),
343 'disabled' => array (),
345 'readonly' => array ()),
350 'charoff' => array (),
351 'valign' => array ()),
356 'bgcolor' => array (),
358 'charoff' => array (),
360 'colspan' => array (),
361 'headers' => array (),
362 'height' => array (),
363 'nowrap' => array (),
364 'rowspan' => array (),
366 'valign' => array (),
367 'width' => array ()),
371 'charoff' => array (),
373 'valign' => array ()),
377 'bgcolor' => array (),
379 'charoff' => array (),
382 'valign' => array ()),
397 * Kses allowed HTML elements.
399 * @global array $allowedtags
402 $allowedtags = array(
405 'title' => array ()),
407 'title' => array ()),
409 'title' => array ()),
411 'blockquote' => array(
417 'datetime' => array ()),
421 'em' => array (), 'i' => array (),
422 // 'ins' => array('datetime' => array(), 'cite' => array()),
436 $allowedentitynames = array(
437 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen',
438 'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo',
439 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn',
440 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
441 'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde',
442 'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute',
443 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
444 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
445 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
446 'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute',
447 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
448 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
449 'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute',
450 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
451 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
452 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
453 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
454 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm',
455 'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo',
456 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
457 'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta',
458 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
459 'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi',
460 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
461 'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta',
462 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
463 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi',
464 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
465 'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym',
466 'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime',
467 'oline', 'frasl', 'weierp', 'image', 'real', 'trade',
468 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
469 'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
470 'forall', 'part', 'exist', 'empty', 'nabla', 'isin',
471 'notin', 'ni', 'prod', 'sum', 'minus', 'lowast',
472 'radic', 'prop', 'infin', 'ang', 'and', 'or',
473 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
474 'ne', 'equiv', 'le', 'ge', 'sub', 'sup',
475 'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp',
476 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
477 'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams',
/**
 * Filters content and keeps only allowable HTML elements.
 *
 * This function makes sure that only the allowed HTML element names, attribute
 * names and attribute values plus only sane HTML entities will occur in
 * $string. You have to remove any slashes from PHP's magic quotes before you
 * call this function.
 *
 * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
 * 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp' and 'svn'.
 * This covers all common link protocols, except for 'javascript' which should
 * not be allowed for untrusted users. The default list is run through the
 * 'kses_allowed_protocols' filter and is used only when the caller does not
 * supply a protocol list of its own.
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Optional. Allowed protocol in links.
 * @return string Filtered content with only allowed HTML elements
 */
function wp_kses($string, $allowed_html, $allowed_protocols = array ()) {
	// Fall back to the (filterable) defaults only when no list was given.
	// Merging the caller's list into the defaults with wp_parse_args() would
	// append to the whitelist instead of replacing it, so a caller could
	// never restrict the protocols (NOTE(review): relies on wp_parse_args()
	// using array_merge() for plain lists - defined outside this file).
	if ( empty($allowed_protocols) )
		$allowed_protocols = apply_filters('kses_allowed_protocols', array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn') );

	$string = wp_kses_no_null($string);
	$string = wp_kses_js_entities($string);
	$string = wp_kses_normalize_entities($string);
	$allowed_html_fixed = wp_kses_array_lc($allowed_html);
	$string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook
	return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols);
}
/**
 * You add any kses hooks here.
 *
 * There is currently only one kses WordPress hook, 'pre_kses', and it is
 * called here. All parameters are passed to the hook, and the hooked
 * callbacks are expected to hand back a string.
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Allowed protocol in links
 * @return string Filtered content through 'pre_kses' hook
 */
function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
	$string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);

	// Hand the filtered content back; wp_kses() assigns this result.
	return $string;
}
530 * This function returns kses' version number.
534 * @return string KSES Version Number
536 function wp_kses_version() {
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * It also matches stray ">" characters. Every match (a comment, a tag-like
 * run starting with "<", or a lone ">") is handed to wp_kses_split2() and
 * replaced by whatever that function returns.
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Content with fixed HTML tags
 */
function wp_kses_split($string, $allowed_html, $allowed_protocols) {
	// preg_replace_callback() only passes the match, so the two whitelists
	// travel to the callback through these globals.
	global $pass_allowed_html, $pass_allowed_protocols;
	$pass_allowed_html = $allowed_html;
	$pass_allowed_protocols = $allowed_protocols;

	// A named callback replaces create_function(), which is deprecated in
	// PHP 7.2, removed in PHP 8.0, and compiled a new function on each call.
	return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', '_wp_kses_split_callback', $string);
}

/**
 * Callback for wp_kses_split().
 *
 * Reads the whitelists stored by wp_kses_split() and forwards the matched
 * text to wp_kses_split2().
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array
 * @return string Replacement for the matched text
 */
function _wp_kses_split_callback($match) {
	global $pass_allowed_html, $pass_allowed_protocols;
	return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);
}
561 * Callback for wp_kses_split for fixing malformed HTML tags.
563 * This function does a lot of work. It rejects some very malformed things like
564 * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
565 * strip_tags()!). Otherwise it splits the tag into an element and an attribute
568 * After the tag is split into an element and an attribute list, it is run
569 * through another filter which will remove illegal attributes and once that is
570 * completed, will be returned.
574 * @uses wp_kses_attr()
576 * @param string $string Content to filter
577 * @param array $allowed_html Allowed HTML elements
578 * @param array $allowed_protocols Allowed protocols to keep
579 * @return string Fixed HTML element
581 function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
582 $string = wp_kses_stripslashes($string);
584 if (substr($string, 0, 1) != '<')
586 # It matched a ">" character
588 if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches)) {
589 $string = str_replace(array('<!--', '-->'), '', $matches[1]);
590 while ( $string != $newstring = wp_kses($string, $allowed_html, $allowed_protocols) )
591 $string = $newstring;
594 // prevent multiple dashes in comments
595 $string = preg_replace('/--+/', '-', $string);
596 // prevent three dashes closing a comment
597 $string = preg_replace('/-$/', '', $string);
598 return "<!--{$string}-->";
600 # Allow HTML comments
602 if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
604 # It's seriously malformed
606 $slash = trim($matches[1]);
608 $attrlist = $matches[3];
610 if (!@isset($allowed_html[strtolower($elem)]))
612 # They are using a not allowed HTML element
615 return "<$slash$elem>";
616 # No attributes are allowed for closing elements
618 return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols);
622 * Removes all attributes, if none are allowed for this element.
624 * If some are allowed it calls wp_kses_hair() to split them further, and then
625 * it builds up new HTML code from the data that kses_hair() returns. It also
626 * removes "<" and ">" characters, if there are any left. One more thing it does
627 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
628 * in the returned code as well.
632 * @param string $element HTML element/tag
633 * @param string $attr HTML attributes from HTML element to closing HTML element tag
634 * @param array $allowed_html Allowed HTML elements
635 * @param array $allowed_protocols Allowed protocols to keep
636 * @return string Sanitized HTML element
638 function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
639 # Is there a closing XHTML slash at the end of the attributes?
642 if (preg_match('%\s*/\s*$%', $attr))
645 # Are any attributes allowed at all for this element?
647 if (@ count($allowed_html[strtolower($element)]) == 0)
648 return "<$element$xhtml_slash>";
652 $attrarr = wp_kses_hair($attr, $allowed_protocols);
654 # Go through $attrarr, and save the allowed attributes for this element
659 foreach ($attrarr as $arreach) {
660 if (!@ isset ($allowed_html[strtolower($element)][strtolower($arreach['name'])]))
661 continue; # the attribute is not allowed
663 $current = $allowed_html[strtolower($element)][strtolower($arreach['name'])];
665 continue; # the attribute is not allowed
667 if (!is_array($current))
668 $attr2 .= ' '.$arreach['whole'];
669 # there are no checks
672 # there are some checks
674 foreach ($current as $currkey => $currval)
675 if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
680 if ( strtolower($arreach['name']) == 'style' ) {
681 $orig_value = $arreach['value'];
683 $value = safecss_filter_attr($orig_value);
688 $arreach['value'] = $value;
690 $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
694 $attr2 .= ' '.$arreach['whole']; # it passed them
695 } # if !is_array($current)
698 # Remove any "<" or ">" characters
700 $attr2 = preg_replace('/[<>]/', '', $attr2);
702 return "<$element$attr2$xhtml_slash>";
706 * Builds an attribute list from string containing attributes.
708 * This function does a lot of work. It parses an attribute list into an array
709 * with attribute data, and tries to do the right thing even if it gets weird
710 * input. It will add quotes around attribute values that don't have any quotes
711 * or apostrophes around them, to make it easier to produce HTML code that will
712 * conform to W3C's HTML specification. It will also remove bad URL protocols
713 * from attribute values. It also reduces duplicate attributes by using the
714 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
718 * @param string $attr Attribute list from HTML element to closing HTML element tag
719 * @param array $allowed_protocols Allowed protocols to keep
720 * @return array List of attributes after parsing
722 function wp_kses_hair($attr, $allowed_protocols) {
726 $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
728 # Loop through the whole attribute list
730 while (strlen($attr) != 0) {
731 $working = 0; # Was the last operation successful?
734 case 0 : # attribute name, href for instance
736 if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
737 $attrname = $match[1];
738 $working = $mode = 1;
739 $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
744 case 1 : # equals sign or valueless ("selected")
746 if (preg_match('/^\s*=\s*/', $attr)) # equals sign
750 $attr = preg_replace('/^\s*=\s*/', '', $attr);
754 if (preg_match('/^\s+/', $attr)) # valueless
758 if(FALSE === array_key_exists($attrname, $attrarr)) {
759 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
761 $attr = preg_replace('/^\s+/', '', $attr);
766 case 2 : # attribute value, a URL after href= for instance
768 if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match))
771 $thisval = $match[1];
772 if ( in_array(strtolower($attrname), $uris) )
773 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
775 if(FALSE === array_key_exists($attrname, $attrarr)) {
776 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
780 $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
784 if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match))
787 $thisval = $match[1];
788 if ( in_array(strtolower($attrname), $uris) )
789 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
791 if(FALSE === array_key_exists($attrname, $attrarr)) {
792 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
796 $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
800 if (preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match))
803 $thisval = $match[1];
804 if ( in_array(strtolower($attrname), $uris) )
805 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
807 if(FALSE === array_key_exists($attrname, $attrarr)) {
808 $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
810 # We add quotes to conform to W3C's HTML spec.
813 $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
819 if ($working == 0) # not well formed, remove and try again
821 $attr = wp_kses_html_error($attr);
826 if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr))
827 # special case, for when the attribute list ends with a valueless
828 # attribute like "selected"
829 $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
835 * Performs different checks for attribute values.
837 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
838 * and "valueless" with even more checks to come soon.
842 * @param string $value Attribute value
843 * @param string $vless Whether the value is valueless. Use 'y' or 'n'
844 * @param string $checkname What $checkvalue is checking for.
845 * @param mixed $checkvalue What constraint the value should pass
846 * @return bool Whether check passes
848 function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
851 switch (strtolower($checkname)) {
853 # The maxlen check makes sure that the attribute value has a length not
854 # greater than the given value. This can be used to avoid Buffer Overflows
855 # in WWW clients and various Internet servers.
857 if (strlen($value) > $checkvalue)
862 # The minlen check makes sure that the attribute value has a length not
863 # smaller than the given value.
865 if (strlen($value) < $checkvalue)
870 # The maxval check does two things: it checks that the attribute value is
871 # an integer from 0 and up, without an excessive amount of zeroes or
872 # whitespace (to avoid Buffer Overflows). It also checks that the attribute
873 # value is not greater than the given value.
874 # This check can be used to avoid Denial of Service attacks.
876 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
878 if ($value > $checkvalue)
883 # The minval check checks that the attribute value is a positive integer,
884 # and that it is not smaller than the given value.
886 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
888 if ($value < $checkvalue)
893 # The valueless check checks if the attribute has a value
894 # (like <a href="blah">) or not (<option selected>). If the given value
895 # is a "y" or a "Y", the attribute must not have a value.
896 # If the given value is an "n" or an "N", the attribute must have one.
898 if (strtolower($checkvalue) != $vless)
/**
 * Sanitize string from bad protocols.
 *
 * This function removes all non-allowed protocols from the beginning of
 * $string. It ignores whitespace and the case of the letters, and it does
 * understand HTML entities. It does its work in a while loop, so it won't be
 * fooled by a string like "javascript:javascript:alert(57)".
 *
 * @param string $string Content to filter bad protocols from
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Filtered content
 */
function wp_kses_bad_protocol($string, $allowed_protocols) {
	$string = wp_kses_no_null($string);
	// Seed the "previous" value with something different so the loop body
	// runs at least once.
	$string2 = $string.'a';

	// Keep stripping until a pass leaves the string unchanged.
	while ($string != $string2) {
		$string2 = $string;
		$string = wp_kses_bad_protocol_once($string, $allowed_protocols);
	} # while

	return $string;
}
/**
 * Removes any NULL characters in $string.
 *
 * Strips raw NUL bytes first, then any runs of the two-character sequence
 * backslash-zero.
 *
 * @param string $string
 * @return string The string with NUL bytes and backslash-zero runs removed
 */
function wp_kses_no_null($string) {
	$string = preg_replace('/\0+/', '', $string);
	$string = preg_replace('/(\\\\0)+/', '', $string);

	return $string;
}
/**
 * Strips slashes from in front of quotes.
 *
 * Turns every backslash-doublequote pair (\") into a bare doublequote and
 * leaves all other backslashes untouched. It's really weird, but the quoting
 * from preg_replace(//e) seems to require this.
 *
 * @param string $string String to strip slashes
 * @return string Fixed strings with quoted slashes
 */
function wp_kses_stripslashes($string) {
	$escaped_quote = '%\\\\"%';
	$unescaped = preg_replace($escaped_quote, '"', $string);

	return $unescaped;
}
/**
 * Goes through an array and changes the keys to all lower case.
 *
 * Only the first two levels of keys (element names and attribute names) are
 * lower-cased; the values stored under the second level are copied as-is.
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function wp_kses_array_lc($inarray) {
	$outarray = array ();

	foreach ( (array) $inarray as $inkey => $inval) {
		$outkey = strtolower($inkey);
		// Always create the entry, so elements without attributes survive.
		$outarray[$outkey] = array ();

		foreach ( (array) $inval as $inkey2 => $inval2) {
			$outkey2 = strtolower($inkey2);
			$outarray[$outkey][$outkey2] = $inval2;
		} # foreach $inval
	} # foreach $inarray

	return $outarray;
}
/**
 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 *
 * Deletes every "&{...}" construct, together with an optional trailing
 * semicolon; an unterminated "&{..." is removed through the end of the string.
 *
 * @param string $string
 * @return string
 */
function wp_kses_js_entities($string) {
	$js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';
	$cleaned = preg_replace($js_entity, '', $string);

	return $cleaned;
}
/**
 * Handles parsing errors in wp_kses_hair().
 *
 * The general plan is to remove everything to and including some whitespace,
 * but it deals with quotes and apostrophes as well: a quoted or apostrophed
 * run (terminated or not) is swallowed as a unit.
 *
 * @param string $string
 * @return string
 */
function wp_kses_html_error($string) {
	$leading_junk = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';
	$remainder = preg_replace($leading_junk, '', $string);

	return $remainder;
}
/**
 * Sanitizes content from bad protocols and other characters.
 *
 * This function searches for URL protocols at the beginning of $string, while
 * handling whitespace and HTML entities. The string is split at the first
 * colon, where the colon may be written literally or as a decimal or
 * hexadecimal character reference for code point 58 (0x3a).
 *
 * @param string $string Content to check for bad protocols
 * @param string $allowed_protocols Allowed protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once($string, $allowed_protocols) {
	// The entity forms of ":" must be spelled out here; without them an
	// encoded colon would slip past the protocol check.
	$string2 = preg_split( '/:|&#0*58;|&#x0*3a;/i', $string, 2 );

	// Only treat the text before the colon as a protocol when it does not
	// contain "/?" (presumably a colon inside a query string - TODO confirm).
	if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) )
		$string = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols ) . trim( $string2[1] );

	return $string;
}
1035 * Callback for wp_kses_bad_protocol_once() regular expression.
1037 * This function processes URL protocols, checks to see if they're in the
1038 * white-list or not, and returns different data depending on the answer.
1043 * @param string $string URI scheme to check against the whitelist
1044 * @param string $allowed_protocols Allowed protocols
1045 * @return string Sanitized content
1047 function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
1048 $string2 = wp_kses_decode_entities($string);
1049 $string2 = preg_replace('/\s/', '', $string2);
1050 $string2 = wp_kses_no_null($string2);
1051 $string2 = strtolower($string2);
1054 foreach ( (array) $allowed_protocols as $one_protocol )
1055 if ( strtolower($one_protocol) == $string2 ) {
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert "AT&T" to the correct
 * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function wp_kses_normalize_entities($string) {
	# Disarm all entities by converting & to &amp;

	$string = str_replace('&', '&amp;', $string);

	# Change back the allowed entities in our entity whitelist
	# (after the step above every candidate starts with "&amp;")

	$string = preg_replace_callback('/&amp;([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string);
	$string = preg_replace_callback('/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string);
	$string = preg_replace_callback('/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string);

	return $string;
}
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * This function only accepts valid named entity references, which are finite,
 * case-sensitive, and highly scrutinized by HTML and XML validators. Names
 * found in $allowedentitynames are restored to "&name;"; anything else stays
 * disarmed as "&amp;name;".
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_named_entities($matches) {
	global $allowedentitynames;

	if ( empty($matches[1]) )
		return '';

	$i = $matches[1];
	return ( ( ! in_array($i, $allowedentitynames) ) ? "&amp;$i;" : "&$i;" );
}
1113 * Callback for wp_kses_normalize_entities() regular expression.
1115 * This function helps wp_kses_normalize_entities() to only accept 16 bit values
1116 * and nothing more for &#number; entities.
1121 * @param array $matches preg_replace_callback() matches array
1122 * @return string Correctly encoded entity
1124 function wp_kses_normalize_entities2($matches) {
1125 if ( empty($matches[1]) )
1129 if (valid_unicode($i)) {
1130 $i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
/**
 * Callback for wp_kses_normalize_entities() for regular expression.
 *
 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
 * numeric entities in hex form. Valid code points are re-emitted as "&#x...;"
 * with leading zeroes stripped; invalid ones stay disarmed as "&amp;#x...;".
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities3($matches) {
	if ( empty($matches[1]) )
		return '';

	$hexchars = $matches[1];
	return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';' );
}
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepts tab, line feed and carriage return, plus the ranges
 * 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff.
 *
 * @param int $i Unicode value
 * @return bool true if the value was a valid Unicode number
 */
function valid_unicode($i) {
	// The three permitted control characters.
	if ( $i == 0x9 || $i == 0xa || $i == 0xd )
		return true;

	// Everything from space up to the start of the surrogate block.
	if ( $i >= 0x20 && $i <= 0xd7ff )
		return true;

	// After the surrogates, up to (not including) 0xfffe / 0xffff.
	if ( $i >= 0xe000 && $i <= 0xfffd )
		return true;

	// The range above 0xffff, up to 0x10ffff.
	if ( $i >= 0x10000 && $i <= 0x10ffff )
		return true;

	return false;
}
/**
 * Convert all entities to their character counterparts.
 *
 * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
 * anything with other entities like &auml;, but we don't need them in the URL
 * protocol whitelisting system anyway.
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities($string) {
	// Decimal references first, then hexadecimal ones.
	$string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);
	$string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string);

	return $string;
}
/**
 * Regex callback for wp_kses_decode_entities()
 *
 * Converts the captured decimal code into the corresponding character
 * via chr().
 *
 * @param array $match preg match
 * @return string
 */
function _wp_kses_decode_entities_chr( $match ) {
	$decimal_code = $match[1];

	return chr( $decimal_code );
}
/**
 * Regex callback for wp_kses_decode_entities()
 *
 * Converts the captured hexadecimal code into the corresponding character
 * via hexdec() and chr().
 *
 * @param array $match preg match
 * @return string
 */
function _wp_kses_decode_entities_chr_hexdec( $match ) {
	$code_point = hexdec( $match[1] );

	return chr( $code_point );
}
/**
 * Sanitize content with allowed HTML Kses rules.
 *
 * Unslashes the input, filters it against $allowedtags and re-adds slashes
 * to the result.
 *
 * @uses $allowedtags
 *
 * @param string $data Content to filter, expected to be escaped with slashes
 * @return string Filtered content
 */
function wp_filter_kses($data) {
	global $allowedtags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $allowedtags);

	return addslashes( $filtered );
}
/**
 * Sanitize content with allowed HTML Kses rules.
 *
 * Same whitelist as wp_filter_kses(), but no slashes are removed or added.
 *
 * @uses $allowedtags
 *
 * @param string $data Content to filter, expected to not be escaped
 * @return string Filtered content
 */
function wp_kses_data($data) {
	global $allowedtags;

	$filtered = wp_kses( $data , $allowedtags );

	return $filtered;
}
/**
 * Sanitize content for allowed HTML tags for post content.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST.
 * Unslashes the input, filters it against $allowedposttags and re-adds
 * slashes to the result.
 *
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter, expected to be escaped with slashes
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
	global $allowedposttags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $allowedposttags);

	return addslashes ( $filtered );
}
/**
 * Sanitize content for allowed HTML tags for post content.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST.
 * No slashes are removed or added.
 *
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_kses_post($data) {
	global $allowedposttags;

	$filtered = wp_kses( $data , $allowedposttags );

	return $filtered;
}
/**
 * Strips all of the HTML in the content.
 *
 * Runs the unslashed content through wp_kses() with an empty whitelist and
 * re-adds slashes to the result.
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
	$no_tags_allowed = array();
	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $no_tags_allowed);

	return addslashes ( $filtered );
}
/**
 * Adds all Kses input form content filters.
 *
 * All hooks have default priority. The wp_filter_kses() function is added to
 * the 'pre_comment_content' and 'title_save_pre' hooks.
 *
 * The wp_filter_post_kses() function is added to the 'content_save_pre',
 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
 *
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
	// Normal filtering.
	foreach ( array('pre_comment_content', 'title_save_pre') as $hook )
		add_filter($hook, 'wp_filter_kses');

	// Post filtering.
	foreach ( array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre') as $hook )
		add_filter($hook, 'wp_filter_post_kses');
}
/**
 * Removes all Kses input form content filters.
 *
 * A quick procedural method to removing all of the filters that kses uses for
 * content in WordPress Loop.
 *
 * Does not remove the kses_init() function from 'init' hook (priority is
 * default). Also does not remove kses_init() function from 'set_current_user'
 * hook (priority is also default).
 */
function kses_remove_filters() {
	// Normal filtering.
	foreach ( array('pre_comment_content', 'title_save_pre') as $hook )
		remove_filter($hook, 'wp_filter_kses');

	// Post filtering.
	foreach ( array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre') as $hook )
		remove_filter($hook, 'wp_filter_post_kses');
}
/**
 * Sets up most of the Kses filters for input form content.
 *
 * This function is hooked to both 'init' and 'set_current_user' (default
 * priority); if it is removed from those hooks, it no longer runs and so
 * adds no Kses filters.
 *
 * First removes all of the Kses filters in case the current user does not need
 * to have Kses filter the content. If the user does not have unfiltered html
 * capability, then Kses filters are added.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 *		does not have unfiltered HTML capability.
 */
function kses_init() {
	// Start from a clean slate on every run.
	kses_remove_filters();

	// Users allowed to post unfiltered HTML get no Kses filtering.
	if ( ! current_user_can('unfiltered_html') )
		kses_init_filters();
}

add_action('init', 'kses_init');
add_action('set_current_user', 'kses_init');
1361 function safecss_filter_attr( $css, $deprecated = '' ) {
1362 if ( !empty( $deprecated ) )
1363 _deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented
1365 $css = wp_kses_no_null($css);
1366 $css = str_replace(array("\n","\r","\t"), '', $css);
1368 if ( preg_match( '%[\\(&=}]|/\*%', $css ) ) // remove any inline css containing \ ( & } = or comments
1371 $css_array = explode( ';', trim( $css ) );
1372 $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
1373 'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
1374 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
1375 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
1376 'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
1377 'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
1378 'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
1379 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
1380 'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
1381 'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
1384 if ( empty($allowed_attr) )
1388 foreach ( $css_array as $css_item ) {
1389 if ( $css_item == '' )
1391 $css_item = trim( $css_item );
1393 if ( strpos( $css_item, ':' ) === false ) {
1396 $parts = split( ':', $css_item );
1397 if ( in_array( trim( $parts[0] ), $allowed_attr ) )