X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/16b9f61a8ab25bd6c9fbfd0cea00c7bda22f6a71..refs/tags/wordpress-3.4.2:/wp-includes/formatting.php?ds=sidebyside diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index dc9ed2f6..fcf519c3 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -28,18 +28,33 @@ */ function wptexturize($text) { global $wp_cockneyreplace; - static $static_setup = false, $opening_quote, $closing_quote, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements; - $output = ''; - $curl = ''; - $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); - $stop = count($textarr); - - // No need to setup these variables more than once - if (!$static_setup) { - /* translators: opening curly quote */ - $opening_quote = _x('“', 'opening curly quote'); - /* translators: closing curly quote */ - $closing_quote = _x('”', 'closing curly quote'); + static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, + $default_no_texturize_tags, $default_no_texturize_shortcodes; + + // No need to set up these static variables more than once + if ( ! isset( $static_characters ) ) { + /* translators: opening curly double quote */ + $opening_quote = _x( '“', 'opening curly double quote' ); + /* translators: closing curly double quote */ + $closing_quote = _x( '”', 'closing curly double quote' ); + + /* translators: apostrophe, for example in 'cause or can't */ + $apos = _x( '’', 'apostrophe' ); + + /* translators: prime, for example in 9' (nine feet) */ + $prime = _x( '′', 'prime' ); + /* translators: double prime, for example in 9" (nine inches) */ + $double_prime = _x( '″', 'double prime' ); + + /* translators: opening curly single quote */ + $opening_single_quote = _x( '‘', 'opening curly single quote' ); + /* translators: closing curly single quote */ + $closing_single_quote = _x( '’', 'closing curly single quote' ); + + /* translators: en dash */ + $en_dash = _x( '–', 'en dash' ); + /* translators: em dash */ + $em_dash = _x( '—', 'em dash' ); $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); $default_no_texturize_shortcodes = array('code'); @@ -48,18 +63,40 @@ function wptexturize($text) { if ( isset($wp_cockneyreplace) ) { $cockney = array_keys($wp_cockneyreplace); $cockneyreplace = array_values($wp_cockneyreplace); + } elseif ( "'" != $apos ) { // Only bother if we're doing a replacement. + $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" ); + $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" ); } else { - $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause"); - $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause"); + $cockney = $cockneyreplace = array(); } - $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney); - $static_replacements = array_merge(array('—', ' — ', '–', ' – ', 'xn--', '…', $opening_quote, '’s', $closing_quote, ' ™'), $cockneyreplace); - - $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/'); - $dynamic_replacements = array('’$1','$1‘', '$1″', '$1′', '$1’$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '’$1', '$1×$2'); + $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); + $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); - $static_setup = true; + $dynamic = array(); + if ( "'" != $apos ) { + $dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's + $dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99 + } + if ( "'" != $opening_single_quote ) + $dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [ + if ( '"' != $double_prime ) + $dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime) + if ( "'" != $prime ) + $dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime) + if ( "'" != $apos ) + $dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word + if ( '"' != $opening_quote ) + $dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [ + if ( '"' != $closing_quote ) + $dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . '$1'; // closing double quote + if ( "'" != $closing_single_quote ) + $dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote + + $dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times) + + $dynamic_characters = array_keys( $dynamic ); + $dynamic_replacements = array_values( $dynamic ); } // Transform into regexp sub-expression used in _wptexturize_pushpop_element @@ -70,32 +107,27 @@ function wptexturize($text) { $no_texturize_tags_stack = array(); $no_texturize_shortcodes_stack = array(); - for ( $i = 0; $i < $stop; $i++ ) { - $curl = $textarr[$i]; + $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + + foreach ( $textarr as &$curl ) { + if ( empty( $curl ) ) + continue; - if ( !empty($curl) && '<' != $curl{0} && '[' != $curl{0} - && empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) { - // This is not a tag, nor is the texturization disabled - // static strings + // Only call _wptexturize_pushpop_element if first char is correct tag opening + $first = $curl[0]; + if ( '<' === $first ) { + _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); + } elseif ( '[' === $first ) { + _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); + } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) { + // This is not a tag, nor is the texturization disabled static strings $curl = str_replace($static_characters, $static_replacements, $curl); // regular expressions $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); - } elseif (!empty($curl)) { - /* - * Only call _wptexturize_pushpop_element if first char is correct - * tag opening - */ - if ('<' == $curl{0}) - _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); - elseif ('[' == $curl{0}) - _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); } - $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl); - $output .= $curl; } - - return $output; + return implode( '', $textarr ); } /** @@ -120,7 +152,7 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $openi /* * This disables texturize until we find a closing tag of our type * (e.g.
) even if there was invalid nesting before that
-			 * 
+			 *
 			 * Example: in the case 
sadsadasd"baba"
* "baba" won't be texturize */ @@ -140,30 +172,6 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $openi } } -/** - * Accepts matches array from preg_replace_callback in wpautop() or a string. - * - * Ensures that the contents of a <
>...<
> HTML block are not - * converted into paragraphs or line-breaks. - * - * @since 1.2.0 - * - * @param array|string $matches The array or string - * @return string The pre block without paragraph/line-break conversion. - */ -function clean_pre($matches) { - if ( is_array($matches) ) - $text = $matches[1] . $matches[2] . "
"; - else - $text = $matches; - - $text = str_replace('
', '', $text); - $text = str_replace('

', "\n", $text); - $text = str_replace('

', '', $text); - - return $text; -} - /** * Replaces double line-breaks with paragraph elements. * @@ -175,17 +183,45 @@ function clean_pre($matches) { * @since 0.71 * * @param string $pee The text which has to be formatted. - * @param int|bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true. + * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true. * @return string Text which has been converted into correct paragraph tags. */ -function wpautop($pee, $br = 1) { +function wpautop($pee, $br = true) { + $pre_tags = array(); if ( trim($pee) === '' ) return ''; + $pee = $pee . "\n"; // just to make things a little easier, pad the end + + if ( strpos($pee, '', $pee ); + $last_pee = array_pop($pee_parts); + $pee = ''; + $i = 0; + + foreach ( $pee_parts as $pee_part ) { + $start = strpos($pee_part, ''; + + $pee .= substr( $pee_part, 0, $start ) . $name; + $i++; + } + + $pee .= $last_pee; + } + $pee = preg_replace('|
\s*
|', "\n\n", $pee); // Space things out a little - $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr|fieldset|legend)'; + $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'; $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); $pee = preg_replace('!()!', "$1\n\n", $pee); $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines @@ -207,20 +243,33 @@ function wpautop($pee, $br = 1) { $pee = str_replace('

', '

', $pee); $pee = preg_replace('!

\s*(]*>)!', "$1", $pee); $pee = preg_replace('!(]*>)\s*

!', "$1", $pee); - if ($br) { - $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', create_function('$matches', 'return str_replace("\n", "", $matches[0]);'), $pee); + if ( $br ) { + $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee); $pee = preg_replace('|(?)\s*\n|', "
\n", $pee); // optionally make line breaks $pee = str_replace('', "\n", $pee); } $pee = preg_replace('!(]*>)\s*
!', "$1", $pee); $pee = preg_replace('!
(\s*]*>)!', '$1', $pee); - if (strpos($pee, ']*>)(.*?)!is', 'clean_pre', $pee ); $pee = preg_replace( "|\n

$|", '

', $pee ); + if ( !empty($pre_tags) ) + $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee); + return $pee; } +/** + * Newline preservation help function for wpautop + * + * @since 3.1.0 + * @access private + * @param array $matches preg_replace_callback matches array + * @returns string + */ +function _autop_newline_preservation_helper( $matches ) { + return str_replace("\n", "", $matches[0]); +} + /** * Don't auto-p wrap shortcodes that stand alone * @@ -231,16 +280,48 @@ function wpautop($pee, $br = 1) { * @param string $pee The content. * @return string The filtered content. */ -function shortcode_unautop($pee) { +function shortcode_unautop( $pee ) { global $shortcode_tags; - if ( !empty($shortcode_tags) && is_array($shortcode_tags) ) { - $tagnames = array_keys($shortcode_tags); - $tagregexp = join( '|', array_map('preg_quote', $tagnames) ); - $pee = preg_replace('/

\\s*?(\\[(' . $tagregexp . ')\\b.*?\\/?\\](?:.+?\\[\\/\\2\\])?)\\s*<\\/p>/s', '$1', $pee); - } - - return $pee; + if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) { + return $pee; + } + + $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); + + $pattern = + '/' + . '

' // Opening paragraph + . '\\s*+' // Optional leading whitespace + . '(' // 1: The shortcode + . '\\[' // Opening bracket + . "($tagregexp)" // 2: Shortcode name + . '\\b' // Word boundary + // Unroll the loop: Inside the opening shortcode tag + . '[^\\]\\/]*' // Not a closing bracket or forward slash + . '(?:' + . '\\/(?!\\])' // A forward slash not followed by a closing bracket + . '[^\\]\\/]*' // Not a closing bracket or forward slash + . ')*?' + . '(?:' + . '\\/\\]' // Self closing tag and closing bracket + . '|' + . '\\]' // Closing bracket + . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags + . '[^\\[]*+' // Not an opening bracket + . '(?:' + . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag + . '[^\\[]*+' // Not an opening bracket + . ')*+' + . '\\[\\/\\2\\]' // Closing shortcode tag + . ')?' + . ')' + . ')' + . '\\s*+' // optional trailing whitespace + . '<\\/p>' // closing paragraph + . '/s'; + + return preg_replace( $pattern, '$1', $pee ); } /** @@ -287,40 +368,37 @@ function seems_utf8($str) { * @param string $string The text which is to be encoded. * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. * @param string $charset Optional. The character encoding of the string. Default is false. - * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false. + * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false. * @return string The encoded text with HTML entities. */ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { $string = (string) $string; - if ( 0 === strlen( $string ) ) { + if ( 0 === strlen( $string ) ) return ''; - } // Don't bother if there are no specialchars - saves some processing - if ( !preg_match( '/[&<>"\']/', $string ) ) { + if ( ! preg_match( '/[&<>"\']/', $string ) ) return $string; - } // Account for the previous behaviour of the function when the $quote_style is not an accepted value - if ( empty( $quote_style ) ) { + if ( empty( $quote_style ) ) $quote_style = ENT_NOQUOTES; - } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) { + elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) $quote_style = ENT_QUOTES; - } // Store the site charset as a static to avoid multiple calls to wp_load_alloptions() - if ( !$charset ) { + if ( ! $charset ) { static $_charset; - if ( !isset( $_charset ) ) { + if ( ! isset( $_charset ) ) { $alloptions = wp_load_alloptions(); $_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : ''; } $charset = $_charset; } - if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) { + + if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) $charset = 'UTF-8'; - } $_quote_style = $quote_style; @@ -332,22 +410,27 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals } // Handle double encoding ourselves - if ( !$double_encode ) { + if ( $double_encode ) { + $string = @htmlspecialchars( $string, $quote_style, $charset ); + } else { + // Decode & into & $string = wp_specialchars_decode( $string, $_quote_style ); - $string = preg_replace( '/&(#?x?[0-9a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string ); - } - $string = @htmlspecialchars( $string, $quote_style, $charset ); + // Guarantee every &entity; is valid or re-encode the & + $string = wp_kses_normalize_entities( $string ); - // Handle double encoding ourselves - if ( !$double_encode ) { - $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string ); + // Now re-encode everything except &entity; + $string = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE ); + + for ( $i = 0; $i < count( $string ); $i += 2 ) + $string[$i] = @htmlspecialchars( $string[$i], $quote_style, $charset ); + + $string = implode( '', $string ); } // Backwards compatibility - if ( 'single' === $_quote_style ) { + if ( 'single' === $_quote_style ) $string = str_replace( "'", ''', $string ); - } return $string; } @@ -529,34 +612,38 @@ function remove_accents($string) { if (seems_utf8($string)) { $chars = array( // Decompositions for Latin-1 Supplement + chr(194).chr(170) => 'a', chr(194).chr(186) => 'o', chr(195).chr(128) => 'A', chr(195).chr(129) => 'A', chr(195).chr(130) => 'A', chr(195).chr(131) => 'A', chr(195).chr(132) => 'A', chr(195).chr(133) => 'A', - chr(195).chr(135) => 'C', chr(195).chr(136) => 'E', - chr(195).chr(137) => 'E', chr(195).chr(138) => 'E', - chr(195).chr(139) => 'E', chr(195).chr(140) => 'I', - chr(195).chr(141) => 'I', chr(195).chr(142) => 'I', - chr(195).chr(143) => 'I', chr(195).chr(145) => 'N', + chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C', + chr(195).chr(136) => 'E', chr(195).chr(137) => 'E', + chr(195).chr(138) => 'E', chr(195).chr(139) => 'E', + chr(195).chr(140) => 'I', chr(195).chr(141) => 'I', + chr(195).chr(142) => 'I', chr(195).chr(143) => 'I', + chr(195).chr(144) => 'D', chr(195).chr(145) => 'N', chr(195).chr(146) => 'O', chr(195).chr(147) => 'O', chr(195).chr(148) => 'O', chr(195).chr(149) => 'O', chr(195).chr(150) => 'O', chr(195).chr(153) => 'U', chr(195).chr(154) => 'U', chr(195).chr(155) => 'U', chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y', - chr(195).chr(159) => 's', chr(195).chr(160) => 'a', - chr(195).chr(161) => 'a', chr(195).chr(162) => 'a', - chr(195).chr(163) => 'a', chr(195).chr(164) => 'a', - chr(195).chr(165) => 'a', chr(195).chr(167) => 'c', + chr(195).chr(158) => 'TH',chr(195).chr(159) => 's', + chr(195).chr(160) => 'a', chr(195).chr(161) => 'a', + chr(195).chr(162) => 'a', chr(195).chr(163) => 'a', + chr(195).chr(164) => 'a', chr(195).chr(165) => 'a', + chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c', chr(195).chr(168) => 'e', chr(195).chr(169) => 'e', chr(195).chr(170) => 'e', chr(195).chr(171) => 'e', chr(195).chr(172) => 'i', chr(195).chr(173) => 'i', chr(195).chr(174) => 'i', chr(195).chr(175) => 'i', - chr(195).chr(177) => 'n', chr(195).chr(178) => 'o', - chr(195).chr(179) => 'o', chr(195).chr(180) => 'o', - chr(195).chr(181) => 'o', chr(195).chr(182) => 'o', - chr(195).chr(182) => 'o', chr(195).chr(185) => 'u', - chr(195).chr(186) => 'u', chr(195).chr(187) => 'u', - chr(195).chr(188) => 'u', chr(195).chr(189) => 'y', - chr(195).chr(191) => 'y', + chr(195).chr(176) => 'd', chr(195).chr(177) => 'n', + chr(195).chr(178) => 'o', chr(195).chr(179) => 'o', + chr(195).chr(180) => 'o', chr(195).chr(181) => 'o', + chr(195).chr(182) => 'o', chr(195).chr(184) => 'o', + chr(195).chr(185) => 'u', chr(195).chr(186) => 'u', + chr(195).chr(187) => 'u', chr(195).chr(188) => 'u', + chr(195).chr(189) => 'y', chr(195).chr(190) => 'th', + chr(195).chr(191) => 'y', chr(195).chr(152) => 'O', // Decompositions for Latin Extended-A chr(196).chr(128) => 'A', chr(196).chr(129) => 'a', chr(196).chr(130) => 'A', chr(196).chr(131) => 'a', @@ -622,10 +709,68 @@ function remove_accents($string) { chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z', chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z', chr(197).chr(190) => 'z', chr(197).chr(191) => 's', + // Decompositions for Latin Extended-B + chr(200).chr(152) => 'S', chr(200).chr(153) => 's', + chr(200).chr(154) => 'T', chr(200).chr(155) => 't', // Euro Sign chr(226).chr(130).chr(172) => 'E', // GBP (Pound) Sign - chr(194).chr(163) => ''); + chr(194).chr(163) => '', + // Vowels with diacritic (Vietnamese) + // unmarked + chr(198).chr(160) => 'O', chr(198).chr(161) => 'o', + chr(198).chr(175) => 'U', chr(198).chr(176) => 'u', + // grave accent + chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a', + chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a', + chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e', + chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o', + chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o', + chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u', + chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y', + // hook + chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a', + chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a', + chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a', + chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e', + chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e', + chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i', + chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o', + chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o', + chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o', + chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u', + chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u', + chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y', + // tilde + chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a', + chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a', + chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e', + chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e', + chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o', + chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o', + chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u', + chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y', + // acute accent + chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a', + chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a', + chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e', + chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o', + chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o', + chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u', + // dot below + chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a', + chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a', + chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a', + chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e', + chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e', + chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i', + chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o', + chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o', + chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o', + chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u', + chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u', + chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y', + ); $string = strtr($string, $chars); } else { @@ -686,15 +831,15 @@ function sanitize_file_name( $filename ) { $extension = array_pop($parts); $mimes = get_allowed_mime_types(); - // Loop over any intermediate extensions. Munge them with a trailing underscore if they are a 2 - 5 character + // Loop over any intermediate extensions. Munge them with a trailing underscore if they are a 2 - 5 character // long alpha string not in the extension whitelist. foreach ( (array) $parts as $part) { $filename .= '.' . $part; - + if ( preg_match("/^[a-zA-Z]{2,5}\d?$/", $part) ) { $allowed = false; foreach ( $mimes as $ext_preg => $mime_match ) { - $ext_preg = '!(^' . $ext_preg . ')$!i'; + $ext_preg = '!^(' . $ext_preg . ')$!i'; if ( preg_match( $ext_preg, $part ) ) { $allowed = true; break; @@ -712,12 +857,10 @@ function sanitize_file_name( $filename ) { /** * Sanitize username stripping out unsafe characters. * - * If $strict is true, only alphanumeric characters (as well as _, space, ., -, - * @) are returned. - * Removes tags, octets, entities, and if strict is enabled, will remove all - * non-ASCII characters. After sanitizing, it passes the username, raw username - * (the username in the parameter), and the strict parameter as parameters for - * the filter. + * Removes tags, octets, entities, and if strict is enabled, will only keep + * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username, + * raw username (the username in the parameter), and the value of $strict as + * parameters for the 'sanitize_user' filter. * * @since 2.0.0 * @uses apply_filters() Calls 'sanitize_user' hook on username, raw username, @@ -729,19 +872,38 @@ function sanitize_file_name( $filename ) { */ function sanitize_user( $username, $strict = false ) { $raw_username = $username; - $username = wp_strip_all_tags($username); + $username = wp_strip_all_tags( $username ); + $username = remove_accents( $username ); // Kill octets - $username = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '', $username); - $username = preg_replace('/&.+?;/', '', $username); // Kill entities + $username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username ); + $username = preg_replace( '/&.+?;/', '', $username ); // Kill entities // If strict, reduce to ASCII for max portability. if ( $strict ) - $username = preg_replace('|[^a-z0-9 _.\-@]|i', '', $username); + $username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username ); + $username = trim( $username ); // Consolidate contiguous whitespace - $username = preg_replace('|\s+|', ' ', $username); + $username = preg_replace( '|\s+|', ' ', $username ); - return apply_filters('sanitize_user', $username, $raw_username, $strict); + return apply_filters( 'sanitize_user', $username, $raw_username, $strict ); +} + +/** + * Sanitize a string key. + * + * Keys are used as internal identifiers. Lowercase alphanumeric characters, dashes and underscores are allowed. + * + * @since 3.0.0 + * + * @param string $key String key + * @return string Sanitized key + */ +function sanitize_key( $key ) { + $raw_key = $key; + $key = strtolower( $key ); + $key = preg_replace( '/[^a-z0-9_\-]/', '', $key ); + return apply_filters( 'sanitize_key', $key, $raw_key ); } /** @@ -755,12 +917,16 @@ function sanitize_user( $username, $strict = false ) { * * @param string $title The string to be sanitized. * @param string $fallback_title Optional. A title to use if $title is empty. + * @param string $context Optional. The operation for which the string is sanitized * @return string The sanitized string. */ -function sanitize_title($title, $fallback_title = '') { +function sanitize_title($title, $fallback_title = '', $context = 'save') { $raw_title = $title; - $title = strip_tags($title); - $title = apply_filters('sanitize_title', $title, $raw_title); + + if ( 'save' == $context ) + $title = remove_accents($title); + + $title = apply_filters('sanitize_title', $title, $raw_title, $context); if ( '' === $title || false === $title ) $title = $fallback_title; @@ -768,8 +934,12 @@ function sanitize_title($title, $fallback_title = '') { return $title; } +function sanitize_title_for_query($title) { + return sanitize_title($title, '', 'query'); +} + /** - * Sanitizes title, replacing whitespace with dashes. + * Sanitizes title, replacing whitespace and a few other characters with dashes. * * Limits the output to alphanumeric characters, underscore (_) and dash (-). * Whitespace becomes a dash. @@ -777,9 +947,11 @@ function sanitize_title($title, $fallback_title = '') { * @since 1.2.0 * * @param string $title The title to be sanitized. + * @param string $raw_title Optional. Not used. + * @param string $context Optional. The operation for which the string is sanitized. * @return string The sanitized title. */ -function sanitize_title_with_dashes($title) { +function sanitize_title_with_dashes($title, $raw_title = '', $context = 'display') { $title = strip_tags($title); // Preserve escaped octets. $title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title); @@ -788,7 +960,6 @@ function sanitize_title_with_dashes($title) { // Restore octets. $title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title); - $title = remove_accents($title); if (seems_utf8($title)) { if (function_exists('mb_strtolower')) { $title = mb_strtolower($title, 'UTF-8'); @@ -799,6 +970,28 @@ function sanitize_title_with_dashes($title) { $title = strtolower($title); $title = preg_replace('/&.+?;/', '', $title); // kill entities $title = str_replace('.', '-', $title); + + if ( 'save' == $context ) { + // Convert nbsp, ndash and mdash to hyphens + $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title ); + + // Strip these characters entirely + $title = str_replace( array( + // iexcl and iquest + '%c2%a1', '%c2%bf', + // angle quotes + '%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba', + // curly quotes + '%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d', + '%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f', + // copy, reg, deg, hellip and trade + '%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2', + ), '', $title ); + + // Convert times to x + $title = str_replace( '%c3%97', 'x', $title ); + } + $title = preg_replace('/[^%a-z0-9 _-]/', '', $title); $title = preg_replace('/\s+/', '-', $title); $title = preg_replace('|-+|', '-', $title); @@ -828,7 +1021,7 @@ function sanitize_sql_orderby( $orderby ){ /** * Santizes a html classname to ensure it only contains valid characters * - * Strips the string down to A-Z,a-z,0-9,'-' if this results in an empty + * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty * string then it will return the alternative value supplied. * * @todo Expand to support the full range of CDATA that a class attribute can contain. @@ -836,20 +1029,21 @@ function sanitize_sql_orderby( $orderby ){ * @since 2.8.0 * * @param string $class The classname to be sanitized - * @param string $fallback The value to return if the sanitization end's up as an empty string. + * @param string $fallback Optional. The value to return if the sanitization end's up as an empty string. + * Defaults to an empty string. * @return string The sanitized value */ -function sanitize_html_class($class, $fallback){ +function sanitize_html_class( $class, $fallback = '' ) { //Strip out any % encoded octets - $sanitized = preg_replace('|%[a-fA-F0-9][a-fA-F0-9]|', '', $class); + $sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class ); - //Limit to A-Z,a-z,0-9,'-' - $sanitized = preg_replace('/[^A-Za-z0-9-]/', '', $sanitized); + //Limit to A-Z,a-z,0-9,_,- + $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized ); - if ('' == $sanitized) + if ( '' == $sanitized ) $sanitized = $fallback; - return apply_filters('sanitize_html_class',$sanitized, $class, $fallback); + return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback ); } /** @@ -866,6 +1060,9 @@ function sanitize_html_class($class, $fallback){ * @return string Converted string. */ function convert_chars($content, $deprecated = '') { + if ( !empty( $deprecated ) ) + _deprecated_argument( __FUNCTION__, '0.71' ); + // Translation of invalid Unicode references range to valid range $wp_htmltranswinuni = array( '€' => '€', // the Euro sign @@ -882,7 +1079,7 @@ function convert_chars($content, $deprecated = '') { '‹' => '‹', 'Œ' => 'Œ', '' => '', - 'Ž' => 'ž', + 'Ž' => 'Ž', '' => '', '' => '', '‘' => '‘', @@ -898,7 +1095,7 @@ function convert_chars($content, $deprecated = '') { '›' => '›', 'œ' => 'œ', '' => '', - 'ž' => '', + 'ž' => 'ž', 'Ÿ' => 'Ÿ' ); @@ -919,53 +1116,15 @@ function convert_chars($content, $deprecated = '') { return $content; } -/** - * Callback used to change %uXXXX to &#YYY; syntax - * - * @since 2.8? - * - * @param array $matches Single Match - * @return string An HTML entity - */ -function funky_javascript_callback($matches) { - return "&#".base_convert($matches[1],16,10).";"; -} - -/** - * Fixes javascript bugs in browsers. - * - * Converts unicode characters to HTML numbered entities. - * - * @since 1.5.0 - * @uses $is_macIE - * @uses $is_winIE - * - * @param string $text Text to be made safe. - * @return string Fixed text. - */ -function funky_javascript_fix($text) { - // Fixes for browsers' javascript bugs - global $is_macIE, $is_winIE; - - if ( $is_winIE || $is_macIE ) - $text = preg_replace_callback("/\%u([0-9A-F]{4,4})/", - "funky_javascript_callback", - $text); - - return $text; -} - /** * Will only balance the tags if forced to and the option is set to balance tags. * - * The option 'use_balanceTags' is used for whether the tags will be balanced. - * Both the $force parameter and 'use_balanceTags' option will have to be true - * before the tags will be balanced. + * The option 'use_balanceTags' is used to determine whether the tags will be balanced. * * @since 0.71 * * @param string $text Text to be balanced - * @param bool $force Forces balancing, ignoring the value of the option. Default false. + * @param bool $force If true, forces balancing, ignoring the value of the option. Default false. * @return string Balanced text */ function balanceTags( $text, $force = false ) { @@ -980,7 +1139,7 @@ function balanceTags( $text, $force = false ) { * @since 2.0.4 * * @author Leonard Lin - * @license GPL v2.0 + * @license GPL * @copyright November 4, 2001 * @version 1.1 * @todo Make better - change loop condition to $text in 1.2 @@ -993,19 +1152,22 @@ function balanceTags( $text, $force = false ) { * @return string Balanced text. */ function force_balance_tags( $text ) { - $tagstack = array(); $stacksize = 0; $tagqueue = ''; $newtext = ''; - $single_tags = array('br', 'hr', 'img', 'input'); //Known single-entity/self-closing tags - $nestable_tags = array('blockquote', 'div', 'span'); //Tags that can be immediately nested within themselves - - # WP bug fix for comments - in case you REALLY meant to type '< !--' + $tagstack = array(); + $stacksize = 0; + $tagqueue = ''; + $newtext = ''; + $single_tags = array( 'br', 'hr', 'img', 'input' ); // Known single-entity/self-closing tags + $nestable_tags = array( 'blockquote', 'div', 'span', 'q' ); // Tags that can be immediately nested within themselves + + // WP bug fix for comments - in case you REALLY meant to type '< !--' $text = str_replace('< !--', '< !--', $text); - # WP bug fix for LOVE <3 (and other situations with '<' before a number) + // WP bug fix for LOVE <3 (and other situations with '<' before a number) $text = preg_replace('#<([0-9]{1})#', '<$1', $text); - while (preg_match("/<(\/?\w*)\s*([^>]*)>/",$text,$regex)) { + while ( preg_match("/<(\/?[\w:]*)\s*([^>]*)>/", $text, $regex) ) { $newtext .= $tagqueue; - $i = strpos($text,$regex[0]); + $i = strpos($text, $regex[0]); $l = strlen($regex[0]); // clear the shifter @@ -1014,22 +1176,22 @@ function force_balance_tags( $text ) { if ( isset($regex[1][0]) && '/' == $regex[1][0] ) { // End Tag $tag = strtolower(substr($regex[1],1)); // if too many closing tags - if($stacksize <= 0) { + if( $stacksize <= 0 ) { $tag = ''; - //or close to be safe $tag = '/' . $tag; + // or close to be safe $tag = '/' . $tag; } // if stacktop value = tag close value then pop - else if ($tagstack[$stacksize - 1] == $tag) { // found closing tag + else if ( $tagstack[$stacksize - 1] == $tag ) { // found closing tag $tag = ''; // Close Tag // Pop - array_pop ($tagstack); + array_pop( $tagstack ); $stacksize--; } else { // closing tag not at top, search for it - for ($j=$stacksize-1;$j>=0;$j--) { - if ($tagstack[$j] == $tag) { + for ( $j = $stacksize-1; $j >= 0; $j-- ) { + if ( $tagstack[$j] == $tag ) { // add tag to tagqueue - for ($k=$stacksize-1;$k>=$j;$k--){ - $tagqueue .= ''; + for ( $k = $stacksize-1; $k >= $j; $k--) { + $tagqueue .= ''; $stacksize--; } break; @@ -1043,14 +1205,15 @@ function force_balance_tags( $text ) { // Tag Cleaning // If self-closing or '', don't do anything. - if((substr($regex[2],-1) == '/') || ($tag == '')) { + if ( substr($regex[2],-1) == '/' || $tag == '' ) { + // do nothing } // ElseIf it's a known single-entity tag but it doesn't close itself, do so elseif ( in_array($tag, $single_tags) ) { $regex[2] .= '/'; } else { // Push the tag onto the stack // If the top of the stack is the same as the tag we want to push, close previous tag - if (($stacksize > 0) && !in_array($tag, $nestable_tags) && ($tagstack[$stacksize - 1] == $tag)) { + if ( $stacksize > 0 && !in_array($tag, $nestable_tags) && $tagstack[$stacksize - 1] == $tag ) { $tagqueue = ''; $stacksize--; } @@ -1059,18 +1222,18 @@ function force_balance_tags( $text ) { // Attributes $attributes = $regex[2]; - if($attributes) { + if( !empty($attributes) ) $attributes = ' '.$attributes; - } - $tag = '<'.$tag.$attributes.'>'; + + $tag = '<' . $tag . $attributes . '>'; //If already queuing a close tag, then put this tag on, too - if ($tagqueue) { + if ( !empty($tagqueue) ) { $tagqueue .= $tag; $tag = ''; } } - $newtext .= substr($text,0,$i) . $tag; - $text = substr($text,$i+$l); + $newtext .= substr($text, 0, $i) . $tag; + $text = substr($text, $i + $l); } // Clear Tag Queue @@ -1080,9 +1243,8 @@ function force_balance_tags( $text ) { $newtext .= $text; // Empty Stack - while($x = array_pop($tagstack)) { + while( $x = array_pop($tagstack) ) $newtext .= ''; // Add remaining tags to close - } // WP fix for the bug with HTML comments $newtext = str_replace("< !--","