X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/11be8dc178e77d0b46189bbd8e33a216a9b90942..refs/tags/wordpress-3.4.2:/wp-includes/formatting.php
diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php
index 53df8084..fcf519c3 100644
--- a/wp-includes/formatting.php
+++ b/wp-includes/formatting.php
@@ -1,6 +1,6 @@
|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
- $stop = count($textarr);
+ static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
+ $default_no_texturize_tags, $default_no_texturize_shortcodes;
+
+ // No need to set up these static variables more than once
+ if ( ! isset( $static_characters ) ) {
+ /* translators: opening curly double quote */
+ $opening_quote = _x( '“', 'opening curly double quote' );
+ /* translators: closing curly double quote */
+ $closing_quote = _x( '”', 'closing curly double quote' );
+
+ /* translators: apostrophe, for example in 'cause or can't */
+ $apos = _x( '’', 'apostrophe' );
+
+ /* translators: prime, for example in 9' (nine feet) */
+ $prime = _x( '′', 'prime' );
+ /* translators: double prime, for example in 9" (nine inches) */
+ $double_prime = _x( '″', 'double prime' );
+
+ /* translators: opening curly single quote */
+ $opening_single_quote = _x( '‘', 'opening curly single quote' );
+ /* translators: closing curly single quote */
+ $closing_single_quote = _x( '’', 'closing curly single quote' );
+
+ /* translators: en dash */
+ $en_dash = _x( '–', 'en dash' );
+ /* translators: em dash */
+ $em_dash = _x( '—', 'em dash' );
+
+ $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
+ $default_no_texturize_shortcodes = array('code');
+
+ // if a plugin has provided an autocorrect array, use it
+ if ( isset($wp_cockneyreplace) ) {
+ $cockney = array_keys($wp_cockneyreplace);
+ $cockneyreplace = array_values($wp_cockneyreplace);
+ } elseif ( "'" != $apos ) { // Only bother if we're doing a replacement.
+ $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" );
+ $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" );
+ } else {
+ $cockney = $cockneyreplace = array();
+ }
- // if a plugin has provided an autocorrect array, use it
- if ( isset($wp_cockneyreplace) ) {
- $cockney = array_keys($wp_cockneyreplace);
- $cockneyreplace = array_values($wp_cockneyreplace);
- } else {
- $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
- $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause");
+ $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney );
+ $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace );
+
+ $dynamic = array();
+ if ( "'" != $apos ) {
+ $dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's
+ $dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99
+ }
+ if ( "'" != $opening_single_quote )
+ $dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [
+ if ( '"' != $double_prime )
+ $dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime)
+ if ( "'" != $prime )
+ $dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime)
+ if ( "'" != $apos )
+ $dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word
+ if ( '"' != $opening_quote )
+ $dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [
+ if ( '"' != $closing_quote )
+ $dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . '$1'; // closing double quote
+ if ( "'" != $closing_single_quote )
+ $dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote
+
+ $dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times)
+
+ $dynamic_characters = array_keys( $dynamic );
+ $dynamic_replacements = array_values( $dynamic );
}
- $static_characters = array_merge(array('---', ' -- ', '--', 'xn–', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney);
- $static_replacements = array_merge(array('—', ' — ', '–', 'xn--', '…', '“', '’s', '”', ' ™'), $cockneyreplace);
+ // Transform into regexp sub-expression used in _wptexturize_pushpop_element
+ // Must do this every time in case plugins use these filters in a context-sensitive manner
+ $no_texturize_tags = '(' . implode('|', apply_filters('no_texturize_tags', $default_no_texturize_tags) ) . ')';
+ $no_texturize_shortcodes = '(' . implode('|', apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes) ) . ')';
+
+ $no_texturize_tags_stack = array();
+ $no_texturize_shortcodes_stack = array();
- $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/(\s|\A|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A)"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/');
- $dynamic_replacements = array('’$1','$1‘', '$1″', '$1′', '$1’$2', '$1“$2', '”$1', '’$1', '$1×$2');
+ $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
- for ( $i = 0; $i < $stop; $i++ ) {
- $curl = $textarr[$i];
+ foreach ( $textarr as &$curl ) {
+ if ( empty( $curl ) )
+ continue;
- if ( !empty($curl) && '<' != $curl{0} && '[' != $curl{0} && $next && !$has_pre_parent) { // If it's not a tag
- // static strings
+ // Only call _wptexturize_pushpop_element if first char is correct tag opening
+ $first = $curl[0];
+ if ( '<' === $first ) {
+ _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
+ } elseif ( '[' === $first ) {
+ _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
+ } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
+ // This is not a tag, nor is the texturization disabled: apply static strings
$curl = str_replace($static_characters, $static_replacements, $curl);
// regular expressions
$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
- } elseif (strpos($curl, ' ', "\n", $text);
- $text = str_replace('') !== false) {
- $has_pre_parent = false;
- } else {
- $next = true;
}
-
$curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl);
- $output .= $curl;
}
-
- return $output;
+ return implode( '', $textarr );
}
/**
- * Accepts matches array from preg_replace_callback in wpautop() or a string.
- *
- * Ensures that the contents of a <
"baba"
+ * "baba" won't be texturized
+ */
+
+ array_push($stack, $matches[1]);
+ }
+ } else {
+ // Closing? Check $text+2 against disabled elements
+ $c = preg_quote($closing, '/');
+ if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
+ $last = array_pop($stack);
+
+ // Make sure it matches the opening tag
+ if ($last != $matches[1])
+ array_push($stack, $last);
+ }
+ }
}
/**
@@ -110,14 +183,45 @@ function clean_pre($matches) {
* @since 0.71
*
* @param string $pee The text which has to be formatted.
- * @param int|bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
+ * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
* @return string Text which has been converted into correct paragraph tags.
*/
-function wpautop($pee, $br = 1) {
+function wpautop($pee, $br = true) {
+ $pre_tags = array();
+
+ if ( trim($pee) === '' )
+ return '';
+
$pee = $pee . "\n"; // just to make things a little easier, pad the end
+
+ if ( strpos($pee, '>...<
> HTML block are not
- * converted into paragraphs or line-breaks.
+ * Search for disabled element tags. Push element to stack on tag open and pop
+ * on tag close. Assumes first character of $text is tag opening.
*
- * @since 1.2.0
- *
- * @param array|string $matches The array or string
- * @return string The pre block without paragraph/line-break conversion.
- */
-function clean_pre($matches) {
- if ( is_array($matches) )
- $text = $matches[1] . $matches[2] . "";
- else
- $text = $matches;
-
- $text = str_replace('
', '', $text);
- $text = str_replace(') even if there was invalid nesting before that
+ *
+ * Example: in the case
sadsadasd
', $pee ); + $last_pee = array_pop($pee_parts); + $pee = ''; + $i = 0; + + foreach ( $pee_parts as $pee_part ) { + $start = strpos($pee_part, '"; + $pre_tags[$name] = substr( $pee_part, $start ) . ''; + + $pee .= substr( $pee_part, 0, $start ) . $name; + $i++; + } + + $pee .= $last_pee; + } + $pee = preg_replace('|
' . trim($tinkle, "\n") . "
\n"; - $pee = preg_replace('|\s*?
|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace - $pee = preg_replace('!([^<]+)\s*?((?:div|address|form)[^>]*>)!', "
$1
$2", $pee); - $pee = preg_replace( '||', "$1
", $pee ); + $pee = preg_replace('|
\s*
|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace + $pee = preg_replace('!([^<]+)(div|address|form)>!', "
$1
$2>", $pee); $pee = preg_replace('!\s*(?' . $allblocks . '[^>]*>)\s*
!', "$1", $pee); // don't pee all over a tag $pee = preg_replace("|(
]*)>|i', "', $pee); $pee = preg_replace('!', '", $pee); $pee = str_replace('
\s*(?' . $allblocks . '[^>]*>)!', "$1", $pee); $pee = preg_replace('!(?' . $allblocks . '[^>]*>)\s*
!', "$1", $pee); - if ($br) { - $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', create_function('$matches', 'return str_replace("\n", ")(.*?)!is', 'clean_pre', $pee ); $pee = preg_replace( "|\n$|", '', $pee ); - $pee = preg_replace('/
\s*?(' . get_shortcode_regex() . ')\s*<\/p>/s', '$1', $pee); // don't auto-p wrap shortcodes that stand alone
+
+ if ( !empty($pre_tags) )
+ $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
return $pee;
}
+/**
+ * Newline preservation helper function for wpautop
+ *
+ * @since 3.1.0
+ * @access private
+ * @param array $matches preg_replace_callback matches array
+ * @return string
+ */
+function _autop_newline_preservation_helper( $matches ) {
+ return str_replace("\n", "
>...<
>. + * + * @since 2.9.0 + * + * @param string $pee The content. + * @return string The filtered content. + */ +function shortcode_unautop( $pee ) { + global $shortcode_tags; + + if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) { + return $pee; + } + + $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); + + $pattern = + '/' + . '' // Opening paragraph
+ . '\\s*+' // Optional leading whitespace
+ . '(' // 1: The shortcode
+ . '\\[' // Opening bracket
+ . "($tagregexp)" // 2: Shortcode name
+ . '\\b' // Word boundary
+ // Unroll the loop: Inside the opening shortcode tag
+ . '[^\\]\\/]*' // Not a closing bracket or forward slash
+ . '(?:'
+ . '\\/(?!\\])' // A forward slash not followed by a closing bracket
+ . '[^\\]\\/]*' // Not a closing bracket or forward slash
+ . ')*?'
+ . '(?:'
+ . '\\/\\]' // Self closing tag and closing bracket
+ . '|'
+ . '\\]' // Closing bracket
+ . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags
+ . '[^\\[]*+' // Not an opening bracket
+ . '(?:'
+ . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag
+ . '[^\\[]*+' // Not an opening bracket
+ . ')*+'
+ . '\\[\\/\\2\\]' // Closing shortcode tag
+ . ')?'
+ . ')'
+ . ')'
+ . '\\s*+' // optional trailing whitespace
+ . '<\\/p>' // closing paragraph
+ . '/s';
+
+ return preg_replace( $pattern, '$1', $pee );
+}
+
/**
* Checks to see if a string is utf8 encoded.
*
- * @author bmorel at ssi dot fr
+ * NOTE: This function also accepts 5- and 6-byte sequences, although valid
+ * UTF-8 byte sequences have a maximum length of 4.
*
+ * @author bmorel at ssi dot fr (modified)
* @since 1.2.1
*
- * @param string $Str The string to be checked
- * @return bool True if $Str fits a UTF-8 model, false otherwise.
+ * @param string $str The string to be checked
+ * @return bool True if $str fits a UTF-8 model, false otherwise.
*/
-function seems_utf8($Str) { # by bmorel at ssi dot fr
- $length = strlen($Str);
+function seems_utf8($str) {
+ $length = strlen($str);
for ($i=0; $i < $length; $i++) {
- if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb
- elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb
- elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb
- elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb
- elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb
- elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b
+ $c = ord($str[$i]);
+ if ($c < 0x80) $n = 0; # 0bbbbbbb
+ elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb
+ elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb
+ elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb
+ elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb
+ elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b
else return false; # Does not match any model
for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
- if ((++$i == $length) || ((ord($Str[$i]) & 0xC0) != 0x80))
- return false;
+ if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80))
+ return false;
}
}
return true;
@@ -196,41 +368,37 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr
* @param string $string The text which is to be encoded.
* @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
* @param string $charset Optional. The character encoding of the string. Default is false.
- * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
+ * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false.
* @return string The encoded text with HTML entities.
*/
-function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false )
-{
+function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
$string = (string) $string;
- if ( 0 === strlen( $string ) ) {
+ if ( 0 === strlen( $string ) )
return '';
- }
// Don't bother if there are no specialchars - saves some processing
- if ( !preg_match( '/[&<>"\']/', $string ) ) {
+ if ( ! preg_match( '/[&<>"\']/', $string ) )
return $string;
- }
// Account for the previous behaviour of the function when the $quote_style is not an accepted value
- if ( empty( $quote_style ) ) {
+ if ( empty( $quote_style ) )
$quote_style = ENT_NOQUOTES;
- } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
+ elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) )
$quote_style = ENT_QUOTES;
- }
// Store the site charset as a static to avoid multiple calls to wp_load_alloptions()
- if ( !$charset ) {
+ if ( ! $charset ) {
static $_charset;
- if ( !isset( $_charset ) ) {
+ if ( ! isset( $_charset ) ) {
$alloptions = wp_load_alloptions();
$_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
}
$charset = $_charset;
}
- if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
+
+ if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) )
$charset = 'UTF-8';
- }
$_quote_style = $quote_style;
@@ -242,22 +410,27 @@ function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false
}
// Handle double encoding ourselves
- if ( !$double_encode ) {
+ if ( $double_encode ) {
+ $string = @htmlspecialchars( $string, $quote_style, $charset );
+ } else {
+ // Decode &amp; into &
$string = wp_specialchars_decode( $string, $_quote_style );
- $string = preg_replace( '/&(#?x?[0-9]+|[a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
- }
- $string = @htmlspecialchars( $string, $quote_style, $charset );
+ // Guarantee every &entity; is valid or re-encode the &
+ $string = wp_kses_normalize_entities( $string );
- // Handle double encoding ourselves
- if ( !$double_encode ) {
- $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string );
+ // Now re-encode everything except &entity;
+ $string = preg_split( '/(?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
+
+ for ( $i = 0; $i < count( $string ); $i += 2 )
+ $string[$i] = @htmlspecialchars( $string[$i], $quote_style, $charset );
+
+ $string = implode( '', $string );
}
// Backwards compatibility
- if ( 'single' === $_quote_style ) {
+ if ( 'single' === $_quote_style )
$string = str_replace( "'", ''', $string );
- }
return $string;
}
@@ -273,11 +446,10 @@ function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false
* @since 2.8
*
* @param string $string The text which is to be decoded.
- * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
+ * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old _wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
* @return string The decoded text without HTML entities.
*/
-function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES )
-{
+function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
$string = (string) $string;
if ( 0 === strlen( $string ) ) {
@@ -289,7 +461,7 @@ function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES )
return $string;
}
- // Match the previous behaviour of wp_specialchars() when the $quote_style is not an accepted value
+ // Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value
if ( empty( $quote_style ) ) {
$quote_style = ENT_NOQUOTES;
} elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
@@ -334,8 +506,7 @@ function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES )
* @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
* @return string The checked text.
*/
-function wp_check_invalid_utf8( $string, $strip = false )
-{
+function wp_check_invalid_utf8( $string, $strip = false ) {
$string = (string) $string;
if ( 0 === strlen( $string ) ) {
@@ -441,34 +612,38 @@ function remove_accents($string) {
if (seems_utf8($string)) {
$chars = array(
// Decompositions for Latin-1 Supplement
+ chr(194).chr(170) => 'a', chr(194).chr(186) => 'o',
chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
- chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
- chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
- chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
- chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
- chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
+ chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
+ chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
+ chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
+ chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
+ chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
+ chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
- chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
- chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
- chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
- chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
+ chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
+ chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
+ chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
+ chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
+ chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
- chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
- chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
- chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
- chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
- chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
- chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
- chr(195).chr(191) => 'y',
+ chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
+ chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
+ chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
+ chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
+ chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
+ chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
+ chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
+ chr(195).chr(191) => 'y', chr(195).chr(152) => 'O',
// Decompositions for Latin Extended-A
chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
@@ -534,10 +709,68 @@ function remove_accents($string) {
chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
+ // Decompositions for Latin Extended-B
+ chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
+ chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
// Euro Sign
chr(226).chr(130).chr(172) => 'E',
// GBP (Pound) Sign
- chr(194).chr(163) => '');
+ chr(194).chr(163) => '',
+ // Vowels with diacritic (Vietnamese)
+ // unmarked
+ chr(198).chr(160) => 'O', chr(198).chr(161) => 'o',
+ chr(198).chr(175) => 'U', chr(198).chr(176) => 'u',
+ // grave accent
+ chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a',
+ chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a',
+ chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e',
+ chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o',
+ chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o',
+ chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u',
+ chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y',
+ // hook
+ chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a',
+ chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a',
+ chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a',
+ chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e',
+ chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e',
+ chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i',
+ chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o',
+ chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o',
+ chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o',
+ chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u',
+ chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u',
+ chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y',
+ // tilde
+ chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a',
+ chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a',
+ chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e',
+ chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e',
+ chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o',
+ chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o',
+ chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u',
+ chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y',
+ // acute accent
+ chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a',
+ chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a',
+ chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e',
+ chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o',
+ chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o',
+ chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u',
+ // dot below
+ chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a',
+ chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a',
+ chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a',
+ chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e',
+ chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e',
+ chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i',
+ chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o',
+ chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o',
+ chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o',
+ chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u',
+ chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u',
+ chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y',
+ );
$string = strtr($string, $chars);
} else {
@@ -565,38 +798,69 @@ function remove_accents($string) {
}
/**
- * Filters certain characters from the file name.
+ * Sanitizes a filename replacing whitespace with dashes
*
- * Turns all strings to lowercase removing most characters except alphanumeric
- * with spaces, dashes and periods. All spaces and underscores are converted to
- * dashes. Multiple dashes are converted to a single dash. Finally, if the file
- * name ends with a dash, it is removed.
+ * Removes special characters that are illegal in filenames on certain
+ * operating systems and special characters requiring special escaping
+ * to manipulate at the command line. Replaces spaces and consecutive
+ * dashes with a single dash. Trim period, dash and underscore from beginning
+ * and end of filename.
*
* @since 2.1.0
*
- * @param string $name The file name
- * @return string Sanitized file name
- */
-function sanitize_file_name( $name ) { // Like sanitize_title, but with periods
- $name = strtolower( $name );
- $name = preg_replace('/&.+?;/', '', $name); // kill entities
- $name = str_replace( '_', '-', $name );
- $name = preg_replace('/[^a-z0-9\s-.]/', '', $name);
- $name = preg_replace('/\s+/', '-', $name);
- $name = preg_replace('|-+|', '-', $name);
- $name = trim($name, '-');
- return $name;
+ * @param string $filename The filename to be sanitized
+ * @return string The sanitized filename
+ */
+function sanitize_file_name( $filename ) {
+ $filename_raw = $filename;
+ $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0));
+ $special_chars = apply_filters('sanitize_file_name_chars', $special_chars, $filename_raw);
+ $filename = str_replace($special_chars, '', $filename); // remove every listed special character
+ $filename = preg_replace('/[\s-]+/', '-', $filename); // collapse each run of whitespace and/or dashes into a single dash
+ $filename = trim($filename, '.-_'); // strip leading and trailing periods, dashes and underscores
+
+ // Split the filename into a base and extension[s]
+ $parts = explode('.', $filename);
+
+ // Return early when there is at most one extension (two or fewer dot-separated parts)
+ if ( count($parts) <= 2 )
+ return apply_filters('sanitize_file_name', $filename, $filename_raw);
+
+ // Process multiple extensions: the first part is the base name, the last part is the final extension
+ $filename = array_shift($parts);
+ $extension = array_pop($parts);
+ $mimes = get_allowed_mime_types();
+
+ // Loop over any intermediate extensions. Munge each one with a trailing underscore if it is 2 to 5 letters
+ // (optionally followed by a single digit) and matches none of the extension patterns used as keys of $mimes.
+ foreach ( (array) $parts as $part) {
+ $filename .= '.' . $part;
+
+ if ( preg_match("/^[a-zA-Z]{2,5}\d?$/", $part) ) {
+ $allowed = false;
+ foreach ( $mimes as $ext_preg => $mime_match ) {
+ $ext_preg = '!^(' . $ext_preg . ')$!i'; // anchor the key as a whole-string, case-insensitive pattern
+ if ( preg_match( $ext_preg, $part ) ) {
+ $allowed = true;
+ break;
+ }
+ }
+ if ( !$allowed )
+ $filename .= '_';
+ }
+ }
+ $filename .= '.' . $extension;
+
+ return apply_filters('sanitize_file_name', $filename, $filename_raw);
}
/**
* Sanitize username stripping out unsafe characters.
*
- * If $strict is true, only alphanumeric characters (as well as _, space, ., -,
- * @) are returned.
- * Removes tags, octets, entities, and if strict is enabled, will remove all
- * non-ASCII characters. After sanitizing, it passes the username, raw username
- * (the username in the parameter), and the strict parameter as parameters for
- * the filter.
+ * Removes tags, octets, entities, and if strict is enabled, will only keep
+ * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
+ * raw username (the username in the parameter), and the value of $strict as
+ * parameters for the 'sanitize_user' filter.
*
* @since 2.0.0
* @uses apply_filters() Calls 'sanitize_user' hook on username, raw username,
@@ -608,19 +872,38 @@ function sanitize_file_name( $name ) { // Like sanitize_title, but with periods
*/
function sanitize_user( $username, $strict = false ) {
$raw_username = $username;
- $username = strip_tags($username);
+ $username = wp_strip_all_tags( $username );
+ $username = remove_accents( $username );
// Kill octets
- $username = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '', $username);
- $username = preg_replace('/&.+?;/', '', $username); // Kill entities
+ $username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username );
+ $username = preg_replace( '/&.+?;/', '', $username ); // Kill entities
// If strict, reduce to ASCII for max portability.
if ( $strict )
- $username = preg_replace('|[^a-z0-9 _.\-@]|i', '', $username);
+ $username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
+ $username = trim( $username );
// Consolidate contiguous whitespace
- $username = preg_replace('|\s+|', ' ', $username);
+ $username = preg_replace( '|\s+|', ' ', $username );
- return apply_filters('sanitize_user', $username, $raw_username, $strict);
+ return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
+}
+
+/**
+ * Sanitize a string key.
+ *
+ * Keys are used as internal identifiers. The key is converted to lowercase and
+ * every character other than a-z, 0-9, underscore and dash is then removed.
+ *
+ * @since 3.0.0
+ * @uses apply_filters() Calls 'sanitize_key' hook on the sanitized key and the raw key.
+ *
+ * @param string $key String key
+ * @return string Sanitized key, as returned by the 'sanitize_key' filter
+ */
+function sanitize_key( $key ) {
+ $raw_key = $key;
+ $key = strtolower( $key );
+ $key = preg_replace( '/[^a-z0-9_\-]/', '', $key );
+ return apply_filters( 'sanitize_key', $key, $raw_key );
}
/**
@@ -634,11 +917,16 @@ function sanitize_user( $username, $strict = false ) {
*
* @param string $title The string to be sanitized.
* @param string $fallback_title Optional. A title to use if $title is empty.
+ * @param string $context Optional. The operation for which the string is sanitized
* @return string The sanitized string.
*/
-function sanitize_title($title, $fallback_title = '') {
- $title = strip_tags($title);
- $title = apply_filters('sanitize_title', $title);
+function sanitize_title($title, $fallback_title = '', $context = 'save') {
+ $raw_title = $title;
+
+ if ( 'save' == $context )
+ $title = remove_accents($title);
+
+ $title = apply_filters('sanitize_title', $title, $raw_title, $context);
if ( '' === $title || false === $title )
$title = $fallback_title;
@@ -646,8 +934,12 @@ function sanitize_title($title, $fallback_title = '') {
return $title;
}
+/**
+ * Sanitizes a title using the 'query' context.
+ *
+ * Calls sanitize_title() with an empty fallback title and 'query' as the context.
+ *
+ * @param string $title The string to be sanitized.
+ * @return string The sanitized string.
+ */
+function sanitize_title_for_query($title) {
+ return sanitize_title($title, '', 'query');
+}
+
/**
- * Sanitizes title, replacing whitespace with dashes.
+ * Sanitizes title, replacing whitespace and a few other characters with dashes.
*
* Limits the output to alphanumeric characters, underscore (_) and dash (-).
* Whitespace becomes a dash.
@@ -655,9 +947,11 @@ function sanitize_title($title, $fallback_title = '') {
* @since 1.2.0
*
* @param string $title The title to be sanitized.
+ * @param string $raw_title Optional. Not used.
+ * @param string $context Optional. The operation for which the string is sanitized.
* @return string The sanitized title.
*/
-function sanitize_title_with_dashes($title) {
+function sanitize_title_with_dashes($title, $raw_title = '', $context = 'display') {
$title = strip_tags($title);
// Preserve escaped octets.
$title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
@@ -666,7 +960,6 @@ function sanitize_title_with_dashes($title) {
// Restore octets.
$title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);
- $title = remove_accents($title);
if (seems_utf8($title)) {
if (function_exists('mb_strtolower')) {
$title = mb_strtolower($title, 'UTF-8');
@@ -676,6 +969,29 @@ function sanitize_title_with_dashes($title) {
$title = strtolower($title);
$title = preg_replace('/&.+?;/', '', $title); // kill entities
+ $title = str_replace('.', '-', $title);
+
+ if ( 'save' == $context ) {
+ // Convert nbsp, ndash and mdash to hyphens
+ $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title );
+
+ // Strip these characters entirely
+ $title = str_replace( array(
+ // iexcl and iquest
+ '%c2%a1', '%c2%bf',
+ // angle quotes
+ '%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba',
+ // curly quotes
+ '%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d',
+ '%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f',
+ // copy, reg, deg, hellip and trade
+ '%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2',
+ ), '', $title );
+
+ // Convert times to x
+ $title = str_replace( '%c3%97', 'x', $title );
+ }
+
$title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
$title = preg_replace('/\s+/', '-', $title);
$title = preg_replace('|-+|', '-', $title);
@@ -702,6 +1018,34 @@ function sanitize_sql_orderby( $orderby ){
return $orderby;
}
+/**
+ * Sanitizes an HTML classname to ensure it only contains valid characters
+ *
+ * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty
+ * string then it will return the alternative value supplied.
+ *
+ * @todo Expand to support the full range of CDATA that a class attribute can contain.
+ *
+ * @since 2.8.0
+ *
+ * @param string $class The classname to be sanitized
+ * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
+ * Defaults to an empty string.
+ * @return string The sanitized value
+ */
+function sanitize_html_class( $class, $fallback = '' ) {
+ //Strip out any % encoded octets
+ $sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class );
+
+ //Limit to A-Z,a-z,0-9,_,-
+ $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized );
+
+ if ( '' == $sanitized )
+ $sanitized = $fallback;
+
+ return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback );
+}
+
/**
* Converts a number of characters from a string.
*
@@ -716,6 +1060,9 @@ function sanitize_sql_orderby( $orderby ){
* @return string Converted string.
*/
function convert_chars($content, $deprecated = '') {
+ if ( !empty( $deprecated ) )
+ _deprecated_argument( __FUNCTION__, '0.71' );
+
// Translation of invalid Unicode references range to valid range
$wp_htmltranswinuni = array(
'&#128;' => '&#8364;', // the Euro sign
@@ -732,7 +1079,7 @@ function convert_chars($content, $deprecated = '') {
'&#139;' => '&#8249;',
'&#140;' => '&#338;',
'&#141;' => '',
- '&#142;' => '&#382;',
+ '&#142;' => '&#381;',
'&#143;' => '',
'&#144;' => '',
'&#145;' => '&#8216;',
@@ -748,7 +1095,7 @@ function convert_chars($content, $deprecated = '') {
'&#155;' => '&#8250;',
'&#156;' => '&#339;',
'&#157;' => '',
- '&#158;' => '',
+ '&#158;' => '&#382;',
'&#159;' => '&#376;'
);
@@ -769,40 +1116,15 @@ function convert_chars($content, $deprecated = '') {
return $content;
}
-/**
- * Fixes javascript bugs in browsers.
- *
- * Converts unicode characters to HTML numbered entities.
- *
- * @since 1.5.0
- * @uses $is_macIE
- * @uses $is_winIE
- *
- * @param string $text Text to be made safe.
- * @return string Fixed text.
- */
-function funky_javascript_fix($text) {
- // Fixes for browsers' javascript bugs
- global $is_macIE, $is_winIE;
-
- /** @todo use preg_replace_callback() instead */
- if ( $is_winIE || $is_macIE )
- $text = preg_replace("/\%u([0-9A-F]{4,4})/e", "'&#'.base_convert('\\1',16,10).';'", $text);
-
- return $text;
-}
-
/**
* Will only balance the tags if forced to and the option is set to balance tags.
*
- * The option 'use_balanceTags' is used for whether the tags will be balanced.
- * Both the $force parameter and 'use_balanceTags' option will have to be true
- * before the tags will be balanced.
+ * The option 'use_balanceTags' is used to determine whether the tags will be balanced.
*
* @since 0.71
*
* @param string $text Text to be balanced
- * @param bool $force Forces balancing, ignoring the value of the option. Default false.
+ * @param bool $force If true, forces balancing, ignoring the value of the option. Default false.
* @return string Balanced text
*/
function balanceTags( $text, $force = false ) {
@@ -817,7 +1139,7 @@ function balanceTags( $text, $force = false ) {
* @since 2.0.4
*
* @author Leonard Lin