X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/6359b807ff8b4ffa151d8756cdefb861c6c1d4db..f739362bc2205036998dacdccb4f07eda9048877:/wp-includes/formatting.php?ds=sidebyside diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 0151e1fc..2701c1a1 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -20,17 +20,43 @@ * * Code within certain html blocks are skipped. * + * Do not use this function before the {@see 'init'} action hook; everything will break. + * * @since 0.71 - * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases + * + * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases + * @global array $shortcode_tags + * @staticvar array $static_characters + * @staticvar array $static_replacements + * @staticvar array $dynamic_characters + * @staticvar array $dynamic_replacements + * @staticvar array $default_no_texturize_tags + * @staticvar array $default_no_texturize_shortcodes + * @staticvar bool $run_texturize * * @param string $text The text to be formatted - * @param bool $reset Set to true for unit testing. Translated patterns will reset. + * @param bool $reset Set to true for unit testing. Translated patterns will reset. * @return string The string replaced with html entities */ -function wptexturize($text, $reset = false) { +function wptexturize( $text, $reset = false ) { global $wp_cockneyreplace, $shortcode_tags; - static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, - $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true; + static $static_characters = null, + $static_replacements = null, + $dynamic_characters = null, + $dynamic_replacements = null, + $default_no_texturize_tags = null, + $default_no_texturize_shortcodes = null, + $run_texturize = true, + $apos = null, + $prime = null, + $double_prime = null, + $opening_quote = null, + $closing_quote = null, + $opening_single_quote = null, + $closing_single_quote = null, + $open_q_flag = '', + $open_sq_flag = '', + $apos_flag = ''; // If there's nothing to do, just stop. if ( empty( $text ) || false === $run_texturize ) { @@ -40,7 +66,7 @@ function wptexturize($text, $reset = false) { // Set up static variables. Run once only. if ( $reset || ! isset( $static_characters ) ) { /** - * Filter whether to skip running wptexturize(). + * Filters whether to skip running wptexturize(). * * Passing false to the filter will effectively short-circuit wptexturize(). * returning the original text passed to the function instead. @@ -88,11 +114,16 @@ function wptexturize($text, $reset = false) { if ( isset($wp_cockneyreplace) ) { $cockney = array_keys( $wp_cockneyreplace ); $cockneyreplace = array_values( $wp_cockneyreplace ); - } elseif ( "'" != $apos ) { // Only bother if we're doing a replacement. - $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause", "'em" ); - $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause", $apos . "em" ); } else { - $cockney = $cockneyreplace = array(); + /* translators: This is a comma-separated list of words that defy the syntax of quotations in normal use, + * for example... 'We do not have enough words yet' ... is a typical quoted phrase. But when we write + * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes. + */ + $cockney = explode( ',', _x( "'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em", + 'Comma-separated list of words to texturize in your language' ) ); + + $cockneyreplace = explode( ',', _x( '’tain’t,’twere,’twas,’tis,’twill,’til,’bout,’nuff,’round,’cause,’em', + 'Comma-separated list of replacement words in your language' ) ); } $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); @@ -108,40 +139,30 @@ function wptexturize($text, $reset = false) { // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. if ( "'" !== $apos || "'" !== $closing_single_quote ) { - $dynamic[ '/\'(\d\d)\'(?=\Z|[.,)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote; + $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote; } if ( "'" !== $apos || '"' !== $closing_quote ) { - $dynamic[ '/\'(\d\d)"(?=\Z|[.,)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote; + $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote; } // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. if ( "'" !== $apos ) { - $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos; + $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag; } // Quoted Numbers like '0.42' if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { - $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote; + $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote; } // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. if ( "'" !== $opening_single_quote ) { - $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $opening_single_quote; + $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $open_sq_flag; } // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. if ( "'" !== $apos ) { - $dynamic[ '/(? is found. - . '-(?!->)' // Dash not followed by end of comment. - . '[^\-]*+' // Consume non-dashes. - . ')*+' // Loop possessively. - . '(?:-->)?'; // End of comment. If not found, match all input. - - $shortcode_regex = - '\[' // Find start of shortcode. - . '[\/\[]?' // Shortcodes may begin with [/ or [[ - . $tagregexp // Only match registered shortcodes, because performance. - . '(?:' - . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. - . '|' - . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. - . ')*+' // Possessive critical. - . '\]' // Find end of shortcode. - . '\]?'; // Shortcodes may end with ]] - - $regex = - '/(' // Capture the entire match. - . '<' // Find start of element. - . '(?(?=!--)' // Is this a comment? - . $comment_regex // Find end of comment. - . '|' - . '[^>]*>' // Find end of element. - . ')' - . '|' - . $shortcode_regex // Find shortcodes. - . ')/s'; + preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches ); + $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); + $found_shortcodes = ! empty( $tagnames ); + $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : ''; + $regex = _get_wptexturize_split_regex( $shortcode_regex ); $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); foreach ( $textarr as &$curl ) { // Only call _wptexturize_pushpop_element if $curl is a delimiter. $first = $curl[0]; - if ( '<' === $first && ''; + $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; + $prime_pattern = "/(?<=\\d)$needle/"; + $flag_after_digit = "/(?<=\\d)$flag/"; + $flag_no_digit = "/(? &$sentence ) { + if ( false === strpos( $sentence, $needle ) ) { + continue; + } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) { + $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count ); + if ( $count > 1 ) { + // This sentence appears to have multiple closing quotes. Attempt Vulcan logic. + $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 ); + if ( 0 === $count2 ) { + // Try looking for a quote followed by a period. + $count2 = substr_count( $sentence, "$flag." ); + if ( $count2 > 0 ) { + // Assume the rightmost quote-period match is the end of quotation. + $pos = strrpos( $sentence, "$flag." ); + } else { + // When all else fails, make the rightmost candidate a closing quote. + // This is most likely to be problematic in the context of bug #18549. + $pos = strrpos( $sentence, $flag ); + } + $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) ); + } + // Use conventional replacement on any remaining primes and quotes. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); + $sentence = str_replace( $flag, $close_quote, $sentence ); + } elseif ( 1 == $count ) { + // Found only one closing quote candidate, so give it priority over primes. + $sentence = str_replace( $flag, $close_quote, $sentence ); + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } else { + // No closing quotes found. Just run primes pattern. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } + } else { + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); + } + if ( '"' == $needle && false !== strpos( $sentence, '"' ) ) { + $sentence = str_replace( '"', $close_quote, $sentence ); + } + } - return $text; + return implode( $open_quote, $sentences ); } /** @@ -313,12 +369,12 @@ function wptexturize($text, $reset = false) { * @access private * * @param string $text Text to check. Must be a tag like `` or `[shortcode]`. - * @param array $stack List of open tag elements. - * @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names. + * @param array $stack List of open tag elements. + * @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names. */ -function _wptexturize_pushpop_element($text, &$stack, $disabled_elements) { +function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) { // Is it an opening tag or closing tag? - if ( '/' !== $text[1] ) { + if ( isset( $text[1] ) && '/' !== $text[1] ) { $opening_tag = true; $name_offset = 1; } elseif ( 0 == count( $stack ) ) { @@ -370,7 +426,7 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements) { * after paragraphing. Default true. * @return string Text which has been converted into correct paragraph tags. */ -function wpautop($pee, $br = true) { +function wpautop( $pee, $br = true ) { $pre_tags = array(); if ( trim($pee) === '' ) @@ -408,12 +464,12 @@ function wpautop($pee, $br = true) { $pee .= $last_pee; } // Change multiple
s into two line breaks, which will turn into paragraphs. - $pee = preg_replace('|
\s*
|', "\n\n", $pee); + $pee = preg_replace('|\s*|', "\n\n", $pee); $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'; // Add a single line break above block-level opening tags. - $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); + $pee = preg_replace('!(<' . $allblocks . '[\s/>])!', "\n$1", $pee); // Add a double line break below block-level closing tags. $pee = preg_replace('!()!', "$1\n\n", $pee); @@ -465,24 +521,24 @@ function wpautop($pee, $br = true) { } // Under certain strange conditions it could create a P of entirely whitespace. - $pee = preg_replace('|

\s*

|', '', $pee); + $pee = preg_replace('|

\s*

|', '', $pee); // Add a closing

inside

,
, or
tag if missing. $pee = preg_replace('!

([^<]+)!', "

$1

", $pee); - + // If an opening or closing block element tag is wrapped in a

, unwrap it. - $pee = preg_replace('!

\s*(]*>)\s*

!', "$1", $pee); - + $pee = preg_replace('!

\s*(]*>)\s*

!', "$1", $pee); + // In some cases
  • may get wrapped in

    , fix them. - $pee = preg_replace("|

    (|", "$1", $pee); - + $pee = preg_replace("|

    (|", "$1", $pee); + // If a

    is wrapped with a

    , move it inside the

    . $pee = preg_replace('|

    ]*)>|i', "

    ", $pee); $pee = str_replace('

    ', '

    ', $pee); - + // If an opening or closing block element tag is preceded by an opening

    tag, remove it. $pee = preg_replace('!

    \s*(]*>)!', "$1", $pee); - + // If an opening or closing block element tag is followed by a closing

    tag, remove it. $pee = preg_replace('!(]*>)\s*

    !', "$1", $pee); @@ -491,8 +547,11 @@ function wpautop($pee, $br = true) { // Replace newlines that shouldn't be touched with a placeholder. $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee); + // Normalize
    + $pee = str_replace( array( '
    ', '
    ' ), '
    ', $pee ); + // Replace any new line characters that aren't preceded by a
    with a
    . - $pee = preg_replace('|(?)\s*\n|', "
    \n", $pee); + $pee = preg_replace('|(?)\s*\n|', "
    \n", $pee); // Replace newline placeholders with newlines. $pee = str_replace('', "\n", $pee); @@ -500,7 +559,7 @@ function wpautop($pee, $br = true) { // If a
    tag is after an opening or closing block tag, remove it. $pee = preg_replace('!(]*>)\s*
    !', "$1", $pee); - + // If a
    tag is before a subset of opening or closing block tags, remove it. $pee = preg_replace('!
    (\s*]*>)!', '$1', $pee); $pee = preg_replace( "|\n

    $|", '

    ', $pee ); @@ -510,7 +569,9 @@ function wpautop($pee, $br = true) { $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee); // Restore newlines in all elements. - $pee = str_replace( " ", "\n", $pee ); + if ( false !== strpos( $pee, '' ) ) { + $pee = str_replace( array( ' ', '' ), "\n", $pee ); + } return $pee; } @@ -524,6 +585,17 @@ function wpautop($pee, $br = true) { * @return array The formatted text. */ function wp_html_split( $input ) { + return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE ); +} + +/** + * Retrieve the regular expression for an HTML element. + * + * @since 4.4.0 + * + * @return string The regular expression + */ +function get_html_split_regex() { static $regex; if ( ! isset( $regex ) ) { @@ -544,22 +616,100 @@ function wp_html_split( $input ) { . ')*+' // Loop possessively. . '(?:]]>)?'; // End of comment. If not found, match all input. + $escaped = + '(?=' // Is the element escaped? + . '!--' + . '|' + . '!\[CDATA\[' + . ')' + . '(?(?=!-)' // If yes, which type? + . $comments + . '|' + . $cdata + . ')'; + $regex = '/(' // Capture the entire match. . '<' // Find start of element. - . '(?(?=!--)' // Is this a comment? - . $comments // Find end of comment. - . '|' - . '(?(?=!\[CDATA\[)' // Is this a comment? - . $cdata // Find end of comment. - . '|' - . '[^>]*>?' // Find end of element. If not found, match all input. - . ')' + . '(?' // Conditional expression follows. + . $escaped // Find end of escaped element. + . '|' // ... else ... + . '[^>]*>?' // Find end of normal element. . ')' - . ')/s'; + . ')/'; + } + + return $regex; +} + +/** + * Retrieve the combined regular expression for HTML and shortcodes. + * + * @access private + * @ignore + * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap. + * @since 4.4.0 + * + * @param string $shortcode_regex The result from _get_wptexturize_shortcode_regex(). Optional. + * @return string The regular expression + */ +function _get_wptexturize_split_regex( $shortcode_regex = '' ) { + static $html_regex; + + if ( ! isset( $html_regex ) ) { + $comment_regex = + '!' // Start of comment, after the <. + . '(?:' // Unroll the loop: Consume everything until --> is found. + . '-(?!->)' // Dash not followed by end of comment. + . '[^\-]*+' // Consume non-dashes. + . ')*+' // Loop possessively. + . '(?:-->)?'; // End of comment. If not found, match all input. + + $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. + '<' // Find start of element. + . '(?(?=!--)' // Is this a comment? + . $comment_regex // Find end of comment. + . '|' + . '[^>]*>?' // Find end of element. If not found, match all input. + . ')'; + } + + if ( empty( $shortcode_regex ) ) { + $regex = '/(' . $html_regex . ')/'; + } else { + $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/'; } - return preg_split( $regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE ); + return $regex; +} + +/** + * Retrieve the regular expression for shortcodes. + * + * @access private + * @ignore + * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap. + * @since 4.4.0 + * + * @param array $tagnames List of shortcodes to find. + * @return string The regular expression + */ +function _get_wptexturize_shortcode_regex( $tagnames ) { + $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); + $tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex(). + $regex = + '\[' // Find start of shortcode. + . '[\/\[]?' // Shortcodes may begin with [/ or [[ + . $tagregexp // Only match registered shortcodes, because performance. + . '(?:' + . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. + . '|' + . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. + . ')*+' // Possessive critical. + . '\]' // Find end of shortcode. + . '\]?'; // Shortcodes may end with ]] + + return $regex; } /** @@ -581,8 +731,8 @@ function wp_replace_in_html_tags( $haystack, $replace_pairs ) { // Extract $needle and $replace. foreach ( $replace_pairs as $needle => $replace ); - // Loop through delimeters (elements) only. - for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { + // Loop through delimiters (elements) only. + for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { if ( false !== strpos( $textarr[$i], $needle ) ) { $textarr[$i] = str_replace( $needle, $replace, $textarr[$i] ); $changed = true; @@ -592,8 +742,8 @@ function wp_replace_in_html_tags( $haystack, $replace_pairs ) { // Extract all $needles. $needles = array_keys( $replace_pairs ); - // Loop through delimeters (elements) only. - for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { + // Loop through delimiters (elements) only. + for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { foreach ( $needles as $needle ) { if ( false !== strpos( $textarr[$i], $needle ) ) { $textarr[$i] = strtr( $textarr[$i], $replace_pairs ); @@ -622,7 +772,7 @@ function wp_replace_in_html_tags( $haystack, $replace_pairs ) { * @return string */ function _autop_newline_preservation_helper( $matches ) { - return str_replace("\n", "", $matches[0]); + return str_replace( "\n", "", $matches[0] ); } /** @@ -632,6 +782,8 @@ function _autop_newline_preservation_helper( $matches ) { * * @since 2.9.0 * + * @global array $shortcode_tags + * * @param string $pee The content. * @return string The filtered content. */ @@ -675,7 +827,7 @@ function shortcode_unautop( $pee ) { . ')' . '(?:' . $spaces . ')*+' // optional trailing whitespace . '<\\/p>' // closing paragraph - . '/s'; + . '/'; return preg_replace( $pattern, '$1', $pee ); } @@ -692,7 +844,7 @@ function shortcode_unautop( $pee ) { * @param string $str The string to be checked * @return bool True if $str fits a UTF-8 model, false otherwise. */ -function seems_utf8($str) { +function seems_utf8( $str ) { mbstring_binary_safe_encoding(); $length = strlen($str); reset_mbstring_encoding(); @@ -724,10 +876,16 @@ function seems_utf8($str) { * @since 1.2.2 * @access private * - * @param string $string The text which is to be encoded. - * @param int $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. - * @param string $charset Optional. The character encoding of the string. Default is false. - * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false. + * @staticvar string $_charset + * + * @param string $string The text which is to be encoded. + * @param int|string $quote_style Optional. Converts double quotes if set to ENT_COMPAT, + * both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. + * Also compatible with old values; converting single quotes if set to 'single', + * double if set to 'double' or both if otherwise set. + * Default is ENT_NOQUOTES. + * @param string $charset Optional. The character encoding of the string. Default is false. + * @param bool $double_encode Optional. Whether to encode existing html entities. Default is false. * @return string The encoded text with HTML entities. */ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { @@ -748,7 +906,7 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals // Store the site charset as a static to avoid multiple calls to wp_load_alloptions() if ( ! $charset ) { - static $_charset; + static $_charset = null; if ( ! isset( $_charset ) ) { $alloptions = wp_load_alloptions(); $_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : ''; @@ -768,26 +926,15 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals $quote_style = ENT_NOQUOTES; } - // Handle double encoding ourselves - if ( $double_encode ) { - $string = @htmlspecialchars( $string, $quote_style, $charset ); - } else { - // Decode & into & - $string = wp_specialchars_decode( $string, $_quote_style ); - - // Guarantee every &entity; is valid or re-encode the & + if ( ! $double_encode ) { + // Guarantee every &entity; is valid, convert &garbage; into &garbage; + // This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable. $string = wp_kses_normalize_entities( $string ); - - // Now re-encode everything except &entity; - $string = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE ); - - for ( $i = 0, $c = count( $string ); $i < $c; $i += 2 ) { - $string[$i] = @htmlspecialchars( $string[$i], $quote_style, $charset ); - } - $string = implode( '', $string ); } - // Backwards compatibility + $string = @htmlspecialchars( $string, $quote_style, $charset, $double_encode ); + + // Back-compat. if ( 'single' === $_quote_style ) $string = str_replace( "'", ''', $string ); @@ -804,8 +951,14 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals * * @since 2.8.0 * - * @param string $string The text which is to be decoded. - * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old _wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. + * @param string $string The text which is to be decoded. + * @param string|int $quote_style Optional. Converts double quotes if set to ENT_COMPAT, + * both single and double if set to ENT_QUOTES or + * none if set to ENT_NOQUOTES. + * Also compatible with old _wp_specialchars() values; + * converting single quotes if set to 'single', + * double if set to 'double' or both if otherwise set. + * Default is ENT_NOQUOTES. * @return string The decoded text without HTML entities. */ function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) { @@ -861,8 +1014,11 @@ function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) { * * @since 2.8.0 * - * @param string $string The text which is to be checked. - * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false. + * @staticvar bool $is_utf8 + * @staticvar bool $utf8_pcre + * + * @param string $string The text which is to be checked. + * @param bool $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false. * @return string The checked text. */ function wp_check_invalid_utf8( $string, $strip = false ) { @@ -873,17 +1029,17 @@ function wp_check_invalid_utf8( $string, $strip = false ) { } // Store the site charset as a static to avoid multiple calls to get_option() - static $is_utf8; - if ( !isset( $is_utf8 ) ) { + static $is_utf8 = null; + if ( ! isset( $is_utf8 ) ) { $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ); } - if ( !$is_utf8 ) { + if ( ! $is_utf8 ) { return $string; } // Check for support for utf8 in the installed PCRE library once and store the result in a static - static $utf8_pcre; - if ( !isset( $utf8_pcre ) ) { + static $utf8_pcre = null; + if ( ! isset( $utf8_pcre ) ) { $utf8_pcre = @preg_match( '/^./u', 'a' ); } // We can't demand utf8 in the PCRE installation, so just return the string in those cases @@ -910,7 +1066,7 @@ function wp_check_invalid_utf8( $string, $strip = false ) { * @since 1.5.0 * * @param string $utf8_string - * @param int $length Max length of the string + * @param int $length Max length of the string * @return string String with Unicode encoded for URI. */ function utf8_uri_encode( $utf8_string, $length = 0 ) { @@ -968,12 +1124,379 @@ function utf8_uri_encode( $utf8_string, $length = 0 ) { * * If there are no accent characters, then the string given is just returned. * + * **Accent characters converted:** + * + * Currency signs: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | ------------------- | + * | U+00A3 | £ | (empty) | British Pound sign | + * | U+20AC | € | E | Euro sign | + * + * Decompositions for Latin-1 Supplement: + * + * | Code | Glyph | Replacement | Description | + * | ------- | ----- | ----------- | -------------------------------------- | + * | U+00AA | ª | a | Feminine ordinal indicator | + * | U+00BA | º | o | Masculine ordinal indicator | + * | U+00C0 | À | A | Latin capital letter A with grave | + * | U+00C1 | Á | A | Latin capital letter A with acute | + * | U+00C2 |  | A | Latin capital letter A with circumflex | + * | U+00C3 | à | A | Latin capital letter A with tilde | + * | U+00C4 | Ä | A | Latin capital letter A with diaeresis | + * | U+00C5 | Å | A | Latin capital letter A with ring above | + * | U+00C6 | Æ | AE | Latin capital letter AE | + * | U+00C7 | Ç | C | Latin capital letter C with cedilla | + * | U+00C8 | È | E | Latin capital letter E with grave | + * | U+00C9 | É | E | Latin capital letter E with acute | + * | U+00CA | Ê | E | Latin capital letter E with circumflex | + * | U+00CB | Ë | E | Latin capital letter E with diaeresis | + * | U+00CC | Ì | I | Latin capital letter I with grave | + * | U+00CD | Í | I | Latin capital letter I with acute | + * | U+00CE | Î | I | Latin capital letter I with circumflex | + * | U+00CF | Ï | I | Latin capital letter I with diaeresis | + * | U+00D0 | Ð | D | Latin capital letter Eth | + * | U+00D1 | Ñ | N | Latin capital letter N with tilde | + * | U+00D2 | Ò | O | Latin capital letter O with grave | + * | U+00D3 | Ó | O | Latin capital letter O with acute | + * | U+00D4 | Ô | O | Latin capital letter O with circumflex | + * | U+00D5 | Õ | O | Latin capital letter O with tilde | + * | U+00D6 | Ö | O | Latin capital letter O with diaeresis | + * | U+00D8 | Ø | O | Latin capital letter O with stroke | + * | U+00D9 | Ù | U | Latin capital letter U with grave | + * | U+00DA | Ú | U | Latin capital letter U with acute | + * | U+00DB | Û | U | Latin capital letter U with circumflex | + * | U+00DC | Ü | U | Latin capital letter U with diaeresis | + * | U+00DD | Ý | Y | Latin capital letter Y with acute | + * | U+00DE | Þ | TH | Latin capital letter Thorn | + * | U+00DF | ß | s | Latin small letter sharp s | + * | U+00E0 | à | a | Latin small letter a with grave | + * | U+00E1 | á | a | Latin small letter a with acute | + * | U+00E2 | â | a | Latin small letter a with circumflex | + * | U+00E3 | ã | a | Latin small letter a with tilde | + * | U+00E4 | ä | a | Latin small letter a with diaeresis | + * | U+00E5 | Ã¥ | a | Latin small letter a with ring above | + * | U+00E6 | æ | ae | Latin small letter ae | + * | U+00E7 | ç | c | Latin small letter c with cedilla | + * | U+00E8 | è | e | Latin small letter e with grave | + * | U+00E9 | é | e | Latin small letter e with acute | + * | U+00EA | ê | e | Latin small letter e with circumflex | + * | U+00EB | ë | e | Latin small letter e with diaeresis | + * | U+00EC | ì | i | Latin small letter i with grave | + * | U+00ED | í | i | Latin small letter i with acute | + * | U+00EE | î | i | Latin small letter i with circumflex | + * | U+00EF | ï | i | Latin small letter i with diaeresis | + * | U+00F0 | ð | d | Latin small letter Eth | + * | U+00F1 | ñ | n | Latin small letter n with tilde | + * | U+00F2 | ò | o | Latin small letter o with grave | + * | U+00F3 | ó | o | Latin small letter o with acute | + * | U+00F4 | ô | o | Latin small letter o with circumflex | + * | U+00F5 | õ | o | Latin small letter o with tilde | + * | U+00F6 | ö | o | Latin small letter o with diaeresis | + * | U+00F8 | ø | o | Latin small letter o with stroke | + * | U+00F9 | ù | u | Latin small letter u with grave | + * | U+00FA | ú | u | Latin small letter u with acute | + * | U+00FB | û | u | Latin small letter u with circumflex | + * | U+00FC | ü | u | Latin small letter u with diaeresis | + * | U+00FD | ý | y | Latin small letter y with acute | + * | U+00FE | þ | th | Latin small letter Thorn | + * | U+00FF | ÿ | y | Latin small letter y with diaeresis | + * + * Decompositions for Latin Extended-A: + * + * | Code | Glyph | Replacement | Description | + * | ------- | ----- | ----------- | ------------------------------------------------- | + * | U+0100 | Ā | A | Latin capital letter A with macron | + * | U+0101 | ā | a | Latin small letter a with macron | + * | U+0102 | Ă | A | Latin capital letter A with breve | + * | U+0103 | ă | a | Latin small letter a with breve | + * | U+0104 | Ą | A | Latin capital letter A with ogonek | + * | U+0105 | ą | a | Latin small letter a with ogonek | + * | U+01006 | Ć | C | Latin capital letter C with acute | + * | U+0107 | ć | c | Latin small letter c with acute | + * | U+0108 | Ĉ | C | Latin capital letter C with circumflex | + * | U+0109 | ĉ | c | Latin small letter c with circumflex | + * | U+010A | Ċ | C | Latin capital letter C with dot above | + * | U+010B | ċ | c | Latin small letter c with dot above | + * | U+010C | Č | C | Latin capital letter C with caron | + * | U+010D | č | c | Latin small letter c with caron | + * | U+010E | Ď | D | Latin capital letter D with caron | + * | U+010F | ď | d | Latin small letter d with caron | + * | U+0110 | Đ | D | Latin capital letter D with stroke | + * | U+0111 | đ | d | Latin small letter d with stroke | + * | U+0112 | Ē | E | Latin capital letter E with macron | + * | U+0113 | ē | e | Latin small letter e with macron | + * | U+0114 | Ĕ | E | Latin capital letter E with breve | + * | U+0115 | ĕ | e | Latin small letter e with breve | + * | U+0116 | Ė | E | Latin capital letter E with dot above | + * | U+0117 | ė | e | Latin small letter e with dot above | + * | U+0118 | Ę | E | Latin capital letter E with ogonek | + * | U+0119 | ę | e | Latin small letter e with ogonek | + * | U+011A | Ě | E | Latin capital letter E with caron | + * | U+011B | ě | e | Latin small letter e with caron | + * | U+011C | Ĝ | G | Latin capital letter G with circumflex | + * | U+011D | ĝ | g | Latin small letter g with circumflex | + * | U+011E | Ğ | G | Latin capital letter G with breve | + * | U+011F | ğ | g | Latin small letter g with breve | + * | U+0120 | Ä  | G | Latin capital letter G with dot above | + * | U+0121 | Ä¡ | g | Latin small letter g with dot above | + * | U+0122 | Ä¢ | G | Latin capital letter G with cedilla | + * | U+0123 | Ä£ | g | Latin small letter g with cedilla | + * | U+0124 | Ĥ | H | Latin capital letter H with circumflex | + * | U+0125 | Ä¥ | h | Latin small letter h with circumflex | + * | U+0126 | Ħ | H | Latin capital letter H with stroke | + * | U+0127 | ħ | h | Latin small letter h with stroke | + * | U+0128 | Ĩ | I | Latin capital letter I with tilde | + * | U+0129 | Ä© | i | Latin small letter i with tilde | + * | U+012A | Ī | I | Latin capital letter I with macron | + * | U+012B | Ä« | i | Latin small letter i with macron | + * | U+012C | Ĭ | I | Latin capital letter I with breve | + * | U+012D | Ä­ | i | Latin small letter i with breve | + * | U+012E | Ä® | I | Latin capital letter I with ogonek | + * | U+012F | į | i | Latin small letter i with ogonek | + * | U+0130 | Ä° | I | Latin capital letter I with dot above | + * | U+0131 | ı | i | Latin small letter dotless i | + * | U+0132 | IJ | IJ | Latin capital ligature IJ | + * | U+0133 | ij | ij | Latin small ligature ij | + * | U+0134 | Ä´ | J | Latin capital letter J with circumflex | + * | U+0135 | ĵ | j | Latin small letter j with circumflex | + * | U+0136 | Ķ | K | Latin capital letter K with cedilla | + * | U+0137 | Ä· | k | Latin small letter k with cedilla | + * | U+0138 | ĸ | k | Latin small letter Kra | + * | U+0139 | Ĺ | L | Latin capital letter L with acute | + * | U+013A | ĺ | l | Latin small letter l with acute | + * | U+013B | Ä» | L | Latin capital letter L with cedilla | + * | U+013C | ļ | l | Latin small letter l with cedilla | + * | U+013D | Ľ | L | Latin capital letter L with caron | + * | U+013E | ľ | l | Latin small letter l with caron | + * | U+013F | Ä¿ | L | Latin capital letter L with middle dot | + * | U+0140 | ŀ | l | Latin small letter l with middle dot | + * | U+0141 | Ł | L | Latin capital letter L with stroke | + * | U+0142 | ł | l | Latin small letter l with stroke | + * | U+0143 | Ń | N | Latin capital letter N with acute | + * | U+0144 | ń | n | Latin small letter N with acute | + * | U+0145 | Ņ | N | Latin capital letter N with cedilla | + * | U+0146 | ņ | n | Latin small letter n with cedilla | + * | U+0147 | Ň | N | Latin capital letter N with caron | + * | U+0148 | ň | n | Latin small letter n with caron | + * | U+0149 | ʼn | N | Latin small letter n preceded by apostrophe | + * | U+014A | Ŋ | n | Latin capital letter Eng | + * | U+014B | ŋ | N | Latin small letter Eng | + * | U+014C | Ō | O | Latin capital letter O with macron | + * | U+014D | ō | o | Latin small letter o with macron | + * | U+014E | Ŏ | O | Latin capital letter O with breve | + * | U+014F | ŏ | o | Latin small letter o with breve | + * | U+0150 | Ő | O | Latin capital letter O with double acute | + * | U+0151 | ő | o | Latin small letter o with double acute | + * | U+0152 | Œ | OE | Latin capital ligature OE | + * | U+0153 | œ | oe | Latin small ligature oe | + * | U+0154 | Ŕ | R | Latin capital letter R with acute | + * | U+0155 | ŕ | r | Latin small letter r with acute | + * | U+0156 | Ŗ | R | Latin capital letter R with cedilla | + * | U+0157 | ŗ | r | Latin small letter r with cedilla | + * | U+0158 | Ř | R | Latin capital letter R with caron | + * | U+0159 | ř | r | Latin small letter r with caron | + * | U+015A | Ś | S | Latin capital letter S with acute | + * | U+015B | ś | s | Latin small letter s with acute | + * | U+015C | Ŝ | S | Latin capital letter S with circumflex | + * | U+015D | ŝ | s | Latin small letter s with circumflex | + * | U+015E | Ş | S | Latin capital letter S with cedilla | + * | U+015F | ş | s | Latin small letter s with cedilla | + * | U+0160 | Å  | S | Latin capital letter S with caron | + * | U+0161 | Å¡ | s | Latin small letter s with caron | + * | U+0162 | Å¢ | T | Latin capital letter T with cedilla | + * | U+0163 | Å£ | t | Latin small letter t with cedilla | + * | U+0164 | Ť | T | Latin capital letter T with caron | + * | U+0165 | Å¥ | t | Latin small letter t with caron | + * | U+0166 | Ŧ | T | Latin capital letter T with stroke | + * | U+0167 | ŧ | t | Latin small letter t with stroke | + * | U+0168 | Ũ | U | Latin capital letter U with tilde | + * | U+0169 | Å© | u | Latin small letter u with tilde | + * | U+016A | Ū | U | Latin capital letter U with macron | + * | U+016B | Å« | u | Latin small letter u with macron | + * | U+016C | Ŭ | U | Latin capital letter U with breve | + * | U+016D | Å­ | u | Latin small letter u with breve | + * | U+016E | Å® | U | Latin capital letter U with ring above | + * | U+016F | ů | u | Latin small letter u with ring above | + * | U+0170 | Å° | U | Latin capital letter U with double acute | + * | U+0171 | ű | u | Latin small letter u with double acute | + * | U+0172 | Ų | U | Latin capital letter U with ogonek | + * | U+0173 | ų | u | Latin small letter u with ogonek | + * | U+0174 | Å´ | W | Latin capital letter W with circumflex | + * | U+0175 | ŵ | w | Latin small letter w with circumflex | + * | U+0176 | Ŷ | Y | Latin capital letter Y with circumflex | + * | U+0177 | Å· | y | Latin small letter y with circumflex | + * | U+0178 | Ÿ | Y | Latin capital letter Y with diaeresis | + * | U+0179 | Ź | Z | Latin capital letter Z with acute | + * | U+017A | ź | z | Latin small letter z with acute | + * | U+017B | Å» | Z | Latin capital letter Z with dot above | + * | U+017C | ż | z | Latin small letter z with dot above | + * | U+017D | Ž | Z | Latin capital letter Z with caron | + * | U+017E | ž | z | Latin small letter z with caron | + * | U+017F | Å¿ | s | Latin small letter long s | + * | U+01A0 | Æ  | O | Latin capital letter O with horn | + * | U+01A1 | Æ¡ | o | Latin small letter o with horn | + * | U+01AF | Ư | U | Latin capital letter U with horn | + * | U+01B0 | Æ° | u | Latin small letter u with horn | + * | U+01CD | Ǎ | A | Latin capital letter A with caron | + * | U+01CE | ǎ | a | Latin small letter a with caron | + * | U+01CF | Ǐ | I | Latin capital letter I with caron | + * | U+01D0 | ǐ | i | Latin small letter i with caron | + * | U+01D1 | Ǒ | O | Latin capital letter O with caron | + * | U+01D2 | ǒ | o | Latin small letter o with caron | + * | U+01D3 | Ǔ | U | Latin capital letter U with caron | + * | U+01D4 | ǔ | u | Latin small letter u with caron | + * | U+01D5 | Ǖ | U | Latin capital letter U with diaeresis and macron | + * | U+01D6 | ǖ | u | Latin small letter u with diaeresis and macron | + * | U+01D7 | Ǘ | U | Latin capital letter U with diaeresis and acute | + * | U+01D8 | ǘ | u | Latin small letter u with diaeresis and acute | + * | U+01D9 | Ǚ | U | Latin capital letter U with diaeresis and caron | + * | U+01DA | ǚ | u | Latin small letter u with diaeresis and caron | + * | U+01DB | Ǜ | U | Latin capital letter U with diaeresis and grave | + * | U+01DC | ǜ | u | Latin small letter u with diaeresis and grave | + * + * Decompositions for Latin Extended-B: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | ----------------------------------------- | + * | U+0218 | Ș | S | Latin capital letter S with comma below | + * | U+0219 | ș | s | Latin small letter s with comma below | + * | U+021A | Ț | T | Latin capital letter T with comma below | + * | U+021B | ț | t | Latin small letter t with comma below | + * + * Vowels with diacritic (Chinese, Hanyu Pinyin): + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | ----------------------------------------------------- | + * | U+0251 | ɑ | a | Latin small letter alpha | + * | U+1EA0 | Ạ | A | Latin capital letter A with dot below | + * | U+1EA1 | ạ | a | Latin small letter a with dot below | + * | U+1EA2 | Ả | A | Latin capital letter A with hook above | + * | U+1EA3 | ả | a | Latin small letter a with hook above | + * | U+1EA4 | Ấ | A | Latin capital letter A with circumflex and acute | + * | U+1EA5 | ấ | a | Latin small letter a with circumflex and acute | + * | U+1EA6 | Ầ | A | Latin capital letter A with circumflex and grave | + * | U+1EA7 | ầ | a | Latin small letter a with circumflex and grave | + * | U+1EA8 | Ẩ | A | Latin capital letter A with circumflex and hook above | + * | U+1EA9 | ẩ | a | Latin small letter a with circumflex and hook above | + * | U+1EAA | Ẫ | A | Latin capital letter A with circumflex and tilde | + * | U+1EAB | ẫ | a | Latin small letter a with circumflex and tilde | + * | U+1EA6 | Ậ | A | Latin capital letter A with circumflex and dot below | + * | U+1EAD | ậ | a | Latin small letter a with circumflex and dot below | + * | U+1EAE | Ắ | A | Latin capital letter A with breve and acute | + * | U+1EAF | ắ | a | Latin small letter a with breve and acute | + * | U+1EB0 | Ằ | A | Latin capital letter A with breve and grave | + * | U+1EB1 | ằ | a | Latin small letter a with breve and grave | + * | U+1EB2 | Ẳ | A | Latin capital letter A with breve and hook above | + * | U+1EB3 | ẳ | a | Latin small letter a with breve and hook above | + * | U+1EB4 | Ẵ | A | Latin capital letter A with breve and tilde | + * | U+1EB5 | ẵ | a | Latin small letter a with breve and tilde | + * | U+1EB6 | Ặ | A | Latin capital letter A with breve and dot below | + * | U+1EB7 | ặ | a | Latin small letter a with breve and dot below | + * | U+1EB8 | Ẹ | E | Latin capital letter E with dot below | + * | U+1EB9 | ẹ | e | Latin small letter e with dot below | + * | U+1EBA | Ẻ | E | Latin capital letter E with hook above | + * | U+1EBB | ẻ | e | Latin small letter e with hook above | + * | U+1EBC | Ẽ | E | Latin capital letter E with tilde | + * | U+1EBD | ẽ | e | Latin small letter e with tilde | + * | U+1EBE | Ế | E | Latin capital letter E with circumflex and acute | + * | U+1EBF | ế | e | Latin small letter e with circumflex and acute | + * | U+1EC0 | Ề | E | Latin capital letter E with circumflex and grave | + * | U+1EC1 | ề | e | Latin small letter e with circumflex and grave | + * | U+1EC2 | Ể | E | Latin capital letter E with circumflex and hook above | + * | U+1EC3 | ể | e | Latin small letter e with circumflex and hook above | + * | U+1EC4 | Ễ | E | Latin capital letter E with circumflex and tilde | + * | U+1EC5 | ễ | e | Latin small letter e with circumflex and tilde | + * | U+1EC6 | Ệ | E | Latin capital letter E with circumflex and dot below | + * | U+1EC7 | ệ | e | Latin small letter e with circumflex and dot below | + * | U+1EC8 | Ỉ | I | Latin capital letter I with hook above | + * | U+1EC9 | ỉ | i | Latin small letter i with hook above | + * | U+1ECA | Ị | I | Latin capital letter I with dot below | + * | U+1ECB | ị | i | Latin small letter i with dot below | + * | U+1ECC | Ọ | O | Latin capital letter O with dot below | + * | U+1ECD | ọ | o | Latin small letter o with dot below | + * | U+1ECE | Ỏ | O | Latin capital letter O with hook above | + * | U+1ECF | ỏ | o | Latin small letter o with hook above | + * | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute | + * | U+1ED1 | ố | o | Latin small letter o with circumflex and acute | + * | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave | + * | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave | + * | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above | + * | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above | + * | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde | + * | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde | + * | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below | + * | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below | + * | U+1EDA | Ớ | O | Latin capital letter O with horn and acute | + * | U+1EDB | ớ | o | Latin small letter o with horn and acute | + * | U+1EDC | Ờ | O | Latin capital letter O with horn and grave | + * | U+1EDD | ờ | o | Latin small letter o with horn and grave | + * | U+1EDE | Ở | O | Latin capital letter O with horn and hook above | + * | U+1EDF | ở | o | Latin small letter o with horn and hook above | + * | U+1EE0 | á»  | O | Latin capital letter O with horn and tilde | + * | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde | + * | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below | + * | U+1EE3 | ợ | o | Latin small letter o with horn and dot below | + * | U+1EE4 | Ụ | U | Latin capital letter U with dot below | + * | U+1EE5 | ụ | u | Latin small letter u with dot below | + * | U+1EE6 | Ủ | U | Latin capital letter U with hook above | + * | U+1EE7 | ủ | u | Latin small letter u with hook above | + * | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute | + * | U+1EE9 | ứ | u | Latin small letter u with horn and acute | + * | U+1EEA | Ừ | U | Latin capital letter U with horn and grave | + * | U+1EEB | ừ | u | Latin small letter u with horn and grave | + * | U+1EEC | Ử | U | Latin capital letter U with horn and hook above | + * | U+1EED | á»­ | u | Latin small letter u with horn and hook above | + * | U+1EEE | á»® | U | Latin capital letter U with horn and tilde | + * | U+1EEF | ữ | u | Latin small letter u with horn and tilde | + * | U+1EF0 | á»° | U | Latin capital letter U with horn and dot below | + * | U+1EF1 | á»± | u | Latin small letter u with horn and dot below | + * | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave | + * | U+1EF3 | ỳ | y | Latin small letter y with grave | + * | U+1EF4 | á»´ | Y | Latin capital letter Y with dot below | + * | U+1EF5 | ỵ | y | Latin small letter y with dot below | + * | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above | + * | U+1EF7 | á»· | y | Latin small letter y with hook above | + * | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde | + * | U+1EF9 | ỹ | y | Latin small letter y with tilde | + * + * German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`), + * and German (Switzerland) informal (`de_CH_informal`) locales: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis | + * | U+00E4 | ä | ae | Latin small letter a with diaeresis | + * | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis | + * | U+00F6 | ö | oe | Latin small letter o with diaeresis | + * | U+00DC | Ü | Ue | Latin capital letter U with diaeresis | + * | U+00FC | ü | ue | Latin small letter u with diaeresis | + * | U+00DF | ß | ss | Latin small letter sharp s | + * + * Danish (`da_DK`) locale: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+00C6 | Æ | Ae | Latin capital letter AE | + * | U+00E6 | æ | ae | Latin small letter ae | + * | U+00D8 | Ø | Oe | Latin capital letter O with stroke | + * | U+00F8 | ø | oe | Latin small letter o with stroke | + * | U+00C5 | Å | Aa | Latin capital letter A with ring above | + * | U+00E5 | Ã¥ | aa | Latin small letter a with ring above | + * + * Catalan (`ca`) locale: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+00B7 | l·l | ll | Flown dot (between two Ls) | + * * @since 1.2.1 + * @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`. * * @param string $string Text that might have accent characters * @return string Filtered string with replaced "nice" characters. */ -function remove_accents($string) { +function remove_accents( $string ) { if ( !preg_match('/[\x80-\xff]/', $string) ) return $string; @@ -1157,7 +1680,7 @@ function remove_accents($string) { // Used for locale-specific rules $locale = get_locale(); - if ( 'de_DE' == $locale ) { + if ( 'de_DE' == $locale || 'de_DE_formal' == $locale || 'de_CH' == $locale || 'de_CH_informal' == $locale ) { $chars[ chr(195).chr(132) ] = 'Ae'; $chars[ chr(195).chr(164) ] = 'ae'; $chars[ chr(195).chr(150) ] = 'Oe'; @@ -1172,6 +1695,8 @@ function remove_accents($string) { $chars[ chr(195).chr(184) ] = 'oe'; $chars[ chr(195).chr(133) ] = 'Aa'; $chars[ chr(195).chr(165) ] = 'aa'; + } elseif ( 'ca' === $locale ) { + $chars[ chr(108).chr(194).chr(183).chr(108) ] = 'll'; } $string = strtr($string, $chars); @@ -1208,7 +1733,8 @@ function remove_accents($string) { * operating systems and special characters requiring special escaping * to manipulate at the command line. Replaces spaces and consecutive * dashes with a single dash. Trims period, dash and underscore from beginning - * and end of filename. + * and end of filename. It is not guaranteed that this function will return a + * filename that is allowed to be uploaded. * * @since 2.1.0 * @@ -1217,9 +1743,9 @@ function remove_accents($string) { */ function sanitize_file_name( $filename ) { $filename_raw = $filename; - $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0)); + $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", "%", "+", chr(0)); /** - * Filter the list of characters to remove from a filename. + * Filters the list of characters to remove from a filename. * * @since 2.8.0 * @@ -1233,13 +1759,21 @@ function sanitize_file_name( $filename ) { $filename = preg_replace( '/[\r\n\t -]+/', '-', $filename ); $filename = trim( $filename, '.-_' ); + if ( false === strpos( $filename, '.' ) ) { + $mime_types = wp_get_mime_types(); + $filetype = wp_check_filetype( 'test.' . $filename, $mime_types ); + if ( $filetype['ext'] === $filename ) { + $filename = 'unnamed-file.' . $filetype['ext']; + } + } + // Split the filename into a base and extension[s] $parts = explode('.', $filename); // Return if only one extension if ( count( $parts ) <= 2 ) { /** - * Filter a sanitized filename string. + * Filters a sanitized filename string. * * @since 2.8.0 * @@ -1285,12 +1819,12 @@ function sanitize_file_name( $filename ) { * Removes tags, octets, entities, and if strict is enabled, will only keep * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username, * raw username (the username in the parameter), and the value of $strict as - * parameters for the 'sanitize_user' filter. + * parameters for the {@see 'sanitize_user'} filter. * * @since 2.0.0 * * @param string $username The username to be sanitized. - * @param bool $strict If set limits $username to specific characters. Default false. + * @param bool $strict If set limits $username to specific characters. Default false. * @return string The sanitized username, after passing through filters. */ function sanitize_user( $username, $strict = false ) { @@ -1310,7 +1844,7 @@ function sanitize_user( $username, $strict = false ) { $username = preg_replace( '|\s+|', ' ', $username ); /** - * Filter a sanitized username string. + * Filters a sanitized username string. * * @since 2.0.1 * @@ -1337,7 +1871,7 @@ function sanitize_key( $key ) { $key = preg_replace( '/[^a-z0-9_\-]/', '', $key ); /** - * Filter a sanitized key string. + * Filters a sanitized key string. * * @since 3.0.0 * @@ -1356,9 +1890,9 @@ function sanitize_key( $key ) { * * @since 1.0.0 * - * @param string $title The string to be sanitized. + * @param string $title The string to be sanitized. * @param string $fallback_title Optional. A title to use if $title is empty. - * @param string $context Optional. The operation for which the string is sanitized + * @param string $context Optional. The operation for which the string is sanitized * @return string The sanitized string. */ function sanitize_title( $title, $fallback_title = '', $context = 'save' ) { @@ -1368,7 +1902,7 @@ function sanitize_title( $title, $fallback_title = '', $context = 'save' ) { $title = remove_accents($title); /** - * Filter a sanitized title string. + * Filters a sanitized title string. * * @since 1.2.0 * @@ -1406,9 +1940,9 @@ function sanitize_title_for_query( $title ) { * * @since 1.2.0 * - * @param string $title The title to be sanitized. + * @param string $title The title to be sanitized. * @param string $raw_title Optional. Not used. - * @param string $context Optional. The operation for which the string is sanitized. + * @param string $context Optional. The operation for which the string is sanitized. * @return string The sanitized title. */ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) { @@ -1428,12 +1962,12 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa } $title = strtolower($title); - $title = preg_replace('/&.+?;/', '', $title); // kill entities - $title = str_replace('.', '-', $title); if ( 'save' == $context ) { // Convert nbsp, ndash and mdash to hyphens $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title ); + // Convert nbsp, ndash and mdash HTML entities to hyphens + $title = str_replace( array( ' ', ' ', '–', '–', '—', '—' ), '-', $title ); // Strip these characters entirely $title = str_replace( array( @@ -1456,6 +1990,9 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa $title = str_replace( '%c3%97', 'x', $title ); } + $title = preg_replace('/&.+?;/', '', $title); // kill entities + $title = str_replace('.', '-', $title); + $title = preg_replace('/[^%a-z0-9 _-]/', '', $title); $title = preg_replace('/\s+/', '-', $title); $title = preg_replace('|-+|', '-', $title); @@ -1475,7 +2012,7 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa * @since 2.5.1 * * @param string $orderby Order by clause to be validated. - * @return string|bool Returns $orderby if valid, false otherwise. + * @return string|false Returns $orderby if valid, false otherwise. */ function sanitize_sql_orderby( $orderby ) { if ( preg_match( '/^\s*(([a-z0-9_]+|`[a-z0-9_]+`)(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_`])|$))+$/i', $orderby ) || preg_match( '/^\s*RAND\(\s*\)\s*$/i', $orderby ) ) { @@ -1494,7 +2031,7 @@ function sanitize_sql_orderby( $orderby ) { * * @since 2.8.0 * - * @param string $class The classname to be sanitized + * @param string $class The classname to be sanitized * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string. * Defaults to an empty string. * @return string The sanitized value @@ -1506,11 +2043,11 @@ function sanitize_html_class( $class, $fallback = '' ) { //Limit to A-Z,a-z,0-9,_,- $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized ); - if ( '' == $sanitized ) - $sanitized = $fallback; - + if ( '' == $sanitized && $fallback ) { + return sanitize_html_class( $fallback ); + } /** - * Filter a sanitized HTML class string. + * Filters a sanitized HTML class string. * * @since 2.8.0 * @@ -1522,71 +2059,73 @@ function sanitize_html_class( $class, $fallback = '' ) { } /** - * Converts a number of characters from a string. - * - * Metadata tags `` and `<category>` are removed, `<br>` and `<hr>` are - * converted into correct XHTML and Unicode characters are converted to the - * valid range. + * Converts lone & characters into `&` (a.k.a. `&`) * * @since 0.71 * - * @param string $content String of characters to be converted. + * @param string $content String of characters to be converted. * @param string $deprecated Not used. * @return string Converted string. */ -function convert_chars($content, $deprecated = '') { - if ( !empty( $deprecated ) ) +function convert_chars( $content, $deprecated = '' ) { + if ( ! empty( $deprecated ) ) { _deprecated_argument( __FUNCTION__, '0.71' ); + } - // Translation of invalid Unicode references range to valid range - $wp_htmltranswinuni = array( - '€' => '€', // the Euro sign - '' => '', - '‚' => '‚', // these are Windows CP1252 specific characters - 'ƒ' => 'ƒ', // they would look weird on non-Windows browsers - '„' => '„', - '…' => '…', - '†' => '†', - '‡' => '‡', - 'ˆ' => 'ˆ', - '‰' => '‰', - 'Š' => 'Š', - '‹' => '‹', - 'Œ' => 'Œ', - '' => '', - 'Ž' => 'Ž', - '' => '', - '' => '', - '‘' => '‘', - '’' => '’', - '“' => '“', - '”' => '”', - '•' => '•', - '–' => '–', - '—' => '—', - '˜' => '˜', - '™' => '™', - 'š' => 'š', - '›' => '›', - 'œ' => 'œ', - '' => '', - 'ž' => 'ž', - 'Ÿ' => 'Ÿ' - ); - - // Remove metadata tags - $content = preg_replace('/<title>(.+?)<\/title>/','',$content); - $content = preg_replace('/<category>(.+?)<\/category>/','',$content); + if ( strpos( $content, '&' ) !== false ) { + $content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&$1', $content ); + } - // Converts lone & characters into & (a.k.a. &) - $content = preg_replace('/&([^#])(?![a-z1-4]{1,8};)/i', '&$1', $content); + return $content; +} - // Fix Word pasting - $content = strtr($content, $wp_htmltranswinuni); +/** + * Converts invalid Unicode references range to valid range. + * + * @since 4.3.0 + * + * @param string $content String with entities that need converting. + * @return string Converted string. + */ +function convert_invalid_entities( $content ) { + $wp_htmltranswinuni = array( + '€' => '€', // the Euro sign + '' => '', + '‚' => '‚', // these are Windows CP1252 specific characters + 'ƒ' => 'ƒ', // they would look weird on non-Windows browsers + '„' => '„', + '…' => '…', + '†' => '†', + '‡' => '‡', + 'ˆ' => 'ˆ', + '‰' => '‰', + 'Š' => 'Š', + '‹' => '‹', + 'Œ' => 'Œ', + '' => '', + 'Ž' => 'Ž', + '' => '', + '' => '', + '‘' => '‘', + '’' => '’', + '“' => '“', + '”' => '”', + '•' => '•', + '–' => '–', + '—' => '—', + '˜' => '˜', + '™' => '™', + 'š' => 'š', + '›' => '›', + 'œ' => 'œ', + '' => '', + 'ž' => 'ž', + 'Ÿ' => 'Ÿ' + ); - // Just a little XHTML help - $content = str_replace('<br>', '<br />', $content); - $content = str_replace('<hr>', '<hr />', $content); + if ( strpos( $content, '' ) !== false ) { + $content = strtr( $content, $wp_htmltranswinuni ); + } return $content; } @@ -1596,8 +2135,8 @@ function convert_chars($content, $deprecated = '') { * * @since 0.71 * - * @param string $text Text to be balanced - * @param bool $force If true, forces balancing, ignoring the value of the option. Default false. + * @param string $text Text to be balanced + * @param bool $force If true, forces balancing, ignoring the value of the option. Default false. * @return string Balanced text */ function balanceTags( $text, $force = false ) { @@ -1653,7 +2192,7 @@ function force_balance_tags( $text ) { if ( isset($regex[1][0]) && '/' == $regex[1][0] ) { // End Tag $tag = strtolower(substr($regex[1],1)); // if too many closing tags - if( $stacksize <= 0 ) { + if ( $stacksize <= 0 ) { $tag = ''; // or close to be safe $tag = '/' . $tag; } @@ -1708,7 +2247,7 @@ function force_balance_tags( $text ) { // Attributes $attributes = $regex[2]; - if( ! empty( $attributes ) && $attributes[0] != '>' ) + if ( ! empty( $attributes ) && $attributes[0] != '>' ) $attributes = ' ' . $attributes; $tag = '<' . $tag . $attributes . '>'; @@ -1743,25 +2282,28 @@ function force_balance_tags( $text ) { * Acts on text which is about to be edited. * * The $content is run through esc_textarea(), which uses htmlspecialchars() - * to convert special characters to HTML entities. If $richedit is set to true, - * it is simply a holder for the 'format_to_edit' filter. + * to convert special characters to HTML entities. If `$richedit` is set to true, + * it is simply a holder for the {@see 'format_to_edit'} filter. * * @since 0.71 + * @since 4.4.0 The `$richedit` parameter was renamed to `$rich_text` for clarity. * - * @param string $content The text about to be edited. - * @param bool $richedit Whether the $content should not pass through htmlspecialchars(). Default false (meaning it will be passed). + * @param string $content The text about to be edited. + * @param bool $rich_text Optional. Whether `$content` should be considered rich text, + * in which case it would not be passed through esc_textarea(). + * Default false. * @return string The text after the filter (and possibly htmlspecialchars()) has been run. */ -function format_to_edit( $content, $richedit = false ) { +function format_to_edit( $content, $rich_text = false ) { /** - * Filter the text to be formatted for editing. + * Filters the text to be formatted for editing. * * @since 1.2.0 * * @param string $content The text, prior to formatting for editing. */ $content = apply_filters( 'format_to_edit', $content ); - if ( ! $richedit ) + if ( ! $rich_text ) $content = esc_textarea( $content ); return $content; } @@ -1779,12 +2321,12 @@ function format_to_edit( $content, $richedit = false ) { * * @since 0.71 * - * @param mixed $number Number to append zeros to if not greater than threshold. - * @param int $threshold Digit places number needs to be to not have zeros added. + * @param int $number Number to append zeros to if not greater than threshold. + * @param int $threshold Digit places number needs to be to not have zeros added. * @return string Adds leading zeros to number if needed. */ -function zeroise($number, $threshold) { - return sprintf('%0'.$threshold.'s', $number); +function zeroise( $number, $threshold ) { + return sprintf( '%0' . $threshold . 's', $number ); } /** @@ -1795,7 +2337,7 @@ function zeroise($number, $threshold) { * @param string $string Value to which backslashes will be added. * @return string String with backslashes inserted. */ -function backslashit($string) { +function backslashit( $string ) { if ( isset( $string[0] ) && $string[0] >= '0' && $string[0] <= '9' ) $string = '\\\\' . $string; return addcslashes( $string, 'A..Za..z' ); @@ -1838,7 +2380,7 @@ function untrailingslashit( $string ) { * Adds slashes to escape strings. * * Slashes will first be removed if magic_quotes_gpc is set, see {@link - * http://www.php.net/magic_quotes} for more details. + * https://secure.php.net/magic_quotes} for more details. * * @since 0.71 * @@ -1853,55 +2395,63 @@ function addslashes_gpc($gpc) { } /** - * Navigates through an array and removes slashes from the values. - * - * If an array is passed, the array_map() function causes a callback to pass the - * value back to the function. The slashes from this value will removed. + * Navigates through an array, object, or scalar, and removes slashes from the values. * * @since 2.0.0 * * @param mixed $value The value to be stripped. * @return mixed Stripped value. */ -function stripslashes_deep($value) { - if ( is_array($value) ) { - $value = array_map('stripslashes_deep', $value); - } elseif ( is_object($value) ) { - $vars = get_object_vars( $value ); - foreach ($vars as $key=>$data) { - $value->{$key} = stripslashes_deep( $data ); - } - } elseif ( is_string( $value ) ) { - $value = stripslashes($value); - } - - return $value; +function stripslashes_deep( $value ) { + return map_deep( $value, 'stripslashes_from_strings_only' ); } /** - * Navigates through an array and encodes the values to be used in a URL. + * Callback function for `stripslashes_deep()` which strips slashes from strings. * + * @since 4.4.0 + * + * @param mixed $value The array or string to be stripped. + * @return mixed $value The stripped value. + */ +function stripslashes_from_strings_only( $value ) { + return is_string( $value ) ? stripslashes( $value ) : $value; +} + +/** + * Navigates through an array, object, or scalar, and encodes the values to be used in a URL. * * @since 2.2.0 * - * @param array|string $value The array or string to be encoded. - * @return array|string $value The encoded array (or string from the callback). + * @param mixed $value The array or string to be encoded. + * @return mixed $value The encoded value. */ -function urlencode_deep($value) { - $value = is_array($value) ? array_map('urlencode_deep', $value) : urlencode($value); - return $value; +function urlencode_deep( $value ) { + return map_deep( $value, 'urlencode' ); } /** - * Navigates through an array and raw encodes the values to be used in a URL. + * Navigates through an array, object, or scalar, and raw-encodes the values to be used in a URL. * * @since 3.4.0 * - * @param array|string $value The array or string to be encoded. - * @return array|string $value The encoded array (or string from the callback). + * @param mixed $value The array or string to be encoded. + * @return mixed $value The encoded value. */ function rawurlencode_deep( $value ) { - return is_array( $value ) ? array_map( 'rawurlencode_deep', $value ) : rawurlencode( $value ); + return map_deep( $value, 'rawurlencode' ); +} + +/** + * Navigates through an array, object, or scalar, and decodes URL-encoded values + * + * @since 4.4.0 + * + * @param mixed $value The array or string to be decoded. + * @return mixed $value The decoded value. + */ +function urldecode_deep( $value ) { + return map_deep( $value, 'urldecode' ); } /** @@ -1910,7 +2460,7 @@ function rawurlencode_deep( $value ) { * @since 0.71 * * @param string $email_address Email address. - * @param int $hex_encoding Optional. Set to 1 to enable hex encoding. + * @param int $hex_encoding Optional. Set to 1 to enable hex encoding. * @return string Converted email address. */ function antispambot( $email_address, $hex_encoding = 0 ) { @@ -1926,16 +2476,13 @@ function antispambot( $email_address, $hex_encoding = 0 ) { } } - $email_no_spam_address = str_replace( '@', '@', $email_no_spam_address ); - - return $email_no_spam_address; + return str_replace( '@', '@', $email_no_spam_address ); } /** * Callback to convert URI match to HTML A element. * - * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link - * make_clickable()}. + * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable(). * * @since 2.3.2 * @access private @@ -1943,7 +2490,7 @@ function antispambot( $email_address, $hex_encoding = 0 ) { * @param array $matches Single Regex Match. * @return string HTML A element with URI address. */ -function _make_url_clickable_cb($matches) { +function _make_url_clickable_cb( $matches ) { $url = $matches[2]; if ( ')' == $matches[3] && strpos( $url, '(' ) ) { @@ -1971,8 +2518,7 @@ function _make_url_clickable_cb($matches) { /** * Callback to convert URL match to HTML A element. * - * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link - * make_clickable()}. + * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable(). * * @since 2.3.2 * @access private @@ -1980,27 +2526,28 @@ function _make_url_clickable_cb($matches) { * @param array $matches Single Regex Match. * @return string HTML A element with URL address. */ -function _make_web_ftp_clickable_cb($matches) { +function _make_web_ftp_clickable_cb( $matches ) { $ret = ''; $dest = $matches[2]; $dest = 'http://' . $dest; - $dest = esc_url($dest); - if ( empty($dest) ) - return $matches[0]; // removed trailing [.,;:)] from URL if ( in_array( substr($dest, -1), array('.', ',', ';', ':', ')') ) === true ) { $ret = substr($dest, -1); $dest = substr($dest, 0, strlen($dest)-1); } + + $dest = esc_url($dest); + if ( empty($dest) ) + return $matches[0]; + return $matches[1] . "<a href=\"$dest\" rel=\"nofollow\">$dest</a>$ret"; } /** * Callback to convert email address match to HTML A element. * - * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link - * make_clickable()}. + * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable(). * * @since 2.3.2 * @access private @@ -2008,7 +2555,7 @@ function _make_web_ftp_clickable_cb($matches) { * @param array $matches Single Regex Match. * @return string HTML A element with email address. */ -function _make_email_clickable_cb($matches) { +function _make_email_clickable_cb( $matches ) { $email = $matches[2] . '@' . $matches[3]; return $matches[1] . "<a href=\"mailto:$email\">$email</a>"; } @@ -2030,9 +2577,9 @@ function make_clickable( $text ) { $nested_code_pre = 0; // Keep track of how many levels link is nested inside <pre> or <code> foreach ( $textarr as $piece ) { - if ( preg_match( '|^<code[\s>]|i', $piece ) || preg_match( '|^<pre[\s>]|i', $piece ) ) + if ( preg_match( '|^<code[\s>]|i', $piece ) || preg_match( '|^<pre[\s>]|i', $piece ) || preg_match( '|^<script[\s>]|i', $piece ) || preg_match( '|^<style[\s>]|i', $piece ) ) $nested_code_pre++; - elseif ( ( '</code>' === strtolower( $piece ) || '</pre>' === strtolower( $piece ) ) && $nested_code_pre ) + elseif ( $nested_code_pre && ( '</code>' === strtolower( $piece ) || '</pre>' === strtolower( $piece ) || '</script>' === strtolower( $piece ) || '</style>' === strtolower( $piece ) ) ) $nested_code_pre--; if ( $nested_code_pre || empty( $piece ) || ( $piece[0] === '<' && ! preg_match( '|^<\s*[\w]{1,20}+://|', $piece ) ) ) { @@ -2079,8 +2626,7 @@ function make_clickable( $text ) { } // Cleanup of accidental links within links - $r = preg_replace( '#(<a([ \r\n\t]+[^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', "$1$3</a>", $r ); - return $r; + return preg_replace( '#(<a([ \r\n\t]+[^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', "$1$3</a>", $r ); } /** @@ -2109,7 +2655,7 @@ function make_clickable( $text ) { * @access private * * @param string $string The string to split. - * @param int $goal The desired chunk length. + * @param int $goal The desired chunk length. * @return array Numeric array of chunks. */ function _split_str_by_whitespace( $string, $goal ) { @@ -2151,8 +2697,7 @@ function wp_rel_nofollow( $text ) { // This is a pre save filter, so text is already escaped. $text = stripslashes($text); $text = preg_replace_callback('|<a (.+?)>|i', 'wp_rel_nofollow_callback', $text); - $text = wp_slash($text); - return $text; + return wp_slash( $text ); } /** @@ -2168,20 +2713,44 @@ function wp_rel_nofollow( $text ) { */ function wp_rel_nofollow_callback( $matches ) { $text = $matches[1]; - $text = str_replace(array(' rel="nofollow"', " rel='nofollow'"), '', $text); - return "<a $text rel=\"nofollow\">"; + $atts = shortcode_parse_atts( $matches[1] ); + $rel = 'nofollow'; + + if ( preg_match( '%href=["\'](' . preg_quote( set_url_scheme( home_url(), 'http' ) ) . ')%i', $text ) || + preg_match( '%href=["\'](' . preg_quote( set_url_scheme( home_url(), 'https' ) ) . ')%i', $text ) + ) { + return "<a $text>"; + } + + if ( ! empty( $atts['rel'] ) ) { + $parts = array_map( 'trim', explode( ' ', $atts['rel'] ) ); + if ( false === array_search( 'nofollow', $parts ) ) { + $parts[] = 'nofollow'; + } + $rel = implode( ' ', $parts ); + unset( $atts['rel'] ); + + $html = ''; + foreach ( $atts as $name => $value ) { + $html .= "{$name}=\"$value\" "; + } + $text = trim( $html ); + } + return "<a $text rel=\"$rel\">"; } /** * Convert one smiley code to the icon graphic file equivalent. * - * Callback handler for {@link convert_smilies()}. + * Callback handler for convert_smilies(). + * * Looks up one smiley code in the $wpsmiliestrans global array and returns an * `<img>` string for that smiley. * - * @global array $wpsmiliestrans * @since 2.8.0 * + * @global array $wpsmiliestrans + * * @param array $matches Single match. Smiley code to convert to image. * @return string Image string for smiley. */ @@ -2204,7 +2773,7 @@ function translate_smiley( $matches ) { } /** - * Filter the Smiley image URL before it's used in the image element. + * Filters the Smiley image URL before it's used in the image element. * * @since 2.9.0 * @@ -2224,7 +2793,8 @@ function translate_smiley( $matches ) { * used in the function isn't empty. * * @since 0.71 - * @uses $wp_smiliessearch + * + * @global string|array $wp_smiliessearch * * @param string $text Content to convert smilies from text. * @return string Converted content with text smilies replaced with images. @@ -2275,18 +2845,18 @@ function convert_smilies( $text ) { * * @since 0.71 * - * @param string $email Email address to verify. - * @param boolean $deprecated Deprecated. + * @param string $email Email address to verify. + * @param bool $deprecated Deprecated. * @return string|bool Either false or the valid email address. */ function is_email( $email, $deprecated = false ) { if ( ! empty( $deprecated ) ) - _deprecated_argument( __FUNCTION__, '3.0' ); + _deprecated_argument( __FUNCTION__, '3.0.0' ); // Test for the minimum length the email can be if ( strlen( $email ) < 3 ) { /** - * Filter whether an email address is valid. + * Filters whether an email address is valid. * * This filter is evaluated under several different contexts, such as 'email_too_short', * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits', @@ -2296,7 +2866,6 @@ function is_email( $email, $deprecated = false ) { * * @param bool $is_email Whether the email address has passed the is_email() checks. Default false. * @param string $email The email address being checked. - * @param string $message An explanatory message to the user. * @param string $context Context under which the email was tested. */ return apply_filters( 'is_email', false, $email, 'email_too_short' ); @@ -2368,14 +2937,13 @@ function is_email( $email, $deprecated = false ) { * @param string $string Subject line * @return string Converted string to ASCII */ -function wp_iso_descrambler($string) { +function wp_iso_descrambler( $string ) { /* this may only work with iso-8859-1, I'm afraid */ if (!preg_match('#\=\?(.+)\?Q\?(.+)\?\=#i', $string, $matches)) { return $string; } else { $subject = str_replace('_', ' ', $matches[2]); - $subject = preg_replace_callback('#\=([0-9a-f]{2})#i', '_wp_iso_convert', $subject); - return $subject; + return preg_replace_callback( '#\=([0-9a-f]{2})#i', '_wp_iso_convert', $subject ); } } @@ -2410,13 +2978,19 @@ function get_gmt_from_date( $string, $format = 'Y-m-d H:i:s' ) { $tz = get_option( 'timezone_string' ); if ( $tz ) { $datetime = date_create( $string, new DateTimeZone( $tz ) ); - if ( ! $datetime ) + if ( ! $datetime ) { return gmdate( $format, 0 ); + } $datetime->setTimezone( new DateTimeZone( 'UTC' ) ); $string_gmt = $datetime->format( $format ); } else { - if ( ! preg_match( '#([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2}) ([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})#', $string, $matches ) ) - return gmdate( $format, 0 ); + if ( ! preg_match( '#([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2}) ([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})#', $string, $matches ) ) { + $datetime = strtotime( $string ); + if ( false === $datetime ) { + return gmdate( $format, 0 ); + } + return gmdate( $format, $datetime ); + } $string_time = gmmktime( $matches[4], $matches[5], $matches[6], $matches[2], $matches[3], $matches[1] ); $string_gmt = gmdate( $format, $string_time - get_option( 'gmt_offset' ) * HOUR_IN_SECONDS ); } @@ -2462,7 +3036,7 @@ function get_date_from_gmt( $string, $format = 'Y-m-d H:i:s' ) { * @param string $timezone Either 'Z' for 0 offset or '±hhmm'. * @return int|float The offset in seconds. */ -function iso8601_timezone_to_offset($timezone) { +function iso8601_timezone_to_offset( $timezone ) { // $timezone is either 'Z' or '[+|-]hhmm' if ($timezone == 'Z') { $offset = 0; @@ -2480,11 +3054,11 @@ function iso8601_timezone_to_offset($timezone) { * * @since 1.5.0 * - * @param string $date_string Date and time in ISO 8601 format {@link http://en.wikipedia.org/wiki/ISO_8601}. - * @param string $timezone Optional. If set to GMT returns the time minus gmt_offset. Default is 'user'. + * @param string $date_string Date and time in ISO 8601 format {@link https://en.wikipedia.org/wiki/ISO_8601}. + * @param string $timezone Optional. If set to GMT returns the time minus gmt_offset. Default is 'user'. * @return string The date and time in MySQL DateTime format - Y-m-d H:i:s. */ -function iso8601_to_datetime($date_string, $timezone = 'user') { +function iso8601_to_datetime( $date_string, $timezone = 'user' ) { $timezone = strtolower($timezone); if ($timezone == 'gmt') { @@ -2507,23 +3081,6 @@ function iso8601_to_datetime($date_string, $timezone = 'user') { } } -/** - * Adds a element attributes to open links in new windows. - * - * Comment text in popup windows should be filtered through this. Right now it's - * a moderately dumb function, ideally it would detect whether a target or rel - * attribute was already there and adjust its actions accordingly. - * - * @since 0.71 - * - * @param string $text Content to replace links to open in a new window. - * @return string Content that has filtered links. - */ -function popuplinks($text) { - $text = preg_replace('/<a (.+?)>/i', "<a $1 target='_blank' rel='external'>", $text); - return $text; -} - /** * Strips out all characters that are not allowable in an email. * @@ -2536,7 +3093,7 @@ function sanitize_email( $email ) { // Test for the minimum length the email can be if ( strlen( $email ) < 3 ) { /** - * Filter a sanitized email address. + * Filters a sanitized email address. * * This filter is evaluated under several contexts, including 'email_too_short', * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits', @@ -2635,7 +3192,7 @@ function sanitize_email( $email ) { * @since 1.5.0 * * @param int $from Unix timestamp from which the difference begins. - * @param int $to Optional. Unix timestamp to end the time difference. Default becomes time() if not set. + * @param int $to Optional. Unix timestamp to end the time difference. Default becomes time() if not set. * @return string Human readable time difference. */ function human_time_diff( $from, $to = '' ) { @@ -2661,13 +3218,13 @@ function human_time_diff( $from, $to = '' ) { if ( $days <= 1 ) $days = 1; $since = sprintf( _n( '%s day', '%s days', $days ), $days ); - } elseif ( $diff < 30 * DAY_IN_SECONDS && $diff >= WEEK_IN_SECONDS ) { + } elseif ( $diff < MONTH_IN_SECONDS && $diff >= WEEK_IN_SECONDS ) { $weeks = round( $diff / WEEK_IN_SECONDS ); if ( $weeks <= 1 ) $weeks = 1; $since = sprintf( _n( '%s week', '%s weeks', $weeks ), $weeks ); - } elseif ( $diff < YEAR_IN_SECONDS && $diff >= 30 * DAY_IN_SECONDS ) { - $months = round( $diff / ( 30 * DAY_IN_SECONDS ) ); + } elseif ( $diff < YEAR_IN_SECONDS && $diff >= MONTH_IN_SECONDS ) { + $months = round( $diff / MONTH_IN_SECONDS ); if ( $months <= 1 ) $months = 1; $since = sprintf( _n( '%s month', '%s months', $months ), $months ); @@ -2679,7 +3236,7 @@ function human_time_diff( $from, $to = '' ) { } /** - * Filter the human readable difference between two timestamps. + * Filters the human readable difference between two timestamps. * * @since 4.0.0 * @@ -2698,15 +3255,15 @@ function human_time_diff( $from, $to = '' ) { * that, then the string ' […]' will be appended to the excerpt. If the string * is less than 55 words, then the content will be returned as is. * - * The 55 word limit can be modified by plugins/themes using the excerpt_length filter - * The ' […]' string can be modified by plugins/themes using the excerpt_more filter + * The 55 word limit can be modified by plugins/themes using the {@see 'excerpt_length'} filter + * The ' […]' string can be modified by plugins/themes using the {@see 'excerpt_more'} filter * * @since 1.5.0 * * @param string $text Optional. The excerpt. If set to empty, an excerpt is generated. * @return string The excerpt. */ -function wp_trim_excerpt($text = '') { +function wp_trim_excerpt( $text = '' ) { $raw_excerpt = $text; if ( '' == $text ) { $text = get_the_content(''); @@ -2718,7 +3275,7 @@ function wp_trim_excerpt($text = '') { $text = str_replace(']]>', ']]>', $text); /** - * Filter the number of words in an excerpt. + * Filters the number of words in an excerpt. * * @since 2.7.0 * @@ -2726,7 +3283,7 @@ function wp_trim_excerpt($text = '') { */ $excerpt_length = apply_filters( 'excerpt_length', 55 ); /** - * Filter the string in the "more" link displayed after a trimmed excerpt. + * Filters the string in the "more" link displayed after a trimmed excerpt. * * @since 2.9.0 * @@ -2736,7 +3293,7 @@ function wp_trim_excerpt($text = '') { $text = wp_trim_words( $text, $excerpt_length, $excerpt_more ); } /** - * Filter the trimmed excerpt string. + * Filters the trimmed excerpt string. * * @since 2.8.0 * @@ -2755,19 +3312,25 @@ function wp_trim_excerpt($text = '') { * * @since 3.3.0 * - * @param string $text Text to trim. - * @param int $num_words Number of words. Default 55. - * @param string $more Optional. What to append if $text needs to be trimmed. Default '…'. + * @param string $text Text to trim. + * @param int $num_words Number of words. Default 55. + * @param string $more Optional. What to append if $text needs to be trimmed. Default '…'. * @return string Trimmed text. */ function wp_trim_words( $text, $num_words = 55, $more = null ) { - if ( null === $more ) + if ( null === $more ) { $more = __( '…' ); + } + $original_text = $text; $text = wp_strip_all_tags( $text ); - /* translators: If your word count is based on single characters (East Asian characters), - enter 'characters'. Otherwise, enter 'words'. Do not translate into your own language. */ - if ( 'characters' == _x( 'words', 'word count: words or characters?' ) && preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) ) ) { + + /* + * translators: If your word count is based on single characters (e.g. East Asian characters), + * enter 'characters_excluding_spaces' or 'characters_including_spaces'. Otherwise, enter 'words'. + * Do not translate into your own language. + */ + if ( strpos( _x( 'words', 'Word count type. Do not translate!' ), 'characters' ) === 0 && preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) ) ) { $text = trim( preg_replace( "/[\n\r\t ]+/", ' ', $text ), ' ' ); preg_match_all( '/./u', $text, $words_array ); $words_array = array_slice( $words_array[0], 0, $num_words + 1 ); @@ -2776,6 +3339,7 @@ function wp_trim_words( $text, $num_words = 55, $more = null ) { $words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY ); $sep = ' '; } + if ( count( $words_array ) > $num_words ) { array_pop( $words_array ); $text = implode( $sep, $words_array ); @@ -2783,8 +3347,9 @@ function wp_trim_words( $text, $num_words = 55, $more = null ) { } else { $text = implode( $sep, $words_array ); } + /** - * Filter the text content after words have been trimmed. + * Filters the text content after words have been trimmed. * * @since 3.3.0 * @@ -2804,10 +3369,10 @@ function wp_trim_words( $text, $num_words = 55, $more = null ) { * @param string $text The text within which entities will be converted. * @return string Text with converted entities. */ -function ent2ncr($text) { +function ent2ncr( $text ) { /** - * Filter text before named entities are converted into numbered entities. + * Filters text before named entities are converted into numbered entities. * * A non-null string must be returned for the filter to be evaluated. * @@ -2817,7 +3382,7 @@ function ent2ncr($text) { * @param string $text The text prior to entity conversion. */ $filtered = apply_filters( 'pre_ent2ncr', null, $text ); - if( null !== $filtered ) + if ( null !== $filtered ) return $filtered; $to_ncr = array( @@ -3083,66 +3648,38 @@ function ent2ncr($text) { } /** - * Formats text for the rich text editor. - * - * The filter 'richedit_pre' is applied here. If $text is empty the filter will - * be applied to an empty string. + * Formats text for the editor. * - * @since 2.0.0 + * Generally the browsers treat everything inside a textarea as text, but + * it is still a good idea to HTML entity encode `<`, `>` and `&` in the content. * - * @param string $text The text to be formatted. - * @return string The formatted text after filter is applied. - */ -function wp_richedit_pre($text) { - if ( empty( $text ) ) { - /** - * Filter text returned for the rich text editor. - * - * This filter is first evaluated, and the value returned, if an empty string - * is passed to wp_richedit_pre(). If an empty string is passed, it results - * in a break tag and line feed. - * - * If a non-empty string is passed, the filter is evaluated on the wp_richedit_pre() - * return after being formatted. - * - * @since 2.0.0 - * - * @param string $output Text for the rich text editor. - */ - return apply_filters( 'richedit_pre', '' ); - } - - $output = convert_chars($text); - $output = wpautop($output); - $output = htmlspecialchars($output, ENT_NOQUOTES, get_option( 'blog_charset' ) ); - - /** This filter is documented in wp-includes/formatting.php */ - return apply_filters( 'richedit_pre', $output ); -} - -/** - * Formats text for the HTML editor. + * The filter {@see 'format_for_editor'} is applied here. If `$text` is empty the + * filter will be applied to an empty string. * - * Unless $output is empty it will pass through htmlspecialchars before the - * 'htmledit_pre' filter is applied. + * @since 4.3.0 * - * @since 2.5.0 + * @see _WP_Editors::editor() * - * @param string $output The text to be formatted. - * @return string Formatted text after filter applied. + * @param string $text The text to be formatted. + * @param string $default_editor The default editor for the current user. + * It is usually either 'html' or 'tinymce'. + * @return string The formatted text after filter is applied. */ -function wp_htmledit_pre($output) { - if ( !empty($output) ) - $output = htmlspecialchars($output, ENT_NOQUOTES, get_option( 'blog_charset' ) ); // convert only < > & +function format_for_editor( $text, $default_editor = null ) { + if ( $text ) { + $text = htmlspecialchars( $text, ENT_NOQUOTES, get_option( 'blog_charset' ) ); + } /** - * Filter the text before it is formatted for the HTML editor. + * Filters the text after it is formatted for the editor. * - * @since 2.5.0 + * @since 4.3.0 * - * @param string $output The HTML-formatted text. + * @param string $text The formatted text. + * @param string $default_editor The default editor for the current user. + * It is usually either 'html' or 'tinymce'. */ - return apply_filters( 'htmledit_pre', $output ); + return apply_filters( 'format_for_editor', $text, $default_editor ); } /** @@ -3155,8 +3692,9 @@ function wp_htmledit_pre($output) { * @since 2.8.1 * @access private * - * @param string|array $search The value being searched for, otherwise known as the needle. An array may be used to designate multiple needles. - * @param string $subject The string being searched and replaced on, otherwise known as the haystack. + * @param string|array $search The value being searched for, otherwise known as the needle. + * An array may be used to designate multiple needles. + * @param string $subject The string being searched and replaced on, otherwise known as the haystack. * @return string The string with the replaced svalues. */ function _deep_replace( $search, $subject ) { @@ -3178,6 +3716,9 @@ function _deep_replace( $search, $subject ) { * is preparing an array for use in an IN clause. * * @since 2.8.0 + * + * @global wpdb $wpdb WordPress database abstraction object. + * * @param string|array $data Unescaped data * @return string|array Escaped data */ @@ -3190,28 +3731,38 @@ function esc_sql( $data ) { * Checks and cleans a URL. * * A number of characters are removed from the URL. If the URL is for displaying - * (the default behaviour) ampersands are also replaced. The 'clean_url' filter + * (the default behaviour) ampersands are also replaced. The {@see 'clean_url'} filter * is applied to the returned cleaned URL. * * @since 2.8.0 * - * @param string $url The URL to be cleaned. - * @param array $protocols Optional. An array of acceptable protocols. - * Defaults to 'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn' if not set. - * @param string $_context Private. Use esc_url_raw() for database usage. - * @return string The cleaned $url after the 'clean_url' filter is applied. + * @param string $url The URL to be cleaned. + * @param array $protocols Optional. An array of acceptable protocols. + * Defaults to return value of wp_allowed_protocols() + * @param string $_context Private. Use esc_url_raw() for database usage. + * @return string The cleaned $url after the {@see 'clean_url'} filter is applied. */ function esc_url( $url, $protocols = null, $_context = 'display' ) { $original_url = $url; if ( '' == $url ) return $url; - $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\\x80-\\xff]|i', '', $url); - $strip = array('%0d', '%0a', '%0D', '%0A'); - $url = _deep_replace($strip, $url); + + $url = str_replace( ' ', '%20', $url ); + $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url); + + if ( '' === $url ) { + return $url; + } + + if ( 0 !== stripos( $url, 'mailto:' ) ) { + $strip = array('%0d', '%0a', '%0D', '%0A'); + $url = _deep_replace($strip, $url); + } + $url = str_replace(';//', '://', $url); /* If the URL doesn't appear to contain a scheme, we - * presume it needs http:// appended (unless a relative + * presume it needs http:// prepended (unless a relative * link starting with /, # or ? or a php file). */ if ( strpos($url, ':') === false && ! in_array( $url[0], array( '/', '#', '?' ) ) && @@ -3225,6 +3776,43 @@ function esc_url( $url, $protocols = null, $_context = 'display' ) { $url = str_replace( "'", ''', $url ); } + if ( ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url, ']' ) ) ) { + + $parsed = wp_parse_url( $url ); + $front = ''; + + if ( isset( $parsed['scheme'] ) ) { + $front .= $parsed['scheme'] . '://'; + } elseif ( '/' === $url[0] ) { + $front .= '//'; + } + + if ( isset( $parsed['user'] ) ) { + $front .= $parsed['user']; + } + + if ( isset( $parsed['pass'] ) ) { + $front .= ':' . $parsed['pass']; + } + + if ( isset( $parsed['user'] ) || isset( $parsed['pass'] ) ) { + $front .= '@'; + } + + if ( isset( $parsed['host'] ) ) { + $front .= $parsed['host']; + } + + if ( isset( $parsed['port'] ) ) { + $front .= ':' . $parsed['port']; + } + + $end_dirty = str_replace( $front, '', $url ); + $end_clean = str_replace( array( '[', ']' ), array( '%5B', '%5D' ), $end_dirty ); + $url = str_replace( $end_dirty, $end_clean, $url ); + + } + if ( '/' === $url[0] ) { $good_protocol_url = $url; } else { @@ -3236,7 +3824,7 @@ function esc_url( $url, $protocols = null, $_context = 'display' ) { } /** - * Filter a string cleaned and escaped for output as a URL. + * Filters a string cleaned and escaped for output as a URL. * * @since 2.3.0 * @@ -3252,8 +3840,8 @@ function esc_url( $url, $protocols = null, $_context = 'display' ) { * * @since 2.8.0 * - * @param string $url The URL to be cleaned. - * @param array $protocols An array of acceptable protocols. + * @param string $url The URL to be cleaned. + * @param array $protocols An array of acceptable protocols. * @return string The cleaned URL. */ function esc_url_raw( $url, $protocols = null ) { @@ -3263,14 +3851,14 @@ function esc_url_raw( $url, $protocols = null ) { /** * Convert entities, while preserving already-encoded entities. * - * @link http://www.php.net/htmlentities Borrowed from the PHP Manual user notes. + * @link https://secure.php.net/htmlentities Borrowed from the PHP Manual user notes. * * @since 1.2.2 * * @param string $myHTML The text to be converted. * @return string Converted text. */ -function htmlentities2($myHTML) { +function htmlentities2( $myHTML ) { $translation_table = get_html_translation_table( HTML_ENTITIES, ENT_QUOTES ); $translation_table[chr(38)] = '&'; return preg_replace( "/&(?![A-Za-z]{0,4}\w{2,3};|#[0-9]{2,3};)/", "&", strtr($myHTML, $translation_table) ); @@ -3281,7 +3869,7 @@ function htmlentities2($myHTML) { * * Escapes text strings for echoing in JS. It is intended to be used for inline JS * (in a tag attribute, for example onclick="..."). Note that the strings have to - * be in single quotes. The filter 'js_escape' is also applied here. + * be in single quotes. The {@see 'js_escape'} filter is also applied here. * * @since 2.8.0 * @@ -3295,7 +3883,7 @@ function esc_js( $text ) { $safe_text = str_replace( "\r", '', $safe_text ); $safe_text = str_replace( "\n", '\\n', addslashes( $safe_text ) ); /** - * Filter a string cleaned and escaped for output in JavaScript. + * Filters a string cleaned and escaped for output in JavaScript. * * Text passed to esc_js() is stripped of invalid or special characters, * and properly slashed for output. @@ -3320,7 +3908,7 @@ function esc_html( $text ) { $safe_text = wp_check_invalid_utf8( $text ); $safe_text = _wp_specialchars( $safe_text, ENT_QUOTES ); /** - * Filter a string cleaned and escaped for output in HTML. + * Filters a string cleaned and escaped for output in HTML. * * Text passed to esc_html() is stripped of invalid or special characters * before output. @@ -3345,7 +3933,7 @@ function esc_attr( $text ) { $safe_text = wp_check_invalid_utf8( $text ); $safe_text = _wp_specialchars( $safe_text, ENT_QUOTES ); /** - * Filter a string cleaned and escaped for output in an HTML attribute. + * Filters a string cleaned and escaped for output in an HTML attribute. * * Text passed to esc_attr() is stripped of invalid or special characters * before output. @@ -3369,7 +3957,7 @@ function esc_attr( $text ) { function esc_textarea( $text ) { $safe_text = htmlspecialchars( $text, ENT_QUOTES, get_option( 'blog_charset' ) ); /** - * Filter a string cleaned and escaped for output in a textarea element. + * Filters a string cleaned and escaped for output in a textarea element. * * @since 3.1.0 * @@ -3387,10 +3975,10 @@ function esc_textarea( $text ) { * @param string $tag_name * @return string */ -function tag_escape($tag_name) { +function tag_escape( $tag_name ) { $safe_tag = strtolower( preg_replace('/[^a-zA-Z0-9_:]/', '', $tag_name) ); /** - * Filter a string cleaned and escaped for output as an HTML tag. + * Filters a string cleaned and escaped for output as an HTML tag. * * @since 2.8.0 * @@ -3413,7 +4001,7 @@ function tag_escape($tag_name) { * @return string Absolute path. */ function wp_make_link_relative( $link ) { - return preg_replace( '|^(https?:)?//[^/]+(/.*)|i', '$2', $link ); + return preg_replace( '|^(https?:)?//[^/]+(/?.*)|i', '$2', $link ); } /** @@ -3424,12 +4012,16 @@ function wp_make_link_relative( $link ) { * * @since 2.0.5 * + * @global wpdb $wpdb WordPress database abstraction object. + * * @param string $option The name of the option. - * @param string $value The unsanitised value. + * @param string $value The unsanitised value. * @return string Sanitized value. */ -function sanitize_option($option, $value) { +function sanitize_option( $option, $value ) { global $wpdb; + + $original_value = $value; $error = ''; switch ( $option ) { @@ -3450,6 +4042,8 @@ function sanitize_option($option, $value) { case 'thumbnail_size_h': case 'medium_size_w': case 'medium_size_h': + case 'medium_large_size_w': + case 'medium_large_size_h': case 'large_size_w': case 'large_size_h': case 'mailserver_port': @@ -3465,6 +4059,7 @@ function sanitize_option($option, $value) { case 'thread_comments_depth': case 'users_can_register': case 'start_of_week': + case 'site_icon': $value = absint( $value ); break; @@ -3487,10 +4082,13 @@ function sanitize_option($option, $value) { case 'blogdescription': case 'blogname': $value = $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', $value ); + if ( $value !== $original_value ) { + $value = $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', wp_encode_emoji( $original_value ) ); + } + if ( is_wp_error( $value ) ) { $error = $value->get_error_message(); } else { - $value = wp_kses_post( $value ); $value = esc_html( $value ); } break; @@ -3623,6 +4221,14 @@ function sanitize_option($option, $value) { $value = esc_url_raw( $value ); $value = str_replace( 'http://', '', $value ); } + + if ( 'permalink_structure' === $option && '' !== $value && ! preg_match( '/%[^\/%]+%/', $value ) ) { + $error = sprintf( + /* translators: %s: Codex URL */ + __( 'A structure tag is required when using custom permalinks. <a href="%s">Learn more</a>' ), + __( 'https://codex.wordpress.org/Using_Permalinks#Choosing_your_permalink_structure' ) + ); + } break; case 'default_role' : @@ -3652,14 +4258,42 @@ function sanitize_option($option, $value) { } /** - * Filter an option value following sanitization. + * Filters an option value following sanitization. * * @since 2.3.0 + * @since 4.3.0 Added the `$original_value` parameter. * - * @param string $value The sanitized option value. - * @param string $option The option name. + * @param string $value The sanitized option value. + * @param string $option The option name. + * @param string $original_value The original value passed to the function. */ - $value = apply_filters( "sanitize_option_{$option}", $value, $option ); + return apply_filters( "sanitize_option_{$option}", $value, $option, $original_value ); +} + +/** + * Maps a function to all non-iterable elements of an array or an object. + * + * This is similar to `array_walk_recursive()` but acts upon objects too. + * + * @since 4.4.0 + * + * @param mixed $value The array, object, or scalar. + * @param callable $callback The function to map onto $value. + * @return mixed The value with the callback applied to all non-arrays and non-objects inside it. + */ +function map_deep( $value, $callback ) { + if ( is_array( $value ) ) { + foreach ( $value as $index => $item ) { + $value[ $index ] = map_deep( $item, $callback ); + } + } elseif ( is_object( $value ) ) { + $object_vars = get_object_vars( $value ); + foreach ( $object_vars as $property_name => $property_value ) { + $value->$property_name = map_deep( $property_value, $callback ); + } + } else { + $value = call_user_func( $callback, $value ); + } return $value; } @@ -3667,20 +4301,20 @@ function sanitize_option($option, $value) { /** * Parses a string into variables to be stored in an array. * - * Uses {@link http://www.php.net/parse_str parse_str()} and stripslashes if - * {@link http://www.php.net/magic_quotes magic_quotes_gpc} is on. + * Uses {@link https://secure.php.net/parse_str parse_str()} and stripslashes if + * {@link https://secure.php.net/magic_quotes magic_quotes_gpc} is on. * * @since 2.2.1 * * @param string $string The string to be parsed. - * @param array $array Variables will be stored in this array. + * @param array $array Variables will be stored in this array. */ function wp_parse_str( $string, &$array ) { parse_str( $string, $array ); if ( get_magic_quotes_gpc() ) $array = stripslashes_deep( $array ); /** - * Filter the array of variables derived from a parsed string. + * Filters the array of variables derived from a parsed string. * * @since 2.3.0 * @@ -3721,9 +4355,9 @@ function wp_pre_kses_less_than_callback( $matches ) { * WordPress implementation of PHP sprintf() with filters. * * @since 2.5.0 - * @link http://www.php.net/sprintf + * @link https://secure.php.net/sprintf * - * @param string $pattern The string which formatted args are inserted. + * @param string $pattern The string which formatted args are inserted. * @param mixed $args ,... Arguments to be formatted into the $pattern string. * @return string The formatted string. */ @@ -3765,7 +4399,7 @@ function wp_sprintf( $pattern ) { } /** - * Filter a fragment from the pattern passed to wp_sprintf(). + * Filters a fragment from the pattern passed to wp_sprintf(). * * If the fragment is unchanged, then sprintf() will be run on the fragment. * @@ -3798,10 +4432,10 @@ function wp_sprintf( $pattern ) { * @since 2.5.0 * * @param string $pattern Content containing '%l' at the beginning. - * @param array $args List items to prepend to the content and replace '%l'. + * @param array $args List items to prepend to the content and replace '%l'. * @return string Localized list items and rest of the content. */ -function wp_sprintf_l($pattern, $args) { +function wp_sprintf_l( $pattern, $args ) { // Not a match if ( substr($pattern, 0, 2) != '%l' ) return $pattern; @@ -3811,7 +4445,7 @@ function wp_sprintf_l($pattern, $args) { return ''; /** - * Filter the translated delimiters used by wp_sprintf_l(). + * Filters the translated delimiters used by wp_sprintf_l(). * Placeholders (%s) are included to assist translators and then * removed before the array of strings reaches the filter. * @@ -3856,9 +4490,9 @@ function wp_sprintf_l($pattern, $args) { * * @since 2.5.0 * - * @param string $str String to get the excerpt from. - * @param integer $count Maximum number of characters to take. - * @param string $more Optional. What to append if $str needs to be trimmed. Defaults to empty string. + * @param string $str String to get the excerpt from. + * @param int $count Maximum number of characters to take. + * @param string $more Optional. What to append if $str needs to be trimmed. Defaults to empty string. * @return string The excerpt. */ function wp_html_excerpt( $str, $count, $more = null ) { @@ -3881,9 +4515,11 @@ function wp_html_excerpt( $str, $count, $more = null ) { * * @since 2.7.0 * + * @global string $_links_add_base + * * @param string $content String to search for links in. - * @param string $base The base URL to prefix to links. - * @param array $attrs The attributes which should be processed. + * @param string $base The base URL to prefix to links. + * @param array $attrs The attributes which should be processed. * @return string The processed content. */ function links_add_base_url( $content, $base, $attrs = array('src', 'href') ) { @@ -3899,16 +4535,18 @@ function links_add_base_url( $content, $base, $attrs = array('src', 'href') ) { * @since 2.7.0 * @access private * + * @global string $_links_add_base + * * @param string $m The matched link. * @return string The processed link. */ -function _links_add_base($m) { +function _links_add_base( $m ) { global $_links_add_base; //1 = attribute name 2 = quotation mark 3 = URL return $m[1] . '=' . $m[2] . ( preg_match( '#^(\w{1,20}):#', $m[3], $protocol ) && in_array( $protocol[1], wp_allowed_protocols() ) ? $m[3] : - WP_HTTP::make_absolute_url( $m[3], $_links_add_base ) + WP_Http::make_absolute_url( $m[3], $_links_add_base ) ) . $m[2]; } @@ -3923,9 +4561,11 @@ function _links_add_base($m) { * * @since 2.7.0 * + * @global string $_links_add_target + * * @param string $content String to search for links in. - * @param string $target The Target to add to the links. - * @param array $tags An array of tags to apply to. + * @param string $target The Target to add to the links. + * @param array $tags An array of tags to apply to. * @return string The processed content. */ function links_add_target( $content, $target = '_blank', $tags = array('a') ) { @@ -3941,6 +4581,8 @@ function links_add_target( $content, $target = '_blank', $tags = array('a') ) { * @since 2.7.0 * @access private * + * @global string $_links_add_target + * * @param string $m The matched link. * @return string The processed link. */ @@ -3975,8 +4617,8 @@ function normalize_whitespace( $str ) { * * @since 2.9.0 * - * @param string $string String containing HTML tags - * @param bool $remove_breaks optional Whether to remove left over line breaks and white space chars + * @param string $string String containing HTML tags + * @param bool $remove_breaks Optional. Whether to remove left over line breaks and white space chars * @return string The processed string. */ function wp_strip_all_tags($string, $remove_breaks = false) { @@ -3990,20 +4632,23 @@ function wp_strip_all_tags($string, $remove_breaks = false) { } /** - * Sanitize a string from user input or from the db + * Sanitizes a string from user input or from the database. * - * check for invalid UTF-8, - * Convert single < characters to entity, - * strip all tags, - * remove line breaks, tabs and extra white space, - * strip octets. + * - Checks for invalid UTF-8, + * - Converts single `<` characters to entities + * - Strips all tags + * - Removes line breaks, tabs, and extra whitespace + * - Strips octets * * @since 2.9.0 * - * @param string $str - * @return string + * @see wp_check_invalid_utf8() + * @see wp_strip_all_tags() + * + * @param string $str String to sanitize. + * @return string Sanitized string. */ -function sanitize_text_field($str) { +function sanitize_text_field( $str ) { $filtered = wp_check_invalid_utf8( $str ); if ( strpos($filtered, '<') !== false ) { @@ -4026,7 +4671,7 @@ function sanitize_text_field($str) { } /** - * Filter a sanitized text field string. + * Filters a sanitized text field string. * * @since 2.9.0 * @@ -4041,7 +4686,7 @@ function sanitize_text_field($str) { * * @since 3.1.0 * - * @param string $path A path. + * @param string $path A path. * @param string $suffix If the filename ends in suffix this will also be cut off. * @return string */ @@ -4055,6 +4700,11 @@ function wp_basename( $path, $suffix = '' ) { * Violating our coding standards for a good function name. * * @since 3.0.0 + * + * @staticvar string|false $dblq + * + * @param string $text The text to be modified. + * @return string The modified text. */ function capital_P_dangit( $text ) { // Simple replacement for titles @@ -4063,13 +4713,13 @@ function capital_P_dangit( $text ) { return str_replace( 'Wordpress', 'WordPress', $text ); // Still here? Use the more judicious replacement static $dblq = false; - if ( false === $dblq ) + if ( false === $dblq ) { $dblq = _x( '“', 'opening curly double quote' ); + } return str_replace( array( ' Wordpress', '‘Wordpress', $dblq . 'Wordpress', '>Wordpress', '(Wordpress' ), array( ' WordPress', '‘WordPress', $dblq . 'WordPress', '>WordPress', '(WordPress' ), $text ); - } /** @@ -4083,7 +4733,7 @@ function capital_P_dangit( $text ) { function sanitize_mime_type( $mime_type ) { $sani_mime_type = preg_replace( '/[^-+*.a-zA-Z0-9\/]/', '', $mime_type ); /** - * Filter a mime type following sanitization. + * Filters a mime type following sanitization. * * @since 3.1.3 * @@ -4110,7 +4760,7 @@ function sanitize_trackback_urls( $to_ping ) { $urls_to_ping = array_map( 'esc_url_raw', $urls_to_ping ); $urls_to_ping = implode( "\n", $urls_to_ping ); /** - * Filter a list of trackback URLs following sanitization. + * Filters a list of trackback URLs following sanitization. * * The string returned here consists of a space or carriage return-delimited list * of trackback URLs. @@ -4171,7 +4821,7 @@ function wp_unslash( $value ) { * @since 3.6.0 * * @param string $content A string which might contain a URL. - * @return string The found URL. + * @return string|false The found URL. */ function get_url_in_content( $content ) { if ( empty( $content ) ) { @@ -4194,14 +4844,16 @@ function get_url_in_content( $content ) { * * @since 4.0.0 * + * @staticvar string $spaces + * * @return string The spaces regexp. */ function wp_spaces_regexp() { - static $spaces; + static $spaces = ''; if ( empty( $spaces ) ) { /** - * Filter the regexp for common whitespace characters. + * Filters the regexp for common whitespace characters. * * This string is substituted for the \s sequence as needed in regular * expressions. For websites not written in English, different characters @@ -4222,6 +4874,8 @@ function wp_spaces_regexp() { * Print the important emoji-related styles. * * @since 4.2.0 + * + * @staticvar bool $printed */ function print_emoji_styles() { static $printed = false; @@ -4249,8 +4903,13 @@ img.emoji { <?php } +/** + * Print the inline Emoji detection script if it is not already printed. + * + * @since 4.2.0 + * @staticvar bool $printed + */ function print_emoji_detection_script() { - global $wp_version; static $printed = false; if ( $printed ) { @@ -4259,30 +4918,62 @@ function print_emoji_detection_script() { $printed = true; + _print_emoji_detection_script(); +} + +/** + * Prints inline Emoji dection script + * + * @ignore + * @since 4.6.0 + * @access private + * + * @global string $wp_version WordPress version string. + */ +function _print_emoji_detection_script() { + global $wp_version; + $settings = array( /** - * Filter the URL where emoji images are hosted. + * Filters the URL where emoji png images are hosted. * * @since 4.2.0 * - * @param string The emoji base URL. + * @param string The emoji base URL for png images. */ - 'baseUrl' => apply_filters( 'emoji_url', set_url_scheme( '//s.w.org/images/core/emoji/72x72/' ) ), + 'baseUrl' => apply_filters( 'emoji_url', 'https://s.w.org/images/core/emoji/2/72x72/' ), /** - * Filter the extension of the emoji files. + * Filters the extension of the emoji png files. * * @since 4.2.0 * - * @param string The emoji extension. Default .png. + * @param string The emoji extension for png files. Default .png. */ 'ext' => apply_filters( 'emoji_ext', '.png' ), + + /** + * Filters the URL where emoji SVG images are hosted. + * + * @since 4.6.0 + * + * @param string The emoji base URL for svg images. + */ + 'svgUrl' => apply_filters( 'emoji_svg_url', 'https://s.w.org/images/core/emoji/2/svg/' ), + + /** + * Filters the extension of the emoji SVG files. + * + * @since 4.6.0 + * + * @param string The emoji extension for svg files. Default .svg. + */ + 'svgExt' => apply_filters( 'emoji_svg_ext', '.svg' ), ); $version = 'ver=' . $wp_version; - $develop_src = false !== strpos( $wp_version, '-src' ); - if ( $develop_src || ( defined( 'SCRIPT_DEBUG' ) && SCRIPT_DEBUG ) ) { + if ( SCRIPT_DEBUG ) { $settings['source'] = array( /** This filter is documented in wp-includes/class.wp-scripts.php */ 'wpemoji' => apply_filters( 'script_loader_src', includes_url( "js/wp-emoji.js?$version" ), 'wpemoji' ), @@ -4315,7 +5006,7 @@ function print_emoji_detection_script() { ?> <script type="text/javascript"> window._wpemojiSettings = <?php echo wp_json_encode( $settings ); ?>; - !function(a,b,c){function d(a){var c=b.createElement("canvas"),d=c.getContext&&c.getContext("2d");return d&&d.fillText?(d.textBaseline="top",d.font="600 32px Arial","flag"===a?(d.fillText(String.fromCharCode(55356,56812,55356,56807),0,0),c.toDataURL().length>3e3):(d.fillText(String.fromCharCode(55357,56835),0,0),0!==d.getImageData(16,16,1,1).data[0])):!1}function e(a){var c=b.createElement("script");c.src=a,c.type="text/javascript",b.getElementsByTagName("head")[0].appendChild(c)}var f,g;c.supports={simple:d("simple"),flag:d("flag")},c.DOMReady=!1,c.readyCallback=function(){c.DOMReady=!0},c.supports.simple&&c.supports.flag||(g=function(){c.readyCallback()},b.addEventListener?(b.addEventListener("DOMContentLoaded",g,!1),a.addEventListener("load",g,!1)):(a.attachEvent("onload",g),b.attachEvent("onreadystatechange",function(){"complete"===b.readyState&&c.readyCallback()})),f=c.source||{},f.concatemoji?e(f.concatemoji):f.wpemoji&&f.twemoji&&(e(f.twemoji),e(f.wpemoji)))}(window,document,window._wpemojiSettings); + !function(a,b,c){function d(a){var c,d,e,f,g,h=b.createElement("canvas"),i=h.getContext&&h.getContext("2d"),j=String.fromCharCode;if(!i||!i.fillText)return!1;switch(i.textBaseline="top",i.font="600 32px Arial",a){case"flag":return i.fillText(j(55356,56806,55356,56826),0,0),!(h.toDataURL().length<3e3)&&(i.clearRect(0,0,h.width,h.height),i.fillText(j(55356,57331,65039,8205,55356,57096),0,0),c=h.toDataURL(),i.clearRect(0,0,h.width,h.height),i.fillText(j(55356,57331,55356,57096),0,0),d=h.toDataURL(),c!==d);case"diversity":return i.fillText(j(55356,57221),0,0),e=i.getImageData(16,16,1,1).data,f=e[0]+","+e[1]+","+e[2]+","+e[3],i.fillText(j(55356,57221,55356,57343),0,0),e=i.getImageData(16,16,1,1).data,g=e[0]+","+e[1]+","+e[2]+","+e[3],f!==g;case"simple":return i.fillText(j(55357,56835),0,0),0!==i.getImageData(16,16,1,1).data[0];case"unicode8":return i.fillText(j(55356,57135),0,0),0!==i.getImageData(16,16,1,1).data[0];case"unicode9":return i.fillText(j(55358,56631),0,0),0!==i.getImageData(16,16,1,1).data[0]}return!1}function e(a){var c=b.createElement("script");c.src=a,c.type="text/javascript",b.getElementsByTagName("head")[0].appendChild(c)}var f,g,h,i;for(i=Array("simple","flag","unicode8","diversity","unicode9"),c.supports={everything:!0,everythingExceptFlag:!0},h=0;h<i.length;h++)c.supports[i[h]]=d(i[h]),c.supports.everything=c.supports.everything&&c.supports[i[h]],"flag"!==i[h]&&(c.supports.everythingExceptFlag=c.supports.everythingExceptFlag&&c.supports[i[h]]);c.supports.everythingExceptFlag=c.supports.everythingExceptFlag&&!c.supports.flag,c.DOMReady=!1,c.readyCallback=function(){c.DOMReady=!0},c.supports.everything||(g=function(){c.readyCallback()},b.addEventListener?(b.addEventListener("DOMContentLoaded",g,!1),a.addEventListener("load",g,!1)):(a.attachEvent("onload",g),b.attachEvent("onreadystatechange",function(){"complete"===b.readyState&&c.readyCallback()})),f=c.source||{},f.concatemoji?e(f.concatemoji):f.wpemoji&&f.twemoji&&(e(f.twemoji),e(f.wpemoji)))}(window,document,window._wpemojiSettings); </script> <?php } @@ -4349,7 +5040,7 @@ function wp_encode_emoji( $content ) { $matches = array(); if ( preg_match_all( $regex, $content, $matches ) ) { if ( ! empty( $matches[1] ) ) { - foreach( $matches[1] as $emoji ) { + foreach ( $matches[1] as $emoji ) { /* * UTF-32's hex encoding is the same as HTML's hex encoding. * So, by converting the emoji from UTF-8 to UTF-32, we magically @@ -4380,7 +5071,7 @@ function wp_staticize_emoji( $text ) { $text = wp_encode_emoji( $text ); /** This filter is documented in wp-includes/formatting.php */ - $cdn_url = apply_filters( 'emoji_url', set_url_scheme( '//s.w.org/images/core/emoji/72x72/' ) ); + $cdn_url = apply_filters( 'emoji_url', 'https://s.w.org/images/core/emoji/2/72x72/' ); /** This filter is documented in wp-includes/formatting.php */ $ext = apply_filters( 'emoji_ext', '.png' ); @@ -4516,3 +5207,88 @@ function wp_staticize_emoji_for_email( $mail ) { return $mail; } + +/** + * Shorten a URL, to be used as link text. + * + * @since 1.2.0 + * @since 4.4.0 Moved to wp-includes/formatting.php from wp-admin/includes/misc.php and added $length param. + * + * @param string $url URL to shorten. + * @param int $length Optional. Maximum length of the shortened URL. Default 35 characters. + * @return string Shortened URL. + */ +function url_shorten( $url, $length = 35 ) { + $stripped = str_replace( array( 'https://', 'http://', 'www.' ), '', $url ); + $short_url = untrailingslashit( $stripped ); + + if ( strlen( $short_url ) > $length ) { + $short_url = substr( $short_url, 0, $length - 3 ) . '…'; + } + return $short_url; +} + +/** + * Sanitizes a hex color. + * + * Returns either '', a 3 or 6 digit hex color (with #), or nothing. + * For sanitizing values without a #, see sanitize_hex_color_no_hash(). + * + * @since 3.4.0 + * + * @param string $color + * @return string|void + */ +function sanitize_hex_color( $color ) { + if ( '' === $color ) { + return ''; + } + + // 3 or 6 hex digits, or the empty string. + if ( preg_match('|^#([A-Fa-f0-9]{3}){1,2}$|', $color ) ) { + return $color; + } +} + +/** + * Sanitizes a hex color without a hash. Use sanitize_hex_color() when possible. + * + * Saving hex colors without a hash puts the burden of adding the hash on the + * UI, which makes it difficult to use or upgrade to other color types such as + * rgba, hsl, rgb, and html color names. + * + * Returns either '', a 3 or 6 digit hex color (without a #), or null. + * + * @since 3.4.0 + * + * @param string $color + * @return string|null + */ +function sanitize_hex_color_no_hash( $color ) { + $color = ltrim( $color, '#' ); + + if ( '' === $color ) { + return ''; + } + + return sanitize_hex_color( '#' . $color ) ? $color : null; +} + +/** + * Ensures that any hex color is properly hashed. + * Otherwise, returns value untouched. + * + * This method should only be necessary if using sanitize_hex_color_no_hash(). + * + * @since 3.4.0 + * + * @param string $color + * @return string + */ +function maybe_hash_hex_color( $color ) { + if ( $unhashed = sanitize_hex_color_no_hash( $color ) ) { + return '#' . $unhashed; + } + + return $color; +}