X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/41578db67d72562346e4dbb2a14889b23d522813..9e77185fafaf4e60e2b73821e0e4b9b1a11fb85f:/wp-includes/formatting.php diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 5efe0cda..1f18ab9d 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -24,15 +24,40 @@ * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases * * @param string $text The text to be formatted + * @param bool $reset Set to true for unit testing. Translated patterns will reset. * @return string The string replaced with html entities */ -function wptexturize($text) { +function wptexturize($text, $reset = false) { global $wp_cockneyreplace; static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, - $default_no_texturize_tags, $default_no_texturize_shortcodes; + $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true; + + // If there's nothing to do, just stop. + if ( empty( $text ) || false === $run_texturize ) { + return $text; + } + + // Set up static variables. Run once only. + if ( $reset || ! isset( $static_characters ) ) { + /** + * Filter whether to skip running wptexturize(). + * + * Passing false to the filter will effectively short-circuit wptexturize(). + * returning the original text passed to the function instead. + * + * The filter runs only once, the first time wptexturize() is called. + * + * @since 4.0.0 + * + * @see wptexturize() + * + * @param bool $run_texturize Whether to short-circuit wptexturize(). + */ + $run_texturize = apply_filters( 'run_wptexturize', $run_texturize ); + if ( false === $run_texturize ) { + return $text; + } - // No need to set up these static variables more than once - if ( ! isset( $static_characters ) ) { /* translators: opening curly double quote */ $opening_quote = _x( '“', 'opening curly double quote' ); /* translators: closing curly double quote */ @@ -70,35 +95,43 @@ function wptexturize($text) { $cockney = $cockneyreplace = array(); } - $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); - $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); - - /* - * Regex for common whitespace characters. - * - * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. - * This is designed to replace the PCRE \s sequence. In #WP22692, that sequence - * was found to be unreliable due to random inclusion of the A0 byte. - */ - $spaces = '[\r\n\t ]|\xC2\xA0| '; + $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); + $static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); // Pattern-based replacements of characters. + // Sort the remaining patterns into several arrays for performance tuning. + $dynamic_characters = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); + $dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); $dynamic = array(); + $spaces = wp_spaces_regexp(); + + // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. + if ( "'" !== $apos || "'" !== $closing_single_quote ) { + $dynamic[ '/\'(\d\d)\'(?=\Z|[.,)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote; + } + if ( "'" !== $apos || '"' !== $closing_quote ) { + $dynamic[ '/\'(\d\d)"(?=\Z|[.,)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote; + } - // '99 '99s '99's (apostrophe) + // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. if ( "'" !== $apos ) { - $dynamic[ '/\'(?=\d)/' ] = $apos; + $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos; } - // Single quote at start, or preceded by (, {, <, [, ", or spaces. + // Quoted Numbers like '0.42' + if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { + $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote; + } + + // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. if ( "'" !== $opening_single_quote ) { - $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote; + $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $opening_single_quote; } - // 9" (double prime) - if ( '"' !== $double_prime ) { - $dynamic[ '/(?<=\d)"/' ] = $double_prime; + // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. + if ( "'" !== $apos ) { + $dynamic[ '/(?|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + // Look for shortcodes and HTML elements. + + $regex = '/(' // Capture the entire match. + . '<' // Find start of element. + . '(?(?=!--)' // Is this a comment? + . '.+?--\s*>' // Find end of comment + . '|' + . '[^>]+>' // Find end of element + . ')' + . '|' + . '\[' // Find start of shortcode. + . '\[?' // Shortcodes may begin with [[ + . '(?:' + . '[^\[\]<>]' // Shortcodes do not contain other shortcodes. + . '|' + . '<[^>]+>' // HTML elements permitted. Prevents matching ] before >. + . ')++' + . '\]' // Find end of shortcode. + . '\]?' // Shortcodes may end with ]] + . ')/s'; + + $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); foreach ( $textarr as &$curl ) { - if ( empty( $curl ) ) { + // Only call _wptexturize_pushpop_element if $curl is a delimiter. + $first = $curl[0]; + if ( '<' === $first && '>' === substr( $curl, -1 ) ) { + // This is an HTML delimiter. + + if ( '