X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/58f607a1de715c9bca69340a4d6fb9e1b9c2bed2..5d244c8fd9a27c9f89dd08da2af6fbc67d4fce63:/wp-includes/formatting.php diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 58c826f9..7bd354af 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -5,41 +5,81 @@ * Handles many functions for formatting output. * * @package WordPress - **/ + */ /** * Replaces common plain text characters into formatted entities * * As an example, - * - * 'cause today's effort makes it worth tomorrow's "holiday"... - * + * + * 'cause today's effort makes it worth tomorrow's "holiday" ... + * * Becomes: - * - * ’cause today’s effort makes it worth tomorrow’s “holiday”… - * + * + * ’cause today’s effort makes it worth tomorrow’s “holiday” … + * * Code within certain html blocks are skipped. * * @since 0.71 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases * * @param string $text The text to be formatted + * @param bool $reset Set to true for unit testing. Translated patterns will reset. * @return string The string replaced with html entities */ -function wptexturize($text) { - global $wp_cockneyreplace; - static $static_setup = false, $opening_quote, $closing_quote, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements; - $output = ''; - $curl = ''; - $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); - $stop = count($textarr); +function wptexturize($text, $reset = false) { + global $wp_cockneyreplace, $shortcode_tags; + static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, + $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true; + + // If there's nothing to do, just stop. + if ( empty( $text ) || false === $run_texturize ) { + return $text; + } + + // Set up static variables. Run once only. + if ( $reset || ! isset( $static_characters ) ) { + /** + * Filter whether to skip running wptexturize(). + * + * Passing false to the filter will effectively short-circuit wptexturize(). + * returning the original text passed to the function instead. + * + * The filter runs only once, the first time wptexturize() is called. + * + * @since 4.0.0 + * + * @see wptexturize() + * + * @param bool $run_texturize Whether to short-circuit wptexturize(). + */ + $run_texturize = apply_filters( 'run_wptexturize', $run_texturize ); + if ( false === $run_texturize ) { + return $text; + } + + /* translators: opening curly double quote */ + $opening_quote = _x( '“', 'opening curly double quote' ); + /* translators: closing curly double quote */ + $closing_quote = _x( '”', 'closing curly double quote' ); + + /* translators: apostrophe, for example in 'cause or can't */ + $apos = _x( '’', 'apostrophe' ); + + /* translators: prime, for example in 9' (nine feet) */ + $prime = _x( '′', 'prime' ); + /* translators: double prime, for example in 9" (nine inches) */ + $double_prime = _x( '″', 'double prime' ); - // No need to set up these variables more than once - if (!$static_setup) { - /* translators: opening curly quote */ - $opening_quote = _x('“', 'opening curly quote'); - /* translators: closing curly quote */ - $closing_quote = _x('”', 'closing curly quote'); + /* translators: opening curly single quote */ + $opening_single_quote = _x( '‘', 'opening curly single quote' ); + /* translators: closing curly single quote */ + $closing_single_quote = _x( '’', 'closing curly single quote' ); + + /* translators: en dash */ + $en_dash = _x( '–', 'en dash' ); + /* translators: em dash */ + $em_dash = _x( '—', 'em dash' ); $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); $default_no_texturize_shortcodes = array('code'); @@ -48,75 +88,259 @@ function wptexturize($text) { if ( isset($wp_cockneyreplace) ) { $cockney = array_keys($wp_cockneyreplace); $cockneyreplace = array_values($wp_cockneyreplace); + } elseif ( "'" != $apos ) { // Only bother if we're doing a replacement. + $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" ); + $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" ); } else { - $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause"); - $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause"); + $cockney = $cockneyreplace = array(); } - $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)'), $cockney); - $static_replacements = array_merge(array('—', ' — ', '–', ' – ', 'xn--', '…', $opening_quote, $closing_quote, ' ™'), $cockneyreplace); + $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); + $static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); + - $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/\'(\d+)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/\b(\d+)x(\d+)\b/'); - $dynamic_replacements = array('’$1','’$1', '$1‘', '$1″', '$1′', '$1’$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '’$1', '$1×$2'); + // Pattern-based replacements of characters. + // Sort the remaining patterns into several arrays for performance tuning. + $dynamic_characters = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); + $dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); + $dynamic = array(); + $spaces = wp_spaces_regexp(); + + // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. + if ( "'" !== $apos || "'" !== $closing_single_quote ) { + $dynamic[ '/\'(\d\d)\'(?=\Z|[.,)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote; + } + if ( "'" !== $apos || '"' !== $closing_quote ) { + $dynamic[ '/\'(\d\d)"(?=\Z|[.,)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote; + } + + // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. + if ( "'" !== $apos ) { + $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos; + } + + // Quoted Numbers like '0.42' + if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { + $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote; + } + + // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. + if ( "'" !== $opening_single_quote ) { + $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $opening_single_quote; + } + + // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. + if ( "'" !== $apos ) { + $dynamic[ '/(?'); - elseif ('[' == $curl{0}) - _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); - } + // Look for shortcodes and HTML elements. + + $tagnames = array_keys( $shortcode_tags ); + $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); + $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). + + $comment_regex = + '!' // Start of comment, after the <. + . '(?:' // Unroll the loop: Consume everything until --> is found. + . '-(?!->)' // Dash not followed by end of comment. + . '[^\-]*+' // Consume non-dashes. + . ')*+' // Loop possessively. + . '(?:-->)?'; // End of comment. If not found, match all input. + + $shortcode_regex = + '\[' // Find start of shortcode. + . '[\/\[]?' // Shortcodes may begin with [/ or [[ + . $tagregexp // Only match registered shortcodes, because performance. + . '(?:' + . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. + . '|' + . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. + . ')*+' // Possessive critical. + . '\]' // Find end of shortcode. + . '\]?'; // Shortcodes may end with ]] + + $regex = + '/(' // Capture the entire match. + . '<' // Find start of element. + . '(?(?=!--)' // Is this a comment? + . $comment_regex // Find end of comment. + . '|' + . '[^>]+>' // Find end of element. + . ')' + . '|' + . $shortcode_regex // Find shortcodes. + . ')/s'; + + $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); + + foreach ( $textarr as &$curl ) { + // Only call _wptexturize_pushpop_element if $curl is a delimiter. + $first = $curl[0]; + if ( '<' === $first && '