X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/41578db67d72562346e4dbb2a14889b23d522813..refs/tags/wordpress-4.4:/wp-includes/formatting.php?ds=inline diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 5efe0cda..ff4167d2 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -11,28 +11,79 @@ * Replaces common plain text characters into formatted entities * * As an example, - * - * 'cause today's effort makes it worth tomorrow's "holiday"... - * + * + * 'cause today's effort makes it worth tomorrow's "holiday" ... + * * Becomes: - * - * ’cause today’s effort makes it worth tomorrow’s “holiday”… - * + * + * ’cause today’s effort makes it worth tomorrow’s “holiday” … + * * Code within certain html blocks are skipped. * + * Do not use this function before the 'init' action hook; everything will break. + * * @since 0.71 - * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases + * + * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases + * @global array $shortcode_tags + * @staticvar array $static_characters + * @staticvar array $static_replacements + * @staticvar array $dynamic_characters + * @staticvar array $dynamic_replacements + * @staticvar array $default_no_texturize_tags + * @staticvar array $default_no_texturize_shortcodes + * @staticvar bool $run_texturize * * @param string $text The text to be formatted + * @param bool $reset Set to true for unit testing. Translated patterns will reset. 
* @return string The string replaced with html entities */ -function wptexturize($text) { - global $wp_cockneyreplace; - static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, - $default_no_texturize_tags, $default_no_texturize_shortcodes; +function wptexturize( $text, $reset = false ) { + global $wp_cockneyreplace, $shortcode_tags; + static $static_characters = null, + $static_replacements = null, + $dynamic_characters = null, + $dynamic_replacements = null, + $default_no_texturize_tags = null, + $default_no_texturize_shortcodes = null, + $run_texturize = true, + $apos = null, + $prime = null, + $double_prime = null, + $opening_quote = null, + $closing_quote = null, + $opening_single_quote = null, + $closing_single_quote = null, + $open_q_flag = '', + $open_sq_flag = '', + $apos_flag = ''; + + // If there's nothing to do, just stop. + if ( empty( $text ) || false === $run_texturize ) { + return $text; + } + + // Set up static variables. Run once only. + if ( $reset || ! isset( $static_characters ) ) { + /** + * Filter whether to skip running wptexturize(). + * + * Passing false to the filter will effectively short-circuit wptexturize(). + * returning the original text passed to the function instead. + * + * The filter runs only once, the first time wptexturize() is called. + * + * @since 4.0.0 + * + * @see wptexturize() + * + * @param bool $run_texturize Whether to short-circuit wptexturize(). + */ + $run_texturize = apply_filters( 'run_wptexturize', $run_texturize ); + if ( false === $run_texturize ) { + return $text; + } - // No need to set up these static variables more than once - if ( ! 
isset( $static_characters ) ) { /* translators: opening curly double quote */ $opening_quote = _x( '“', 'opening curly double quote' ); /* translators: closing curly double quote */ @@ -61,76 +112,87 @@ function wptexturize($text) { // if a plugin has provided an autocorrect array, use it if ( isset($wp_cockneyreplace) ) { - $cockney = array_keys($wp_cockneyreplace); - $cockneyreplace = array_values($wp_cockneyreplace); - } elseif ( "'" != $apos ) { // Only bother if we're doing a replacement. - $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" ); - $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" ); + $cockney = array_keys( $wp_cockneyreplace ); + $cockneyreplace = array_values( $wp_cockneyreplace ); } else { - $cockney = $cockneyreplace = array(); - } + /* translators: This is a comma-separated list of words that defy the syntax of quotations in normal use, + * for example... 'We do not have enough words yet' ... is a typical quoted phrase. But when we write + * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes. + */ + $cockney = explode( ',', _x( "'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em", + 'Comma-separated list of words to texturize in your language' ) ); - $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); - $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); + $cockneyreplace = explode( ',', _x( '’tain’t,’twere,’twas,’tis,’twill,’til,’bout,’nuff,’round,’cause,’em', + 'Comma-separated list of replacement words in your language' ) ); + } - /* - * Regex for common whitespace characters. 
- * - * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. - * This is designed to replace the PCRE \s sequence. In #WP22692, that sequence - * was found to be unreliable due to random inclusion of the A0 byte. - */ - $spaces = '[\r\n\t ]|\xC2\xA0| '; + $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); + $static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); // Pattern-based replacements of characters. + // Sort the remaining patterns into several arrays for performance tuning. + $dynamic_characters = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); + $dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); $dynamic = array(); + $spaces = wp_spaces_regexp(); - // '99 '99s '99's (apostrophe) - if ( "'" !== $apos ) { - $dynamic[ '/\'(?=\d)/' ] = $apos; + // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. + if ( "'" !== $apos || "'" !== $closing_single_quote ) { + $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote; + } + if ( "'" !== $apos || '"' !== $closing_quote ) { + $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote; } - // Single quote at start, or preceded by (, {, <, [, ", or spaces. - if ( "'" !== $opening_single_quote ) { - $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote; + // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. + if ( "'" !== $apos ) { + $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag; } - // 9" (double prime) - if ( '"' !== $double_prime ) { - $dynamic[ '/(?<=\d)"/' ] = $double_prime; + // Quoted Numbers like '0.42' + if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { + $dynamic[ '/(?<=\A|' . $spaces . 
')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote; } - // 9' (prime) - if ( "'" !== $prime ) { - $dynamic[ '/(?<=\d)\'/' ] = $prime; + // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. + if ( "'" !== $opening_single_quote ) { + $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $open_sq_flag; } - // Apostrophe in a word. No spaces or double primes. + // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. if ( "'" !== $apos ) { - $dynamic[ '/(?|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + // Look for shortcodes and HTML elements. - foreach ( $textarr as &$curl ) { - if ( empty( $curl ) ) { - continue; - } + preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches ); + $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); + $found_shortcodes = ! empty( $tagnames ); + $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : ''; + $regex = _get_wptexturize_split_regex( $shortcode_regex ); - // Only call _wptexturize_pushpop_element if first char is correct tag opening + $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); + + foreach ( $textarr as &$curl ) { + // Only call _wptexturize_pushpop_element if $curl is a delimiter. $first = $curl[0]; if ( '<' === $first ) { - _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); - } elseif ( '[' === $first ) { - _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); - } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) { + if ( ''; + $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; + $prime_pattern = "/(?<=\\d)$needle/"; + $flag_after_digit = "/(?<=\\d)$flag/"; + $flag_no_digit = "/(? 
&$sentence ) { + if ( false === strpos( $sentence, $needle ) ) { + continue; + } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) { + $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count ); + if ( $count > 1 ) { + // This sentence appears to have multiple closing quotes. Attempt Vulcan logic. + $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 ); + if ( 0 === $count2 ) { + // Try looking for a quote followed by a period. + $count2 = substr_count( $sentence, "$flag." ); + if ( $count2 > 0 ) { + // Assume the rightmost quote-period match is the end of quotation. + $pos = strrpos( $sentence, "$flag." ); + } else { + // When all else fails, make the rightmost candidate a closing quote. + // This is most likely to be problematic in the context of bug #18549. + $pos = strrpos( $sentence, $flag ); + } + $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) ); + } + // Use conventional replacement on any remaining primes and quotes. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); + $sentence = str_replace( $flag, $close_quote, $sentence ); + } elseif ( 1 == $count ) { + // Found only one closing quote candidate, so give it priority over primes. + $sentence = str_replace( $flag, $close_quote, $sentence ); + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } else { + // No closing quotes found. Just run primes pattern. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } + } else { + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); + } + if ( '"' == $needle && false !== strpos( $sentence, '"' ) ) { + $sentence = str_replace( '"', $close_quote, $sentence ); + } + } + + return implode( $open_quote, $sentences ); +} + /** * Search for disabled element tags. 
Push element to stack on tag open and pop - * on tag close. Assumes first character of $text is tag opening. + * on tag close. + * + * Assumes first char of $text is tag opening and last char is tag closing. + * Assumes second char of $text is optionally '/' to indicate closing as in . * * @since 2.9.0 * @access private * - * @param string $text Text to check. First character is assumed to be $opening - * @param array $stack Array used as stack of opened tag elements - * @param string $disabled_elements Tags to match against formatted as regexp sub-expression - * @param string $opening Tag opening character, assumed to be 1 character long - * @param string $closing Tag closing character - */ -function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') { - // Check if it is a closing tag -- otherwise assume opening tag - if (strncmp($opening . '/', $text, 2)) { - // Opening? Check $text+1 against disabled elements - if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) { + * @param string $text Text to check. Must be a tag like `` or `[shortcode]`. + * @param array $stack List of open tag elements. + * @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names. + */ +function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) { + // Is it an opening tag or closing tag? + if ( '/' !== $text[1] ) { + $opening_tag = true; + $name_offset = 1; + } elseif ( 0 == count( $stack ) ) { + // Stack is empty. Just stop. + return; + } else { + $opening_tag = false; + $name_offset = 2; + } + + // Parse out the tag name. + $space = strpos( $text, ' ' ); + if ( false === $space ) { + $space = -1; + } else { + $space -= $name_offset; + } + $tag = substr( $text, $name_offset, $space ); + + // Handle disabled tags. + if ( in_array( $tag, $disabled_elements ) ) { + if ( $opening_tag ) { /* * This disables texturize until we find a closing tag of our type * (e.g.
<pre>) even if there was invalid nesting before that
@@ -212,18 +401,9 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $openi
 			 *          "baba" won't be texturize
 			 */
 
-			array_push($stack, $matches[1]);
-		}
-	} else {
-		// Closing? Check $text+2 against disabled elements
-		$c = preg_quote($closing, '/');
-		if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
-			$last = array_pop($stack);
-
-			// Make sure it matches the opening tag
-			if ( $last != $matches[1] ) {
-				array_push( $stack, $last );
-			}
+			array_push( $stack, $tag );
+		} elseif ( end( $stack ) == $tag ) {
+			array_pop( $stack );
 		}
 	}
 }
@@ -232,24 +412,29 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $openi
  * Replaces double line-breaks with paragraph elements.
  *
  * A group of regex replaces used to identify text formatted with newlines and
- * replace double line-breaks with HTML paragraph tags. The remaining
- * line-breaks after conversion become <<br />> tags, unless $br is set to '0' - * or 'false'. + * replace double line-breaks with HTML paragraph tags. The remaining line-breaks + * after conversion become <<br />
> tags, unless $br is set to '0' or 'false'. * * @since 0.71 * * @param string $pee The text which has to be formatted. - * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true. + * @param bool $br Optional. If set, this will convert all remaining line-breaks + * after paragraphing. Default true. * @return string Text which has been converted into correct paragraph tags. */ -function wpautop($pee, $br = true) { +function wpautop( $pee, $br = true ) { $pre_tags = array(); if ( trim($pee) === '' ) return ''; - $pee = $pee . "\n"; // just to make things a little easier, pad the end + // Just to make things a little easier, pad the end. + $pee = $pee . "\n"; + /* + * Pre tags shouldn't be touched by autop. + * Replace pre tags with placeholders and bring them back after autop. + */ if ( strpos($pee, '', $pee ); $last_pee = array_pop($pee_parts); @@ -274,62 +459,305 @@ function wpautop($pee, $br = true) { $pee .= $last_pee; } + // Change multiple
s into two line breaks, which will turn into paragraphs. + $pee = preg_replace('|\s*|', "\n\n", $pee); + + $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'; - $pee = preg_replace('|
\s*
|', "\n\n", $pee); - // Space things out a little - $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|details|menu|summary)'; - $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); + // Add a single line break above block-level opening tags. + $pee = preg_replace('!(<' . $allblocks . '[\s/>])!', "\n$1", $pee); + + // Add a double line break below block-level closing tags. $pee = preg_replace('!()!', "$1\n\n", $pee); - $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines + // Standardize newline characters to "\n". + $pee = str_replace(array("\r\n", "\r"), "\n", $pee); + + // Find newlines in all elements and add placeholders. + $pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) ); + + // Collapse line breaks before and after ', $pee ); + } + + /* + * Collapse line breaks inside elements, before and elements + * so they don't get autop'd. + */ if ( strpos( $pee, '' ) !== false ) { - // no P/BR around param and embed $pee = preg_replace( '|(]*>)\s*|', '$1', $pee ); $pee = preg_replace( '|\s*|', '', $pee ); $pee = preg_replace( '%\s*(]*>)\s*%', '$1', $pee ); } + /* + * Collapse line breaks inside