X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/76aea3697c6043c1613370f172395b4f65ee71f0..8d3bb1a5dcfdea9857d3c88c3751f09593e34dc8:/wp-includes/formatting.php diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index eeafe0b4..2701c1a1 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -1,879 +1,3398 @@ |\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); - $stop = count($textarr); - - // if a plugin has provided an autocorrect array, use it - if ( isset($wp_cockneyreplace) ) { - $cockney = array_keys($wp_cockneyreplace); - $cockneyreplace = array_values($wp_cockneyreplace); - } else { - $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause"); - $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause"); +/** + * Replaces common plain text characters into formatted entities + * + * As an example, + * + * 'cause today's effort makes it worth tomorrow's "holiday" ... + * + * Becomes: + * + * ’cause today’s effort makes it worth tomorrow’s “holiday” … + * + * Code within certain html blocks are skipped. + * + * Do not use this function before the {@see 'init'} action hook; everything will break. + * + * @since 0.71 + * + * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases + * @global array $shortcode_tags + * @staticvar array $static_characters + * @staticvar array $static_replacements + * @staticvar array $dynamic_characters + * @staticvar array $dynamic_replacements + * @staticvar array $default_no_texturize_tags + * @staticvar array $default_no_texturize_shortcodes + * @staticvar bool $run_texturize + * + * @param string $text The text to be formatted + * @param bool $reset Set to true for unit testing. Translated patterns will reset. + * @return string The string replaced with html entities + */ +function wptexturize( $text, $reset = false ) { + global $wp_cockneyreplace, $shortcode_tags; + static $static_characters = null, + $static_replacements = null, + $dynamic_characters = null, + $dynamic_replacements = null, + $default_no_texturize_tags = null, + $default_no_texturize_shortcodes = null, + $run_texturize = true, + $apos = null, + $prime = null, + $double_prime = null, + $opening_quote = null, + $closing_quote = null, + $opening_single_quote = null, + $closing_single_quote = null, + $open_q_flag = '', + $open_sq_flag = '', + $apos_flag = ''; + + // If there's nothing to do, just stop. + if ( empty( $text ) || false === $run_texturize ) { + return $text; } - $static_characters = array_merge(array('---', ' -- ', '--', 'xn–', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney); - $static_replacements = array_merge(array('—', ' — ', '–', 'xn--', '…', '“', '’s', '”', ' ™'), $cockneyreplace); + // Set up static variables. Run once only. + if ( $reset || ! isset( $static_characters ) ) { + /** + * Filters whether to skip running wptexturize(). + * + * Passing false to the filter will effectively short-circuit wptexturize(). + * returning the original text passed to the function instead. + * + * The filter runs only once, the first time wptexturize() is called. + * + * @since 4.0.0 + * + * @see wptexturize() + * + * @param bool $run_texturize Whether to short-circuit wptexturize(). + */ + $run_texturize = apply_filters( 'run_wptexturize', $run_texturize ); + if ( false === $run_texturize ) { + return $text; + } - $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/(\s|\A|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A)"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/'); - $dynamic_replacements = array('’$1','$1‘', '$1″', '$1′', '$1’$2', '$1“$2', '”$1', '’$1', '$1×$2'); + /* translators: opening curly double quote */ + $opening_quote = _x( '“', 'opening curly double quote' ); + /* translators: closing curly double quote */ + $closing_quote = _x( '”', 'closing curly double quote' ); - for ( $i = 0; $i < $stop; $i++ ) { - $curl = $textarr[$i]; - - if (isset($curl{0}) && '<' != $curl{0} && '[' != $curl{0} && $next && !$has_pre_parent) { // If it's not a tag - // static strings - $curl = str_replace($static_characters, $static_replacements, $curl); - // regular expressions - $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); - } elseif (strpos($curl, '') !== false) { - $has_pre_parent = false; + /* translators: apostrophe, for example in 'cause or can't */ + $apos = _x( '’', 'apostrophe' ); + + /* translators: prime, for example in 9' (nine feet) */ + $prime = _x( '′', 'prime' ); + /* translators: double prime, for example in 9" (nine inches) */ + $double_prime = _x( '″', 'double prime' ); + + /* translators: opening curly single quote */ + $opening_single_quote = _x( '‘', 'opening curly single quote' ); + /* translators: closing curly single quote */ + $closing_single_quote = _x( '’', 'closing curly single quote' ); + + /* translators: en dash */ + $en_dash = _x( '–', 'en dash' ); + /* translators: em dash */ + $em_dash = _x( '—', 'em dash' ); + + $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); + $default_no_texturize_shortcodes = array('code'); + + // if a plugin has provided an autocorrect array, use it + if ( isset($wp_cockneyreplace) ) { + $cockney = array_keys( $wp_cockneyreplace ); + $cockneyreplace = array_values( $wp_cockneyreplace ); } else { - $next = true; + /* translators: This is a comma-separated list of words that defy the syntax of quotations in normal use, + * for example... 'We do not have enough words yet' ... is a typical quoted phrase. But when we write + * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes. + */ + $cockney = explode( ',', _x( "'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em", + 'Comma-separated list of words to texturize in your language' ) ); + + $cockneyreplace = explode( ',', _x( '’tain’t,’twere,’twas,’tis,’twill,’til,’bout,’nuff,’round,’cause,’em', + 'Comma-separated list of replacement words in your language' ) ); } - $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl); - $output .= $curl; - } + $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); + $static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); - return $output; -} -// Accepts matches array from preg_replace_callback in wpautop() -// or a string -function clean_pre($matches) { - if ( is_array($matches) ) - $text = $matches[1] . $matches[2] . ""; - else - $text = $matches; + // Pattern-based replacements of characters. + // Sort the remaining patterns into several arrays for performance tuning. + $dynamic_characters = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); + $dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); + $dynamic = array(); + $spaces = wp_spaces_regexp(); - $text = str_replace('
', '', $text); - $text = str_replace('

', "\n", $text); - $text = str_replace('

', '', $text); + // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. + if ( "'" !== $apos || "'" !== $closing_single_quote ) { + $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote; + } + if ( "'" !== $apos || '"' !== $closing_quote ) { + $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote; + } - return $text; -} + // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. + if ( "'" !== $apos ) { + $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag; + } -function wpautop($pee, $br = 1) { - $pee = $pee . "\n"; // just to make things a little easier, pad the end - $pee = preg_replace('|
\s*
|', "\n\n", $pee); - // Space things out a little - $allblocks = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr)'; - $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); - $pee = preg_replace('!()!', "$1\n\n", $pee); - $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines - if ( strpos($pee, ']*)>\s*|', "", $pee); // no pee inside object/embed - $pee = preg_replace('|\s*\s*|', '', $pee); - } - $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates - $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "

$1

\n", $pee); // make paragraphs, including one at the end - $pee = preg_replace('|

\s*?

|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace - $pee = preg_replace('!

([^<]+)\s*?(]*>)!', "

$1

$2", $pee); - $pee = preg_replace( '|

|', "$1

", $pee ); - $pee = preg_replace('!

\s*(]*>)\s*

!', "$1", $pee); // don't pee all over a tag - $pee = preg_replace("|

(|", "$1", $pee); // problem with nested lists - $pee = preg_replace('|

]*)>|i', "

", $pee); - $pee = str_replace('

', '

', $pee); - $pee = preg_replace('!

\s*(]*>)!', "$1", $pee); - $pee = preg_replace('!(]*>)\s*

!', "$1", $pee); - if ($br) { - $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', create_function('$matches', 'return str_replace("\n", "", $matches[0]);'), $pee); - $pee = preg_replace('|(?)\s*\n|', "
\n", $pee); // optionally make line breaks - $pee = str_replace('', "\n", $pee); - } - $pee = preg_replace('!(]*>)\s*
!', "$1", $pee); - $pee = preg_replace('!
(\s*]*>)!', '$1', $pee); - if (strpos($pee, ')(.*?)!is', 'clean_pre', $pee ); - $pee = preg_replace( "|\n

$|", '

', $pee ); - $pee = preg_replace('/

\s*?(' . get_shortcode_regex() . ')\s*<\/p>/s', '$1', $pee); // don't auto-p wrap shortcodes that stand alone + // Quoted Numbers like '0.42' + if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { + $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote; + } - return $pee; -} + // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. + if ( "'" !== $opening_single_quote ) { + $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $open_sq_flag; + } + + // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. + if ( "'" !== $apos ) { + $dynamic[ '/(?', '>', $text); - if ( 'double' === $quotes ) { - $text = str_replace('"', '"', $text); - } elseif ( 'single' === $quotes ) { - $text = str_replace("'", ''', $text); - } elseif ( $quotes ) { - $text = str_replace('"', '"', $text); - $text = str_replace("'", ''', $text); + // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces. + if ( '"' !== $opening_quote ) { + $dynamic[ '/(?<=\A|[([{\-]|<|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag; + } + + $dynamic_characters['quote'] = array_keys( $dynamic ); + $dynamic_replacements['quote'] = array_values( $dynamic ); + $dynamic = array(); + + // Dashes and spaces + $dynamic[ '/---/' ] = $em_dash; + $dynamic[ '/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/' ] = $em_dash; + $dynamic[ '/(?&/\[\]\x00-\x20=]++)@', $text, $matches ); + $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); + $found_shortcodes = ! empty( $tagnames ); + $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : ''; + $regex = _get_wptexturize_split_regex( $shortcode_regex ); + + $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); + + foreach ( $textarr as &$curl ) { + // Only call _wptexturize_pushpop_element if $curl is a delimiter. + $first = $curl[0]; + if ( '<' === $first ) { + if ( ''; + $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; + $prime_pattern = "/(?<=\\d)$needle/"; + $flag_after_digit = "/(?<=\\d)$flag/"; + $flag_no_digit = "/(? &$sentence ) { + if ( false === strpos( $sentence, $needle ) ) { + continue; + } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) { + $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count ); + if ( $count > 1 ) { + // This sentence appears to have multiple closing quotes. Attempt Vulcan logic. + $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 ); + if ( 0 === $count2 ) { + // Try looking for a quote followed by a period. + $count2 = substr_count( $sentence, "$flag." ); + if ( $count2 > 0 ) { + // Assume the rightmost quote-period match is the end of quotation. + $pos = strrpos( $sentence, "$flag." ); + } else { + // When all else fails, make the rightmost candidate a closing quote. + // This is most likely to be problematic in the context of bug #18549. + $pos = strrpos( $sentence, $flag ); + } + $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) ); + } + // Use conventional replacement on any remaining primes and quotes. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); + $sentence = str_replace( $flag, $close_quote, $sentence ); + } elseif ( 1 == $count ) { + // Found only one closing quote candidate, so give it priority over primes. + $sentence = str_replace( $flag, $close_quote, $sentence ); + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } else { + // No closing quotes found. Just run primes pattern. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } + } else { + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); + } + if ( '"' == $needle && false !== strpos( $sentence, '"' ) ) { + $sentence = str_replace( '"', $close_quote, $sentence ); + } + } - if (seems_utf8($string)) { - $chars = array( - // Decompositions for Latin-1 Supplement - chr(195).chr(128) => 'A', chr(195).chr(129) => 'A', - chr(195).chr(130) => 'A', chr(195).chr(131) => 'A', - chr(195).chr(132) => 'A', chr(195).chr(133) => 'A', - chr(195).chr(135) => 'C', chr(195).chr(136) => 'E', - chr(195).chr(137) => 'E', chr(195).chr(138) => 'E', - chr(195).chr(139) => 'E', chr(195).chr(140) => 'I', - chr(195).chr(141) => 'I', chr(195).chr(142) => 'I', - chr(195).chr(143) => 'I', chr(195).chr(145) => 'N', - chr(195).chr(146) => 'O', chr(195).chr(147) => 'O', - chr(195).chr(148) => 'O', chr(195).chr(149) => 'O', - chr(195).chr(150) => 'O', chr(195).chr(153) => 'U', - chr(195).chr(154) => 'U', chr(195).chr(155) => 'U', - chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y', - chr(195).chr(159) => 's', chr(195).chr(160) => 'a', - chr(195).chr(161) => 'a', chr(195).chr(162) => 'a', - chr(195).chr(163) => 'a', chr(195).chr(164) => 'a', - chr(195).chr(165) => 'a', chr(195).chr(167) => 'c', - chr(195).chr(168) => 'e', chr(195).chr(169) => 'e', - chr(195).chr(170) => 'e', chr(195).chr(171) => 'e', - chr(195).chr(172) => 'i', chr(195).chr(173) => 'i', - chr(195).chr(174) => 'i', chr(195).chr(175) => 'i', - chr(195).chr(177) => 'n', chr(195).chr(178) => 'o', - chr(195).chr(179) => 'o', chr(195).chr(180) => 'o', - chr(195).chr(181) => 'o', chr(195).chr(182) => 'o', - chr(195).chr(182) => 'o', chr(195).chr(185) => 'u', - chr(195).chr(186) => 'u', chr(195).chr(187) => 'u', - chr(195).chr(188) => 'u', chr(195).chr(189) => 'y', - chr(195).chr(191) => 'y', - // Decompositions for Latin Extended-A - chr(196).chr(128) => 'A', chr(196).chr(129) => 'a', - chr(196).chr(130) => 'A', chr(196).chr(131) => 'a', - chr(196).chr(132) => 'A', chr(196).chr(133) => 'a', - chr(196).chr(134) => 'C', chr(196).chr(135) => 'c', - chr(196).chr(136) => 'C', chr(196).chr(137) => 'c', - chr(196).chr(138) => 'C', chr(196).chr(139) => 'c', - chr(196).chr(140) => 'C', chr(196).chr(141) => 'c', - chr(196).chr(142) => 'D', chr(196).chr(143) => 'd', - chr(196).chr(144) => 'D', chr(196).chr(145) => 'd', - chr(196).chr(146) => 'E', chr(196).chr(147) => 'e', - chr(196).chr(148) => 'E', chr(196).chr(149) => 'e', - chr(196).chr(150) => 'E', chr(196).chr(151) => 'e', - chr(196).chr(152) => 'E', chr(196).chr(153) => 'e', - chr(196).chr(154) => 'E', chr(196).chr(155) => 'e', - chr(196).chr(156) => 'G', chr(196).chr(157) => 'g', - chr(196).chr(158) => 'G', chr(196).chr(159) => 'g', - chr(196).chr(160) => 'G', chr(196).chr(161) => 'g', - chr(196).chr(162) => 'G', chr(196).chr(163) => 'g', - chr(196).chr(164) => 'H', chr(196).chr(165) => 'h', - chr(196).chr(166) => 'H', chr(196).chr(167) => 'h', - chr(196).chr(168) => 'I', chr(196).chr(169) => 'i', - chr(196).chr(170) => 'I', chr(196).chr(171) => 'i', - chr(196).chr(172) => 'I', chr(196).chr(173) => 'i', - chr(196).chr(174) => 'I', chr(196).chr(175) => 'i', - chr(196).chr(176) => 'I', chr(196).chr(177) => 'i', - chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij', - chr(196).chr(180) => 'J', chr(196).chr(181) => 'j', - chr(196).chr(182) => 'K', chr(196).chr(183) => 'k', - chr(196).chr(184) => 'k', chr(196).chr(185) => 'L', - chr(196).chr(186) => 'l', chr(196).chr(187) => 'L', - chr(196).chr(188) => 'l', chr(196).chr(189) => 'L', - chr(196).chr(190) => 'l', chr(196).chr(191) => 'L', - chr(197).chr(128) => 'l', chr(197).chr(129) => 'L', - chr(197).chr(130) => 'l', chr(197).chr(131) => 'N', - chr(197).chr(132) => 'n', chr(197).chr(133) => 'N', - chr(197).chr(134) => 'n', chr(197).chr(135) => 'N', - chr(197).chr(136) => 'n', chr(197).chr(137) => 'N', - chr(197).chr(138) => 'n', chr(197).chr(139) => 'N', - chr(197).chr(140) => 'O', chr(197).chr(141) => 'o', - chr(197).chr(142) => 'O', chr(197).chr(143) => 'o', - chr(197).chr(144) => 'O', chr(197).chr(145) => 'o', - chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe', - chr(197).chr(148) => 'R',chr(197).chr(149) => 'r', - chr(197).chr(150) => 'R',chr(197).chr(151) => 'r', - chr(197).chr(152) => 'R',chr(197).chr(153) => 'r', - chr(197).chr(154) => 'S',chr(197).chr(155) => 's', - chr(197).chr(156) => 'S',chr(197).chr(157) => 's', - chr(197).chr(158) => 'S',chr(197).chr(159) => 's', - chr(197).chr(160) => 'S', chr(197).chr(161) => 's', - chr(197).chr(162) => 'T', chr(197).chr(163) => 't', - chr(197).chr(164) => 'T', chr(197).chr(165) => 't', - chr(197).chr(166) => 'T', chr(197).chr(167) => 't', - chr(197).chr(168) => 'U', chr(197).chr(169) => 'u', - chr(197).chr(170) => 'U', chr(197).chr(171) => 'u', - chr(197).chr(172) => 'U', chr(197).chr(173) => 'u', - chr(197).chr(174) => 'U', chr(197).chr(175) => 'u', - chr(197).chr(176) => 'U', chr(197).chr(177) => 'u', - chr(197).chr(178) => 'U', chr(197).chr(179) => 'u', - chr(197).chr(180) => 'W', chr(197).chr(181) => 'w', - chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y', - chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z', - chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z', - chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z', - chr(197).chr(190) => 'z', chr(197).chr(191) => 's', - // Euro Sign - chr(226).chr(130).chr(172) => 'E', - // GBP (Pound) Sign - chr(194).chr(163) => ''); + return implode( $open_quote, $sentences ); +} - $string = strtr($string, $chars); +/** + * Search for disabled element tags. Push element to stack on tag open and pop + * on tag close. + * + * Assumes first char of $text is tag opening and last char is tag closing. + * Assumes second char of $text is optionally '/' to indicate closing as in . + * + * @since 2.9.0 + * @access private + * + * @param string $text Text to check. Must be a tag like `` or `[shortcode]`. + * @param array $stack List of open tag elements. + * @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names. + */ +function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) { + // Is it an opening tag or closing tag? + if ( isset( $text[1] ) && '/' !== $text[1] ) { + $opening_tag = true; + $name_offset = 1; + } elseif ( 0 == count( $stack ) ) { + // Stack is empty. Just stop. + return; } else { - // Assume ISO-8859-1 if not UTF-8 - $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158) - .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194) - .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202) - .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210) - .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218) - .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227) - .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235) - .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243) - .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251) - .chr(252).chr(253).chr(255); - - $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"; - - $string = strtr($string, $chars['in'], $chars['out']); - $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254)); - $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'); - $string = str_replace($double_chars['in'], $double_chars['out'], $string); + $opening_tag = false; + $name_offset = 2; } - return $string; + // Parse out the tag name. + $space = strpos( $text, ' ' ); + if ( false === $space ) { + $space = -1; + } else { + $space -= $name_offset; + } + $tag = substr( $text, $name_offset, $space ); + + // Handle disabled tags. + if ( in_array( $tag, $disabled_elements ) ) { + if ( $opening_tag ) { + /* + * This disables texturize until we find a closing tag of our type + * (e.g.

) even if there was invalid nesting before that
+			 *
+			 * Example: in the case 
sadsadasd"baba"
+ * "baba" won't be texturize + */ + + array_push( $stack, $tag ); + } elseif ( end( $stack ) == $tag ) { + array_pop( $stack ); + } + } } -function sanitize_file_name( $name ) { // Like sanitize_title, but with periods - $name = strtolower( $name ); - $name = preg_replace('/&.+?;/', '', $name); // kill entities - $name = str_replace( '_', '-', $name ); - $name = preg_replace('/[^a-z0-9\s-.]/', '', $name); - $name = preg_replace('/\s+/', '-', $name); - $name = preg_replace('|-+|', '-', $name); - $name = trim($name, '-'); - return $name; -} +/** + * Replaces double line-breaks with paragraph elements. + * + * A group of regex replaces used to identify text formatted with newlines and + * replace double line-breaks with HTML paragraph tags. The remaining line-breaks + * after conversion become <
> tags, unless $br is set to '0' or 'false'. + * + * @since 0.71 + * + * @param string $pee The text which has to be formatted. + * @param bool $br Optional. If set, this will convert all remaining line-breaks + * after paragraphing. Default true. + * @return string Text which has been converted into correct paragraph tags. + */ +function wpautop( $pee, $br = true ) { + $pre_tags = array(); -function sanitize_user( $username, $strict = false ) { - $raw_username = $username; - $username = strip_tags($username); - // Kill octets - $username = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '', $username); - $username = preg_replace('/&.+?;/', '', $username); // Kill entities + if ( trim($pee) === '' ) + return ''; - // If strict, reduce to ASCII for max portability. - if ( $strict ) - $username = preg_replace('|[^a-z0-9 _.\-@]|i', '', $username); + // Just to make things a little easier, pad the end. + $pee = $pee . "\n"; + + /* + * Pre tags shouldn't be touched by autop. + * Replace pre tags with placeholders and bring them back after autop. + */ + if ( strpos($pee, '', $pee ); + $last_pee = array_pop($pee_parts); + $pee = ''; + $i = 0; + + foreach ( $pee_parts as $pee_part ) { + $start = strpos($pee_part, ''; - return apply_filters('sanitize_user', $username, $raw_username, $strict); -} + $pee .= substr( $pee_part, 0, $start ) . $name; + $i++; + } -function sanitize_title($title, $fallback_title = '') { - $title = strip_tags($title); - $title = apply_filters('sanitize_title', $title); + $pee .= $last_pee; + } + // Change multiple
s into two line breaks, which will turn into paragraphs. + $pee = preg_replace('|\s*|', "\n\n", $pee); - if ( '' === $title || false === $title ) - $title = $fallback_title; + $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'; - return $title; -} + // Add a single line break above block-level opening tags. + $pee = preg_replace('!(<' . $allblocks . '[\s/>])!', "\n$1", $pee); -function sanitize_title_with_dashes($title) { - $title = strip_tags($title); - // Preserve escaped octets. - $title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title); - // Remove percent signs that are not part of an octet. - $title = str_replace('%', '', $title); - // Restore octets. - $title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title); + // Add a double line break below block-level closing tags. + $pee = preg_replace('!()!', "$1\n\n", $pee); - $title = remove_accents($title); - if (seems_utf8($title)) { - if (function_exists('mb_strtolower')) { - $title = mb_strtolower($title, 'UTF-8'); - } - $title = utf8_uri_encode($title, 200); + // Standardize newline characters to "\n". + $pee = str_replace(array("\r\n", "\r"), "\n", $pee); + + // Find newlines in all elements and add placeholders. + $pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) ); + + // Collapse line breaks before and after ', $pee ); } - $title = strtolower($title); - $title = preg_replace('/&.+?;/', '', $title); // kill entities - $title = preg_replace('/[^%a-z0-9 _-]/', '', $title); - $title = preg_replace('/\s+/', '-', $title); - $title = preg_replace('|-+|', '-', $title); - $title = trim($title, '-'); + /* + * Collapse line breaks inside elements, before and elements + * so they don't get autop'd. + */ + if ( strpos( $pee, '' ) !== false ) { + $pee = preg_replace( '|(]*>)\s*|', '$1', $pee ); + $pee = preg_replace( '|\s*|', '', $pee ); + $pee = preg_replace( '%\s*(]*>)\s*%', '$1', $pee ); + } - return $title; -} + /* + * Collapse line breaks inside