X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/58f607a1de715c9bca69340a4d6fb9e1b9c2bed2..341dfbb66f24f5145174c373267f889c31615cc5:/wp-includes/formatting.php diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 58c826f9..3452ed2a 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -28,18 +28,18 @@ */ function wptexturize($text) { global $wp_cockneyreplace; - static $static_setup = false, $opening_quote, $closing_quote, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements; - $output = ''; - $curl = ''; - $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); - $stop = count($textarr); + static $opening_quote, $closing_quote, $en_dash, $em_dash, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements; - // No need to set up these variables more than once - if (!$static_setup) { + // No need to set up these static variables more than once + if ( empty( $opening_quote ) ) { /* translators: opening curly quote */ $opening_quote = _x('“', 'opening curly quote'); /* translators: closing curly quote */ $closing_quote = _x('”', 'closing curly quote'); + /* translators: en dash */ + $en_dash = _x('–', 'en dash'); + /* translators: em dash */ + $em_dash = _x('—', 'em dash'); $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); $default_no_texturize_shortcodes = array('code'); @@ -53,13 +53,11 @@ function wptexturize($text) { $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause"); } - $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)'), $cockney); - $static_replacements = array_merge(array('—', ' — ', '–', ' – ', 'xn--', '…', $opening_quote, 
$closing_quote, ' ™'), $cockneyreplace); + $static_characters = array_merge( array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)'), $cockney ); + $static_replacements = array_merge( array($em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™'), $cockneyreplace ); - $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/\'(\d+)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/\b(\d+)x(\d+)\b/'); + $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/\'(\d)/', '/(\s|\A|[([{<]|")\'/', '/(\d)"/', '/(\d)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/\b(\d+)x(\d+)\b/'); $dynamic_replacements = array('’$1','’$1', '$1‘', '$1″', '$1′', '$1’$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '’$1', '$1×$2'); - - $static_setup = true; } // Transform into regexp sub-expression used in _wptexturize_pushpop_element @@ -70,32 +68,27 @@ function wptexturize($text) { $no_texturize_tags_stack = array(); $no_texturize_shortcodes_stack = array(); - for ( $i = 0; $i < $stop; $i++ ) { - $curl = $textarr[$i]; + $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); - if ( !empty($curl) && '<' != $curl{0} && '[' != $curl{0} - && empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) { - // This is not a tag, nor is the texturization disabled - // static strings + foreach ( $textarr as &$curl ) { + if ( empty( $curl ) ) + continue; + + // Only call _wptexturize_pushpop_element if first char is correct tag opening + $first = $curl[0]; + if ( '<' === $first ) { + _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); + } elseif ( '[' === $first ) { + _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); + } elseif ( 
empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) { + // This is not a tag, nor is the texturization disabled static strings $curl = str_replace($static_characters, $static_replacements, $curl); // regular expressions $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); - } elseif (!empty($curl)) { - /* - * Only call _wptexturize_pushpop_element if first char is correct - * tag opening - */ - if ('<' == $curl{0}) - _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); - elseif ('[' == $curl{0}) - _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); } - $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl); - $output .= $curl; } - - return $output; + return implode( '', $textarr ); } /** @@ -208,7 +201,7 @@ function wpautop($pee, $br = 1) { $pee = preg_replace('!
\s*(?' . $allblocks . '[^>]*>)!', "$1", $pee); $pee = preg_replace('!(?' . $allblocks . '[^>]*>)\s*
!', "$1", $pee); if ($br) { - $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', create_function('$matches', 'return str_replace("\n", "\\s*?(\\[(' . $tagregexp . ')\\b.*?\\/?\\](?:.+?\\[\\/\\2\\])?)\\s*<\\/p>/s', '$1', $pee); - } - - return $pee; + if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) { + return $pee; + } + + $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); + + $pattern = + '/' + . '
' // Opening paragraph
+ . '\\s*+' // Optional leading whitespace
+ . '(' // 1: The shortcode
+ . '\\[' // Opening bracket
+ . "($tagregexp)" // 2: Shortcode name
+ . '\\b' // Word boundary
+ // Unroll the loop: Inside the opening shortcode tag
+ . '[^\\]\\/]*' // Not a closing bracket or forward slash
+ . '(?:'
+ . '\\/(?!\\])' // A forward slash not followed by a closing bracket
+ . '[^\\]\\/]*' // Not a closing bracket or forward slash
+ . ')*?'
+ . '(?:'
+ . '\\/\\]' // Self closing tag and closing bracket
+ . '|'
+ . '\\]' // Closing bracket
+ . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags
+ . '[^\\[]*+' // Not an opening bracket
+ . '(?:'
+ . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag
+ . '[^\\[]*+' // Not an opening bracket
+ . ')*+'
+ . '\\[\\/\\2\\]' // Closing shortcode tag
+ . ')?'
+ . ')'
+ . ')'
+ . '\\s*+' // optional trailing whitespace
+ . '<\\/p>' // closing paragraph
+ . '/s';
+
+ return preg_replace( $pattern, '$1', $pee );
}
/**
@@ -293,34 +330,31 @@ function seems_utf8($str) {
function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
$string = (string) $string;
- if ( 0 === strlen( $string ) ) {
+ if ( 0 === strlen( $string ) )
return '';
- }
// Don't bother if there are no specialchars - saves some processing
- if ( !preg_match( '/[&<>"\']/', $string ) ) {
+ if ( ! preg_match( '/[&<>"\']/', $string ) )
return $string;
- }
// Account for the previous behaviour of the function when the $quote_style is not an accepted value
- if ( empty( $quote_style ) ) {
+ if ( empty( $quote_style ) )
$quote_style = ENT_NOQUOTES;
- } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
+ elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) )
$quote_style = ENT_QUOTES;
- }
// Store the site charset as a static to avoid multiple calls to wp_load_alloptions()
- if ( !$charset ) {
+ if ( ! $charset ) {
static $_charset;
- if ( !isset( $_charset ) ) {
+ if ( ! isset( $_charset ) ) {
$alloptions = wp_load_alloptions();
$_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
}
$charset = $_charset;
}
- if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
+
+ if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) )
$charset = 'UTF-8';
- }
$_quote_style = $quote_style;
@@ -332,28 +366,27 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
}
// Handle double encoding ourselves
- if ( !$double_encode ) {
+ if ( $double_encode ) {
+ $string = @htmlspecialchars( $string, $quote_style, $charset );
+ } else {
+ // Decode &amp; into &
$string = wp_specialchars_decode( $string, $_quote_style );
- /* Critical */
- // The previous line decodes &amp;phrase; into &phrase; We must guarantee that &phrase; is valid before proceeding.
- $string = wp_kses_normalize_entities($string);
+ // Guarantee every &entity; is valid or re-encode the &
+ $string = wp_kses_normalize_entities( $string );
- // Now proceed with custom double-encoding silliness
- $string = preg_replace( '/&(#?x?[0-9a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
- }
+ // Now re-encode everything except &entity;
+ $string = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
- $string = @htmlspecialchars( $string, $quote_style, $charset );
+ for ( $i = 0; $i < count( $string ); $i += 2 )
+ $string[$i] = @htmlspecialchars( $string[$i], $quote_style, $charset );
- // Handle double encoding ourselves
- if ( !$double_encode ) {
- $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string );
+ $string = implode( '', $string );
}
// Backwards compatibility
- if ( 'single' === $_quote_style ) {
+ if ( 'single' === $_quote_style )
$string = str_replace( "'", '&#039;', $string );
- }
return $string;
}
@@ -535,34 +568,38 @@ function remove_accents($string) {
if (seems_utf8($string)) {
$chars = array(
// Decompositions for Latin-1 Supplement
+ chr(194).chr(170) => 'a', chr(194).chr(186) => 'o',
chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
- chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
- chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
- chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
- chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
- chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
+ chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
+ chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
+ chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
+ chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
+ chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
+ chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
- chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
- chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
- chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
- chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
+ chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
+ chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
+ chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
+ chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
+ chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
- chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
- chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
- chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
- chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
- chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
- chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
- chr(195).chr(191) => 'y',
+ chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
+ chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
+ chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
+ chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
+ chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
+ chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
+ chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
+ chr(195).chr(191) => 'y', chr(195).chr(152) => 'O',
// Decompositions for Latin Extended-A
chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
@@ -628,6 +665,9 @@ function remove_accents($string) {
chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
+ // Decompositions for Latin Extended-B
+ chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
+ chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
// Euro Sign
chr(226).chr(130).chr(172) => 'E',
// GBP (Pound) Sign
@@ -700,7 +740,7 @@ function sanitize_file_name( $filename ) {
if ( preg_match("/^[a-zA-Z]{2,5}\d?$/", $part) ) {
$allowed = false;
foreach ( $mimes as $ext_preg => $mime_match ) {
- $ext_preg = '!(^' . $ext_preg . ')$!i';
+ $ext_preg = '!^(' . $ext_preg . ')$!i';
if ( preg_match( $ext_preg, $part ) ) {
$allowed = true;
break;
@@ -718,12 +758,10 @@ function sanitize_file_name( $filename ) {
/**
* Sanitize username stripping out unsafe characters.
*
- * If $strict is true, only alphanumeric characters (as well as _, space, ., -,
- * @) are returned.
- * Removes tags, octets, entities, and if strict is enabled, will remove all
- * non-ASCII characters. After sanitizing, it passes the username, raw username
- * (the username in the parameter), and the strict parameter as parameters for
- * the filter.
+ * Removes tags, octets, entities, and if strict is enabled, will only keep
+ * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
+ * raw username (the username in the parameter), and the value of $strict as
+ * parameters for the 'sanitize_user' filter.
*
* @since 2.0.0
* @uses apply_filters() Calls 'sanitize_user' hook on username, raw username,
@@ -745,6 +783,7 @@ function sanitize_user( $username, $strict = false ) {
if ( $strict )
$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
+ $username = trim( $username );
// Consolidate contiguous whitespace
$username = preg_replace( '|\s+|', ' ', $username );
@@ -754,7 +793,7 @@ function sanitize_user( $username, $strict = false ) {
/**
* Sanitize a string key.
*
- * Keys are used as internal identifiers. They should be lowercase ASCII. Dashes and underscores are allowed.
+ * Keys are used as internal identifiers. Lowercase alphanumeric characters, dashes and underscores are allowed.
*
* @since 3.0.0
*
@@ -763,17 +802,9 @@ function sanitize_user( $username, $strict = false ) {
*/
function sanitize_key( $key ) {
$raw_key = $key;
- $key = wp_strip_all_tags($key);
- // Kill octets
- $key = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '', $key);
- $key = preg_replace('/&.+?;/', '', $key); // Kill entities
-
- $key = preg_replace('|[^a-z0-9 _.\-@]|i', '', $key);
-
- // Consolidate contiguous whitespace
- $key = preg_replace('|\s+|', ' ', $key);
-
- return apply_filters('sanitize_key', $key, $raw_key);
+ $key = strtolower( $key );
+ $key = preg_replace( '/[^a-z0-9_\-]/', '', $key );
+ return apply_filters( 'sanitize_key', $key, $raw_key );
}
/**
@@ -787,12 +818,16 @@ function sanitize_key( $key ) {
*
* @param string $title The string to be sanitized.
* @param string $fallback_title Optional. A title to use if $title is empty.
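+ * @param string $context Optional. The operation for which the string is sanitized. Defaults to 'save', in which case accents are removed from the title before the 'sanitize_title' filter runs.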
* @return string The sanitized string.
*/
-function sanitize_title($title, $fallback_title = '') {
+function sanitize_title($title, $fallback_title = '', $context = 'save') {
$raw_title = $title;
- $title = strip_tags($title);
- $title = apply_filters('sanitize_title', $title, $raw_title);
+
+ if ( 'save' == $context )
+ $title = remove_accents($title);
+
+ $title = apply_filters('sanitize_title', $title, $raw_title, $context);
if ( '' === $title || false === $title )
$title = $fallback_title;
@@ -800,8 +835,12 @@ function sanitize_title($title, $fallback_title = '') {
return $title;
}
+function sanitize_title_for_query($title) {
+ return sanitize_title($title, '', 'query');
+}
+
/**
- * Sanitizes title, replacing whitespace with dashes.
+ * Sanitizes title, replacing whitespace and a few other characters with dashes.
*
* Limits the output to alphanumeric characters, underscore (_) and dash (-).
* Whitespace becomes a dash.
@@ -809,9 +848,11 @@ function sanitize_title($title, $fallback_title = '') {
* @since 1.2.0
*
* @param string $title The title to be sanitized.
+ * @param string $raw_title Optional. Not used.
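+ * @param string $context Optional. The operation for which the string is sanitized. Defaults to 'display'; when 'save', percent-encoded nbsp/dashes become '-' and percent-encoded quotes and symbol characters are stripped.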
* @return string The sanitized title.
*/
-function sanitize_title_with_dashes($title) {
+function sanitize_title_with_dashes($title, $raw_title = '', $context = 'display') {
$title = strip_tags($title);
// Preserve escaped octets.
$title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
@@ -820,7 +861,6 @@ function sanitize_title_with_dashes($title) {
// Restore octets.
$title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);
- $title = remove_accents($title);
if (seems_utf8($title)) {
if (function_exists('mb_strtolower')) {
$title = mb_strtolower($title, 'UTF-8');
@@ -831,6 +871,20 @@ function sanitize_title_with_dashes($title) {
$title = strtolower($title);
$title = preg_replace('/&.+?;/', '', $title); // kill entities
$title = str_replace('.', '-', $title);
+
+ if ( 'save' == $context ) {
+ // nbsp, ndash and mdash
+ $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title );
+ // iexcl and iquest
+ $title = str_replace( array( '%c2%a1', '%c2%bf' ), '', $title );
+ // angle quotes
+ $title = str_replace( array( '%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba' ), '', $title );
+ // curly quotes
+ $title = str_replace( array( '%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d' ), '', $title );
+ // copy, reg, deg, hellip and trade
+ $title = str_replace( array( '%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2' ), '', $title );
+ }
+
$title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
$title = preg_replace('/\s+/', '-', $title);
$title = preg_replace('|-+|', '-', $title);
@@ -860,7 +914,7 @@ function sanitize_sql_orderby( $orderby ){
/**
* Sanitizes an HTML classname to ensure it only contains valid characters
*
- * Strips the string down to A-Z,a-z,0-9,'-' if this results in an empty
+ * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty
* string then it will return the alternative value supplied.
*
* @todo Expand to support the full range of CDATA that a class attribute can contain.
@@ -874,10 +928,10 @@ function sanitize_sql_orderby( $orderby ){
*/
function sanitize_html_class( $class, $fallback = '' ) {
//Strip out any % encoded octets
- $sanitized = preg_replace('|%[a-fA-F0-9][a-fA-F0-9]|', '', $class);
+ $sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class );
- //Limit to A-Z,a-z,0-9,'-'
- $sanitized = preg_replace('/[^A-Za-z0-9-]/', '', $sanitized);
+ //Limit to A-Z,a-z,0-9,_,-
+ $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized );
if ( '' == $sanitized )
$sanitized = $fallback;
@@ -980,7 +1034,7 @@ function balanceTags( $text, $force = false ) {
* @since 2.0.4
*
* @author Leonard Lin