X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/dc1231b7312fbdca99e9e887cc2bb35a28f85cdc..HEAD:/wp-includes/formatting.php diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 640e136f..85d4e7e4 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -20,7 +20,7 @@ * * Code within certain html blocks are skipped. * - * Do not use this function before the 'init' action hook; everything will break. + * Do not use this function before the {@see 'init'} action hook; everything will break. * * @since 0.71 * @@ -66,7 +66,7 @@ function wptexturize( $text, $reset = false ) { // Set up static variables. Run once only. if ( $reset || ! isset( $static_characters ) ) { /** - * Filter whether to skip running wptexturize(). + * Filters whether to skip running wptexturize(). * * Passing false to the filter will effectively short-circuit wptexturize(). * returning the original text passed to the function instead. @@ -195,7 +195,7 @@ function wptexturize( $text, $reset = false ) { // Must do this every time in case plugins use these filters in a context sensitive manner /** - * Filter the list of HTML elements not to texturize. + * Filters the list of HTML elements not to texturize. * * @since 2.8.0 * @@ -203,7 +203,7 @@ function wptexturize( $text, $reset = false ) { */ $no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags ); /** - * Filter the list of shortcodes not to texturize. + * Filters the list of shortcodes not to texturize. * * @since 2.8.0 * @@ -216,63 +216,35 @@ function wptexturize( $text, $reset = false ) { // Look for shortcodes and HTML elements. - $tagnames = array_keys( $shortcode_tags ); - $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); - $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). - - $comment_regex = - '!' // Start of comment, after the <. - . 
'(?:' // Unroll the loop: Consume everything until --> is found. - . '-(?!->)' // Dash not followed by end of comment. - . '[^\-]*+' // Consume non-dashes. - . ')*+' // Loop possessively. - . '(?:-->)?'; // End of comment. If not found, match all input. - - $shortcode_regex = - '\[' // Find start of shortcode. - . '[\/\[]?' // Shortcodes may begin with [/ or [[ - . $tagregexp // Only match registered shortcodes, because performance. - . '(?:' - . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. - . '|' - . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. - . ')*+' // Possessive critical. - . '\]' // Find end of shortcode. - . '\]?'; // Shortcodes may end with ]] - - $regex = - '/(' // Capture the entire match. - . '<' // Find start of element. - . '(?(?=!--)' // Is this a comment? - . $comment_regex // Find end of comment. - . '|' - . '[^>]*>' // Find end of element. - . ')' - . '|' - . $shortcode_regex // Find shortcodes. - . ')/s'; + preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches ); + $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); + $found_shortcodes = ! empty( $tagnames ); + $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : ''; + $regex = _get_wptexturize_split_regex( $shortcode_regex ); $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); foreach ( $textarr as &$curl ) { // Only call _wptexturize_pushpop_element if $curl is a delimiter. $first = $curl[0]; - if ( '<' === $first && ' is found. + . '-(?!->)' // Dash not followed by end of comment. + . '[^\-]*+' // Consume non-dashes. + . ')*+' // Loop possessively. + . '(?:-->)?'; // End of comment. If not found, match all input. + + $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. + '<' // Find start of element. + . '(?(?=!--)' // Is this a comment? + . $comment_regex // Find end of comment. + . '|' + . 
'[^>]*>?' // Find end of element. If not found, match all input. + . ')'; } - return preg_split( $regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE ); + if ( empty( $shortcode_regex ) ) { + $regex = '/(' . $html_regex . ')/'; + } else { + $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/'; + } + + return $regex; +} + +/** + * Retrieve the regular expression for shortcodes. + * + * @access private + * @ignore + * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap. + * @since 4.4.0 + * + * @param array $tagnames List of shortcodes to find. + * @return string The regular expression + */ +function _get_wptexturize_shortcode_regex( $tagnames ) { + $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); + $tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex(). + $regex = + '\[' // Find start of shortcode. + . '[\/\[]?' // Shortcodes may begin with [/ or [[ + . $tagregexp // Only match registered shortcodes, because performance. + . '(?:' + . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. + . '|' + . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. + . ')*+' // Possessive critical. + . '\]' // Find end of shortcode. + . '\]?'; // Shortcodes may end with ]] + + return $regex; } /** @@ -669,7 +731,7 @@ function wp_replace_in_html_tags( $haystack, $replace_pairs ) { // Extract $needle and $replace. foreach ( $replace_pairs as $needle => $replace ); - // Loop through delimeters (elements) only. + // Loop through delimiters (elements) only. for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { if ( false !== strpos( $textarr[$i], $needle ) ) { $textarr[$i] = str_replace( $needle, $replace, $textarr[$i] ); @@ -680,7 +742,7 @@ function wp_replace_in_html_tags( $haystack, $replace_pairs ) { // Extract all $needles. $needles = array_keys( $replace_pairs ); - // Loop through delimeters (elements) only. + // Loop through delimiters (elements) only. 
for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { foreach ( $needles as $needle ) { if ( false !== strpos( $textarr[$i], $needle ) ) { @@ -765,7 +827,7 @@ function shortcode_unautop( $pee ) { . ')' . '(?:' . $spaces . ')*+' // optional trailing whitespace . '<\\/p>' // closing paragraph - . '/s'; + . '/'; return preg_replace( $pattern, '$1', $pee ); } @@ -816,10 +878,14 @@ function seems_utf8( $str ) { * * @staticvar string $_charset * - * @param string $string The text which is to be encoded. - * @param int $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. - * @param string $charset Optional. The character encoding of the string. Default is false. - * @param bool $double_encode Optional. Whether to encode existing html entities. Default is false. + * @param string $string The text which is to be encoded. + * @param int|string $quote_style Optional. Converts double quotes if set to ENT_COMPAT, + * both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. + * Also compatible with old values; converting single quotes if set to 'single', + * double if set to 'double' or both if otherwise set. + * Default is ENT_NOQUOTES. + * @param string $charset Optional. The character encoding of the string. Default is false. + * @param bool $double_encode Optional. Whether to encode existing html entities. Default is false. * @return string The encoded text with HTML entities. 
*/ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { @@ -868,7 +934,7 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals $string = @htmlspecialchars( $string, $quote_style, $charset, $double_encode ); - // Backwards compatibility + // Back-compat. if ( 'single' === $_quote_style ) $string = str_replace( "'", ''', $string ); @@ -1058,7 +1124,382 @@ function utf8_uri_encode( $utf8_string, $length = 0 ) { * * If there are no accent characters, then the string given is just returned. * + * **Accent characters converted:** + * + * Currency signs: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | ------------------- | + * | U+00A3 | £ | (empty) | British Pound sign | + * | U+20AC | € | E | Euro sign | + * + * Decompositions for Latin-1 Supplement: + * + * | Code | Glyph | Replacement | Description | + * | ------- | ----- | ----------- | -------------------------------------- | + * | U+00AA | ª | a | Feminine ordinal indicator | + * | U+00BA | º | o | Masculine ordinal indicator | + * | U+00C0 | À | A | Latin capital letter A with grave | + * | U+00C1 | Á | A | Latin capital letter A with acute | + * | U+00C2 |  | A | Latin capital letter A with circumflex | + * | U+00C3 | à | A | Latin capital letter A with tilde | + * | U+00C4 | Ä | A | Latin capital letter A with diaeresis | + * | U+00C5 | Å | A | Latin capital letter A with ring above | + * | U+00C6 | Æ | AE | Latin capital letter AE | + * | U+00C7 | Ç | C | Latin capital letter C with cedilla | + * | U+00C8 | È | E | Latin capital letter E with grave | + * | U+00C9 | É | E | Latin capital letter E with acute | + * | U+00CA | Ê | E | Latin capital letter E with circumflex | + * | U+00CB | Ë | E | Latin capital letter E with diaeresis | + * | U+00CC | Ì | I | Latin capital letter I with grave | + * | U+00CD | Í | I | Latin capital letter I with acute | + * | U+00CE | Î | I | Latin 
capital letter I with circumflex | + * | U+00CF | Ï | I | Latin capital letter I with diaeresis | + * | U+00D0 | Ð | D | Latin capital letter Eth | + * | U+00D1 | Ñ | N | Latin capital letter N with tilde | + * | U+00D2 | Ò | O | Latin capital letter O with grave | + * | U+00D3 | Ó | O | Latin capital letter O with acute | + * | U+00D4 | Ô | O | Latin capital letter O with circumflex | + * | U+00D5 | Õ | O | Latin capital letter O with tilde | + * | U+00D6 | Ö | O | Latin capital letter O with diaeresis | + * | U+00D8 | Ø | O | Latin capital letter O with stroke | + * | U+00D9 | Ù | U | Latin capital letter U with grave | + * | U+00DA | Ú | U | Latin capital letter U with acute | + * | U+00DB | Û | U | Latin capital letter U with circumflex | + * | U+00DC | Ü | U | Latin capital letter U with diaeresis | + * | U+00DD | Ý | Y | Latin capital letter Y with acute | + * | U+00DE | Þ | TH | Latin capital letter Thorn | + * | U+00DF | ß | s | Latin small letter sharp s | + * | U+00E0 | à | a | Latin small letter a with grave | + * | U+00E1 | á | a | Latin small letter a with acute | + * | U+00E2 | â | a | Latin small letter a with circumflex | + * | U+00E3 | ã | a | Latin small letter a with tilde | + * | U+00E4 | ä | a | Latin small letter a with diaeresis | + * | U+00E5 | Ã¥ | a | Latin small letter a with ring above | + * | U+00E6 | æ | ae | Latin small letter ae | + * | U+00E7 | ç | c | Latin small letter c with cedilla | + * | U+00E8 | è | e | Latin small letter e with grave | + * | U+00E9 | é | e | Latin small letter e with acute | + * | U+00EA | ê | e | Latin small letter e with circumflex | + * | U+00EB | ë | e | Latin small letter e with diaeresis | + * | U+00EC | ì | i | Latin small letter i with grave | + * | U+00ED | í | i | Latin small letter i with acute | + * | U+00EE | î | i | Latin small letter i with circumflex | + * | U+00EF | ï | i | Latin small letter i with diaeresis | + * | U+00F0 | ð | d | Latin small letter Eth | + * | U+00F1 | ñ | n | Latin 
small letter n with tilde | + * | U+00F2 | ò | o | Latin small letter o with grave | + * | U+00F3 | ó | o | Latin small letter o with acute | + * | U+00F4 | ô | o | Latin small letter o with circumflex | + * | U+00F5 | õ | o | Latin small letter o with tilde | + * | U+00F6 | ö | o | Latin small letter o with diaeresis | + * | U+00F8 | ø | o | Latin small letter o with stroke | + * | U+00F9 | ù | u | Latin small letter u with grave | + * | U+00FA | ú | u | Latin small letter u with acute | + * | U+00FB | û | u | Latin small letter u with circumflex | + * | U+00FC | ü | u | Latin small letter u with diaeresis | + * | U+00FD | ý | y | Latin small letter y with acute | + * | U+00FE | þ | th | Latin small letter Thorn | + * | U+00FF | ÿ | y | Latin small letter y with diaeresis | + * + * Decompositions for Latin Extended-A: + * + * | Code | Glyph | Replacement | Description | + * | ------- | ----- | ----------- | ------------------------------------------------- | + * | U+0100 | Ā | A | Latin capital letter A with macron | + * | U+0101 | ā | a | Latin small letter a with macron | + * | U+0102 | Ă | A | Latin capital letter A with breve | + * | U+0103 | ă | a | Latin small letter a with breve | + * | U+0104 | Ą | A | Latin capital letter A with ogonek | + * | U+0105 | ą | a | Latin small letter a with ogonek | + * | U+0106 | Ć | C | Latin capital letter C with acute | + * | U+0107 | ć | c | Latin small letter c with acute | + * | U+0108 | Ĉ | C | Latin capital letter C with circumflex | + * | U+0109 | ĉ | c | Latin small letter c with circumflex | + * | U+010A | Ċ | C | Latin capital letter C with dot above | + * | U+010B | ċ | c | Latin small letter c with dot above | + * | U+010C | Č | C | Latin capital letter C with caron | + * | U+010D | č | c | Latin small letter c with caron | + * | U+010E | Ď | D | Latin capital letter D with caron | + * | U+010F | ď | d | Latin small letter d with caron | + * | U+0110 | Đ | D | Latin capital letter D with stroke | + * | U+0111 |
đ | d | Latin small letter d with stroke | + * | U+0112 | Ē | E | Latin capital letter E with macron | + * | U+0113 | ē | e | Latin small letter e with macron | + * | U+0114 | Ĕ | E | Latin capital letter E with breve | + * | U+0115 | ĕ | e | Latin small letter e with breve | + * | U+0116 | Ė | E | Latin capital letter E with dot above | + * | U+0117 | ė | e | Latin small letter e with dot above | + * | U+0118 | Ę | E | Latin capital letter E with ogonek | + * | U+0119 | ę | e | Latin small letter e with ogonek | + * | U+011A | Ě | E | Latin capital letter E with caron | + * | U+011B | ě | e | Latin small letter e with caron | + * | U+011C | Ĝ | G | Latin capital letter G with circumflex | + * | U+011D | ĝ | g | Latin small letter g with circumflex | + * | U+011E | Ğ | G | Latin capital letter G with breve | + * | U+011F | ğ | g | Latin small letter g with breve | + * | U+0120 | Ä  | G | Latin capital letter G with dot above | + * | U+0121 | Ä¡ | g | Latin small letter g with dot above | + * | U+0122 | Ä¢ | G | Latin capital letter G with cedilla | + * | U+0123 | Ä£ | g | Latin small letter g with cedilla | + * | U+0124 | Ĥ | H | Latin capital letter H with circumflex | + * | U+0125 | Ä¥ | h | Latin small letter h with circumflex | + * | U+0126 | Ħ | H | Latin capital letter H with stroke | + * | U+0127 | ħ | h | Latin small letter h with stroke | + * | U+0128 | Ĩ | I | Latin capital letter I with tilde | + * | U+0129 | Ä© | i | Latin small letter i with tilde | + * | U+012A | Ī | I | Latin capital letter I with macron | + * | U+012B | Ä« | i | Latin small letter i with macron | + * | U+012C | Ĭ | I | Latin capital letter I with breve | + * | U+012D | Ä­ | i | Latin small letter i with breve | + * | U+012E | Ä® | I | Latin capital letter I with ogonek | + * | U+012F | į | i | Latin small letter i with ogonek | + * | U+0130 | Ä° | I | Latin capital letter I with dot above | + * | U+0131 | ı | i | Latin small letter dotless i | + * | U+0132 | IJ | IJ | Latin capital 
ligature IJ | + * | U+0133 | ĳ | ij | Latin small ligature ij | + * | U+0134 | Ĵ | J | Latin capital letter J with circumflex | + * | U+0135 | ĵ | j | Latin small letter j with circumflex | + * | U+0136 | Ķ | K | Latin capital letter K with cedilla | + * | U+0137 | ķ | k | Latin small letter k with cedilla | + * | U+0138 | ĸ | k | Latin small letter Kra | + * | U+0139 | Ĺ | L | Latin capital letter L with acute | + * | U+013A | ĺ | l | Latin small letter l with acute | + * | U+013B | Ļ | L | Latin capital letter L with cedilla | + * | U+013C | ļ | l | Latin small letter l with cedilla | + * | U+013D | Ľ | L | Latin capital letter L with caron | + * | U+013E | ľ | l | Latin small letter l with caron | + * | U+013F | Ŀ | L | Latin capital letter L with middle dot | + * | U+0140 | ŀ | l | Latin small letter l with middle dot | + * | U+0141 | Ł | L | Latin capital letter L with stroke | + * | U+0142 | ł | l | Latin small letter l with stroke | + * | U+0143 | Ń | N | Latin capital letter N with acute | + * | U+0144 | ń | n | Latin small letter n with acute | + * | U+0145 | Ņ | N | Latin capital letter N with cedilla | + * | U+0146 | ņ | n | Latin small letter n with cedilla | + * | U+0147 | Ň | N | Latin capital letter N with caron | + * | U+0148 | ň | n | Latin small letter n with caron | + * | U+0149 | ŉ | n | Latin small letter n preceded by apostrophe | + * | U+014A | Ŋ | N | Latin capital letter Eng | + * | U+014B | ŋ | n | Latin small letter Eng | + * | U+014C | Ō | O | Latin capital letter O with macron | + * | U+014D | ō | o | Latin small letter o with macron | + * | U+014E | Ŏ | O | Latin capital letter O with breve | + * | U+014F | ŏ | o | Latin small letter o with breve | + * | U+0150 | Ő | O | Latin capital letter O with double acute | + * | U+0151 | ő | o | Latin small letter o with double acute | + * | U+0152 | Œ | OE | Latin capital ligature OE | + * | U+0153 | œ | oe | Latin small ligature oe | + * | U+0154 | Ŕ | R | Latin capital letter R with
acute | + * | U+0155 | ŕ | r | Latin small letter r with acute | + * | U+0156 | Ŗ | R | Latin capital letter R with cedilla | + * | U+0157 | ŗ | r | Latin small letter r with cedilla | + * | U+0158 | Ř | R | Latin capital letter R with caron | + * | U+0159 | ř | r | Latin small letter r with caron | + * | U+015A | Ś | S | Latin capital letter S with acute | + * | U+015B | ś | s | Latin small letter s with acute | + * | U+015C | Ŝ | S | Latin capital letter S with circumflex | + * | U+015D | ŝ | s | Latin small letter s with circumflex | + * | U+015E | Ş | S | Latin capital letter S with cedilla | + * | U+015F | ş | s | Latin small letter s with cedilla | + * | U+0160 | Å  | S | Latin capital letter S with caron | + * | U+0161 | Å¡ | s | Latin small letter s with caron | + * | U+0162 | Å¢ | T | Latin capital letter T with cedilla | + * | U+0163 | Å£ | t | Latin small letter t with cedilla | + * | U+0164 | Ť | T | Latin capital letter T with caron | + * | U+0165 | Å¥ | t | Latin small letter t with caron | + * | U+0166 | Ŧ | T | Latin capital letter T with stroke | + * | U+0167 | ŧ | t | Latin small letter t with stroke | + * | U+0168 | Ũ | U | Latin capital letter U with tilde | + * | U+0169 | Å© | u | Latin small letter u with tilde | + * | U+016A | Ū | U | Latin capital letter U with macron | + * | U+016B | Å« | u | Latin small letter u with macron | + * | U+016C | Ŭ | U | Latin capital letter U with breve | + * | U+016D | Å­ | u | Latin small letter u with breve | + * | U+016E | Å® | U | Latin capital letter U with ring above | + * | U+016F | ů | u | Latin small letter u with ring above | + * | U+0170 | Å° | U | Latin capital letter U with double acute | + * | U+0171 | ű | u | Latin small letter u with double acute | + * | U+0172 | Ų | U | Latin capital letter U with ogonek | + * | U+0173 | ų | u | Latin small letter u with ogonek | + * | U+0174 | Å´ | W | Latin capital letter W with circumflex | + * | U+0175 | ŵ | w | Latin small letter w with circumflex | + * | 
U+0176 | Ŷ | Y | Latin capital letter Y with circumflex | + * | U+0177 | Å· | y | Latin small letter y with circumflex | + * | U+0178 | Ÿ | Y | Latin capital letter Y with diaeresis | + * | U+0179 | Ź | Z | Latin capital letter Z with acute | + * | U+017A | ź | z | Latin small letter z with acute | + * | U+017B | Å» | Z | Latin capital letter Z with dot above | + * | U+017C | ż | z | Latin small letter z with dot above | + * | U+017D | Ž | Z | Latin capital letter Z with caron | + * | U+017E | ž | z | Latin small letter z with caron | + * | U+017F | Å¿ | s | Latin small letter long s | + * | U+01A0 | Æ  | O | Latin capital letter O with horn | + * | U+01A1 | Æ¡ | o | Latin small letter o with horn | + * | U+01AF | Ư | U | Latin capital letter U with horn | + * | U+01B0 | Æ° | u | Latin small letter u with horn | + * | U+01CD | Ǎ | A | Latin capital letter A with caron | + * | U+01CE | ǎ | a | Latin small letter a with caron | + * | U+01CF | Ǐ | I | Latin capital letter I with caron | + * | U+01D0 | ǐ | i | Latin small letter i with caron | + * | U+01D1 | Ǒ | O | Latin capital letter O with caron | + * | U+01D2 | ǒ | o | Latin small letter o with caron | + * | U+01D3 | Ǔ | U | Latin capital letter U with caron | + * | U+01D4 | ǔ | u | Latin small letter u with caron | + * | U+01D5 | Ǖ | U | Latin capital letter U with diaeresis and macron | + * | U+01D6 | ǖ | u | Latin small letter u with diaeresis and macron | + * | U+01D7 | Ǘ | U | Latin capital letter U with diaeresis and acute | + * | U+01D8 | ǘ | u | Latin small letter u with diaeresis and acute | + * | U+01D9 | Ǚ | U | Latin capital letter U with diaeresis and caron | + * | U+01DA | ǚ | u | Latin small letter u with diaeresis and caron | + * | U+01DB | Ǜ | U | Latin capital letter U with diaeresis and grave | + * | U+01DC | ǜ | u | Latin small letter u with diaeresis and grave | + * + * Decompositions for Latin Extended-B: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | 
----------- | ----------------------------------------- | + * | U+0218 | Ș | S | Latin capital letter S with comma below | + * | U+0219 | ș | s | Latin small letter s with comma below | + * | U+021A | Ț | T | Latin capital letter T with comma below | + * | U+021B | ț | t | Latin small letter t with comma below | + * + * Vowels with diacritic (Chinese, Hanyu Pinyin): + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | ----------------------------------------------------- | + * | U+0251 | ɑ | a | Latin small letter alpha | + * | U+1EA0 | Ạ | A | Latin capital letter A with dot below | + * | U+1EA1 | ạ | a | Latin small letter a with dot below | + * | U+1EA2 | Ả | A | Latin capital letter A with hook above | + * | U+1EA3 | ả | a | Latin small letter a with hook above | + * | U+1EA4 | Ấ | A | Latin capital letter A with circumflex and acute | + * | U+1EA5 | ấ | a | Latin small letter a with circumflex and acute | + * | U+1EA6 | Ầ | A | Latin capital letter A with circumflex and grave | + * | U+1EA7 | ầ | a | Latin small letter a with circumflex and grave | + * | U+1EA8 | Ẩ | A | Latin capital letter A with circumflex and hook above | + * | U+1EA9 | ẩ | a | Latin small letter a with circumflex and hook above | + * | U+1EAA | Ẫ | A | Latin capital letter A with circumflex and tilde | + * | U+1EAB | ẫ | a | Latin small letter a with circumflex and tilde | + * | U+1EAC | Ậ | A | Latin capital letter A with circumflex and dot below | + * | U+1EAD | ậ | a | Latin small letter a with circumflex and dot below | + * | U+1EAE | Ắ | A | Latin capital letter A with breve and acute | + * | U+1EAF | ắ | a | Latin small letter a with breve and acute | + * | U+1EB0 | Ằ | A | Latin capital letter A with breve and grave | + * | U+1EB1 | ằ | a | Latin small letter a with breve and grave | + * | U+1EB2 | Ẳ | A | Latin capital letter A with breve and hook above | + * | U+1EB3 | ẳ | a | Latin small letter a with breve and hook above | + * | U+1EB4 | Ẵ
| A | Latin capital letter A with breve and tilde | + * | U+1EB5 | ẵ | a | Latin small letter a with breve and tilde | + * | U+1EB6 | Ặ | A | Latin capital letter A with breve and dot below | + * | U+1EB7 | ặ | a | Latin small letter a with breve and dot below | + * | U+1EB8 | Ẹ | E | Latin capital letter E with dot below | + * | U+1EB9 | ẹ | e | Latin small letter e with dot below | + * | U+1EBA | Ẻ | E | Latin capital letter E with hook above | + * | U+1EBB | ẻ | e | Latin small letter e with hook above | + * | U+1EBC | Ẽ | E | Latin capital letter E with tilde | + * | U+1EBD | ẽ | e | Latin small letter e with tilde | + * | U+1EBE | Ế | E | Latin capital letter E with circumflex and acute | + * | U+1EBF | ế | e | Latin small letter e with circumflex and acute | + * | U+1EC0 | Ề | E | Latin capital letter E with circumflex and grave | + * | U+1EC1 | ề | e | Latin small letter e with circumflex and grave | + * | U+1EC2 | Ể | E | Latin capital letter E with circumflex and hook above | + * | U+1EC3 | ể | e | Latin small letter e with circumflex and hook above | + * | U+1EC4 | Ễ | E | Latin capital letter E with circumflex and tilde | + * | U+1EC5 | ễ | e | Latin small letter e with circumflex and tilde | + * | U+1EC6 | Ệ | E | Latin capital letter E with circumflex and dot below | + * | U+1EC7 | ệ | e | Latin small letter e with circumflex and dot below | + * | U+1EC8 | Ỉ | I | Latin capital letter I with hook above | + * | U+1EC9 | ỉ | i | Latin small letter i with hook above | + * | U+1ECA | Ị | I | Latin capital letter I with dot below | + * | U+1ECB | ị | i | Latin small letter i with dot below | + * | U+1ECC | Ọ | O | Latin capital letter O with dot below | + * | U+1ECD | ọ | o | Latin small letter o with dot below | + * | U+1ECE | Ỏ | O | Latin capital letter O with hook above | + * | U+1ECF | ỏ | o | Latin small letter o with hook above | + * | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute | + * | U+1ED1 | ố | o | Latin small letter o with 
circumflex and acute | + * | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave | + * | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave | + * | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above | + * | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above | + * | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde | + * | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde | + * | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below | + * | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below | + * | U+1EDA | Ớ | O | Latin capital letter O with horn and acute | + * | U+1EDB | ớ | o | Latin small letter o with horn and acute | + * | U+1EDC | Ờ | O | Latin capital letter O with horn and grave | + * | U+1EDD | ờ | o | Latin small letter o with horn and grave | + * | U+1EDE | Ở | O | Latin capital letter O with horn and hook above | + * | U+1EDF | ở | o | Latin small letter o with horn and hook above | + * | U+1EE0 | á»  | O | Latin capital letter O with horn and tilde | + * | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde | + * | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below | + * | U+1EE3 | ợ | o | Latin small letter o with horn and dot below | + * | U+1EE4 | Ụ | U | Latin capital letter U with dot below | + * | U+1EE5 | ụ | u | Latin small letter u with dot below | + * | U+1EE6 | Ủ | U | Latin capital letter U with hook above | + * | U+1EE7 | ủ | u | Latin small letter u with hook above | + * | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute | + * | U+1EE9 | ứ | u | Latin small letter u with horn and acute | + * | U+1EEA | Ừ | U | Latin capital letter U with horn and grave | + * | U+1EEB | ừ | u | Latin small letter u with horn and grave | + * | U+1EEC | Ử | U | Latin capital letter U with horn and hook above | + * | U+1EED | á»­ | u | Latin small letter u with horn and hook above | + * | U+1EEE | á»® | 
U | Latin capital letter U with horn and tilde | + * | U+1EEF | ữ | u | Latin small letter u with horn and tilde | + * | U+1EF0 | á»° | U | Latin capital letter U with horn and dot below | + * | U+1EF1 | á»± | u | Latin small letter u with horn and dot below | + * | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave | + * | U+1EF3 | ỳ | y | Latin small letter y with grave | + * | U+1EF4 | á»´ | Y | Latin capital letter Y with dot below | + * | U+1EF5 | ỵ | y | Latin small letter y with dot below | + * | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above | + * | U+1EF7 | á»· | y | Latin small letter y with hook above | + * | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde | + * | U+1EF9 | ỹ | y | Latin small letter y with tilde | + * + * German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`), + * and German (Switzerland) informal (`de_CH_informal`) locales: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis | + * | U+00E4 | ä | ae | Latin small letter a with diaeresis | + * | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis | + * | U+00F6 | ö | oe | Latin small letter o with diaeresis | + * | U+00DC | Ü | Ue | Latin capital letter U with diaeresis | + * | U+00FC | ü | ue | Latin small letter u with diaeresis | + * | U+00DF | ß | ss | Latin small letter sharp s | + * + * Danish (`da_DK`) locale: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+00C6 | Æ | Ae | Latin capital letter AE | + * | U+00E6 | æ | ae | Latin small letter ae | + * | U+00D8 | Ø | Oe | Latin capital letter O with stroke | + * | U+00F8 | ø | oe | Latin small letter o with stroke | + * | U+00C5 | Å | Aa | Latin capital letter A with ring above | + * | U+00E5 | Ã¥ | aa | Latin small letter a with ring above | + * + * 
Catalan (`ca`) locale: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+00B7 | l·l | ll | Flown dot (between two Ls) | + * + * Serbian (`sr_RS`) locale: + * + * | Code | Glyph | Replacement | Description | + * | -------- | ----- | ----------- | --------------------------------------- | + * | U+0110 | Đ | DJ | Latin capital letter D with stroke | + * | U+0111 | đ | dj | Latin small letter d with stroke | + * * @since 1.2.1 + * @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`. + * @since 4.7.0 Added locale support for `sr_RS`. * * @param string $string Text that might have accent characters * @return string Filtered string with replaced "nice" characters. @@ -1070,220 +1511,225 @@ function remove_accents( $string ) { if (seems_utf8($string)) { $chars = array( // Decompositions for Latin-1 Supplement - chr(194).chr(170) => 'a', chr(194).chr(186) => 'o', - chr(195).chr(128) => 'A', chr(195).chr(129) => 'A', - chr(195).chr(130) => 'A', chr(195).chr(131) => 'A', - chr(195).chr(132) => 'A', chr(195).chr(133) => 'A', - chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C', - chr(195).chr(136) => 'E', chr(195).chr(137) => 'E', - chr(195).chr(138) => 'E', chr(195).chr(139) => 'E', - chr(195).chr(140) => 'I', chr(195).chr(141) => 'I', - chr(195).chr(142) => 'I', chr(195).chr(143) => 'I', - chr(195).chr(144) => 'D', chr(195).chr(145) => 'N', - chr(195).chr(146) => 'O', chr(195).chr(147) => 'O', - chr(195).chr(148) => 'O', chr(195).chr(149) => 'O', - chr(195).chr(150) => 'O', chr(195).chr(153) => 'U', - chr(195).chr(154) => 'U', chr(195).chr(155) => 'U', - chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y', - chr(195).chr(158) => 'TH',chr(195).chr(159) => 's', - chr(195).chr(160) => 'a', chr(195).chr(161) => 'a', - chr(195).chr(162) => 'a', chr(195).chr(163) => 'a', - chr(195).chr(164) => 'a', chr(195).chr(165) => 'a', - chr(195).chr(166) => 'ae',chr(195).chr(167) 
=> 'c', - chr(195).chr(168) => 'e', chr(195).chr(169) => 'e', - chr(195).chr(170) => 'e', chr(195).chr(171) => 'e', - chr(195).chr(172) => 'i', chr(195).chr(173) => 'i', - chr(195).chr(174) => 'i', chr(195).chr(175) => 'i', - chr(195).chr(176) => 'd', chr(195).chr(177) => 'n', - chr(195).chr(178) => 'o', chr(195).chr(179) => 'o', - chr(195).chr(180) => 'o', chr(195).chr(181) => 'o', - chr(195).chr(182) => 'o', chr(195).chr(184) => 'o', - chr(195).chr(185) => 'u', chr(195).chr(186) => 'u', - chr(195).chr(187) => 'u', chr(195).chr(188) => 'u', - chr(195).chr(189) => 'y', chr(195).chr(190) => 'th', - chr(195).chr(191) => 'y', chr(195).chr(152) => 'O', + 'ª' => 'a', 'º' => 'o', + 'À' => 'A', 'Á' => 'A', + 'Â' => 'A', 'Ã' => 'A', + 'Ä' => 'A', 'Å' => 'A', + 'Æ' => 'AE','Ç' => 'C', + 'È' => 'E', 'É' => 'E', + 'Ê' => 'E', 'Ë' => 'E', + 'Ì' => 'I', 'Í' => 'I', + 'Î' => 'I', 'Ï' => 'I', + 'Ð' => 'D', 'Ñ' => 'N', + 'Ò' => 'O', 'Ó' => 'O', + 'Ô' => 'O', 'Õ' => 'O', + 'Ö' => 'O', 'Ù' => 'U', + 'Ú' => 'U', 'Û' => 'U', + 'Ü' => 'U', 'Ý' => 'Y', + 'Þ' => 'TH','ß' => 's', + 'à' => 'a', 'á' => 'a', + 'â' => 'a', 'ã' => 'a', + 'ä' => 'a', 'Ã¥' => 'a', + 'æ' => 'ae','ç' => 'c', + 'è' => 'e', 'é' => 'e', + 'ê' => 'e', 'ë' => 'e', + 'ì' => 'i', 'í' => 'i', + 'î' => 'i', 'ï' => 'i', + 'ð' => 'd', 'ñ' => 'n', + 'ò' => 'o', 'ó' => 'o', + 'ô' => 'o', 'õ' => 'o', + 'ö' => 'o', 'ø' => 'o', + 'ù' => 'u', 'ú' => 'u', + 'û' => 'u', 'ü' => 'u', + 'ý' => 'y', 'þ' => 'th', + 'ÿ' => 'y', 'Ø' => 'O', // Decompositions for Latin Extended-A - chr(196).chr(128) => 'A', chr(196).chr(129) => 'a', - chr(196).chr(130) => 'A', chr(196).chr(131) => 'a', - chr(196).chr(132) => 'A', chr(196).chr(133) => 'a', - chr(196).chr(134) => 'C', chr(196).chr(135) => 'c', - chr(196).chr(136) => 'C', chr(196).chr(137) => 'c', - chr(196).chr(138) => 'C', chr(196).chr(139) => 'c', - chr(196).chr(140) => 'C', chr(196).chr(141) => 'c', - chr(196).chr(142) => 'D', chr(196).chr(143) => 'd', - chr(196).chr(144) => 'D', 
chr(196).chr(145) => 'd', - chr(196).chr(146) => 'E', chr(196).chr(147) => 'e', - chr(196).chr(148) => 'E', chr(196).chr(149) => 'e', - chr(196).chr(150) => 'E', chr(196).chr(151) => 'e', - chr(196).chr(152) => 'E', chr(196).chr(153) => 'e', - chr(196).chr(154) => 'E', chr(196).chr(155) => 'e', - chr(196).chr(156) => 'G', chr(196).chr(157) => 'g', - chr(196).chr(158) => 'G', chr(196).chr(159) => 'g', - chr(196).chr(160) => 'G', chr(196).chr(161) => 'g', - chr(196).chr(162) => 'G', chr(196).chr(163) => 'g', - chr(196).chr(164) => 'H', chr(196).chr(165) => 'h', - chr(196).chr(166) => 'H', chr(196).chr(167) => 'h', - chr(196).chr(168) => 'I', chr(196).chr(169) => 'i', - chr(196).chr(170) => 'I', chr(196).chr(171) => 'i', - chr(196).chr(172) => 'I', chr(196).chr(173) => 'i', - chr(196).chr(174) => 'I', chr(196).chr(175) => 'i', - chr(196).chr(176) => 'I', chr(196).chr(177) => 'i', - chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij', - chr(196).chr(180) => 'J', chr(196).chr(181) => 'j', - chr(196).chr(182) => 'K', chr(196).chr(183) => 'k', - chr(196).chr(184) => 'k', chr(196).chr(185) => 'L', - chr(196).chr(186) => 'l', chr(196).chr(187) => 'L', - chr(196).chr(188) => 'l', chr(196).chr(189) => 'L', - chr(196).chr(190) => 'l', chr(196).chr(191) => 'L', - chr(197).chr(128) => 'l', chr(197).chr(129) => 'L', - chr(197).chr(130) => 'l', chr(197).chr(131) => 'N', - chr(197).chr(132) => 'n', chr(197).chr(133) => 'N', - chr(197).chr(134) => 'n', chr(197).chr(135) => 'N', - chr(197).chr(136) => 'n', chr(197).chr(137) => 'N', - chr(197).chr(138) => 'n', chr(197).chr(139) => 'N', - chr(197).chr(140) => 'O', chr(197).chr(141) => 'o', - chr(197).chr(142) => 'O', chr(197).chr(143) => 'o', - chr(197).chr(144) => 'O', chr(197).chr(145) => 'o', - chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe', - chr(197).chr(148) => 'R',chr(197).chr(149) => 'r', - chr(197).chr(150) => 'R',chr(197).chr(151) => 'r', - chr(197).chr(152) => 'R',chr(197).chr(153) => 'r', - chr(197).chr(154) => 
'S',chr(197).chr(155) => 's', - chr(197).chr(156) => 'S',chr(197).chr(157) => 's', - chr(197).chr(158) => 'S',chr(197).chr(159) => 's', - chr(197).chr(160) => 'S', chr(197).chr(161) => 's', - chr(197).chr(162) => 'T', chr(197).chr(163) => 't', - chr(197).chr(164) => 'T', chr(197).chr(165) => 't', - chr(197).chr(166) => 'T', chr(197).chr(167) => 't', - chr(197).chr(168) => 'U', chr(197).chr(169) => 'u', - chr(197).chr(170) => 'U', chr(197).chr(171) => 'u', - chr(197).chr(172) => 'U', chr(197).chr(173) => 'u', - chr(197).chr(174) => 'U', chr(197).chr(175) => 'u', - chr(197).chr(176) => 'U', chr(197).chr(177) => 'u', - chr(197).chr(178) => 'U', chr(197).chr(179) => 'u', - chr(197).chr(180) => 'W', chr(197).chr(181) => 'w', - chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y', - chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z', - chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z', - chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z', - chr(197).chr(190) => 'z', chr(197).chr(191) => 's', + 'Ā' => 'A', 'ā' => 'a', + 'Ă' => 'A', 'ă' => 'a', + 'Ą' => 'A', 'ą' => 'a', + 'Ć' => 'C', 'ć' => 'c', + 'Ĉ' => 'C', 'ĉ' => 'c', + 'Ċ' => 'C', 'ċ' => 'c', + 'Č' => 'C', 'č' => 'c', + 'Ď' => 'D', 'ď' => 'd', + 'Đ' => 'D', 'đ' => 'd', + 'Ē' => 'E', 'ē' => 'e', + 'Ĕ' => 'E', 'ĕ' => 'e', + 'Ė' => 'E', 'ė' => 'e', + 'Ę' => 'E', 'ę' => 'e', + 'Ě' => 'E', 'ě' => 'e', + 'Ĝ' => 'G', 'ĝ' => 'g', + 'Ğ' => 'G', 'ğ' => 'g', + 'Ä ' => 'G', 'Ä¡' => 'g', + 'Ä¢' => 'G', 'Ä£' => 'g', + 'Ĥ' => 'H', 'Ä¥' => 'h', + 'Ħ' => 'H', 'ħ' => 'h', + 'Ĩ' => 'I', 'Ä©' => 'i', + 'Ī' => 'I', 'Ä«' => 'i', + 'Ĭ' => 'I', 'Ä­' => 'i', + 'Ä®' => 'I', 'į' => 'i', + 'Ä°' => 'I', 'ı' => 'i', + 'IJ' => 'IJ','ij' => 'ij', + 'Ä´' => 'J', 'ĵ' => 'j', + 'Ķ' => 'K', 'Ä·' => 'k', + 'ĸ' => 'k', 'Ĺ' => 'L', + 'ĺ' => 'l', 'Ä»' => 'L', + 'ļ' => 'l', 'Ľ' => 'L', + 'ľ' => 'l', 'Ä¿' => 'L', + 'ŀ' => 'l', 'Ł' => 'L', + 'ł' => 'l', 'Ń' => 'N', + 'ń' => 'n', 'Ņ' => 'N', + 'ņ' => 'n', 'Ň' => 'N', + 'ň' => 'n', 'ʼn' => 'n', + 'Ŋ' => 'N', 'ŋ' 
=> 'n', + 'Ō' => 'O', 'ō' => 'o', + 'Ŏ' => 'O', 'ŏ' => 'o', + 'Ő' => 'O', 'ő' => 'o', + 'Œ' => 'OE','œ' => 'oe', + 'Ŕ' => 'R','ŕ' => 'r', + 'Ŗ' => 'R','ŗ' => 'r', + 'Ř' => 'R','ř' => 'r', + 'Ś' => 'S','ś' => 's', + 'Ŝ' => 'S','ŝ' => 's', + 'Ş' => 'S','ş' => 's', + 'Å ' => 'S', 'Å¡' => 's', + 'Å¢' => 'T', 'Å£' => 't', + 'Ť' => 'T', 'Å¥' => 't', + 'Ŧ' => 'T', 'ŧ' => 't', + 'Ũ' => 'U', 'Å©' => 'u', + 'Ū' => 'U', 'Å«' => 'u', + 'Ŭ' => 'U', 'Å­' => 'u', + 'Å®' => 'U', 'ů' => 'u', + 'Å°' => 'U', 'ű' => 'u', + 'Ų' => 'U', 'ų' => 'u', + 'Å´' => 'W', 'ŵ' => 'w', + 'Ŷ' => 'Y', 'Å·' => 'y', + 'Ÿ' => 'Y', 'Ź' => 'Z', + 'ź' => 'z', 'Å»' => 'Z', + 'ż' => 'z', 'Ž' => 'Z', + 'ž' => 'z', 'Å¿' => 's', // Decompositions for Latin Extended-B - chr(200).chr(152) => 'S', chr(200).chr(153) => 's', - chr(200).chr(154) => 'T', chr(200).chr(155) => 't', + 'Ș' => 'S', 'ș' => 's', + 'Ț' => 'T', 'ț' => 't', // Euro Sign - chr(226).chr(130).chr(172) => 'E', + '€' => 'E', // GBP (Pound) Sign - chr(194).chr(163) => '', + '£' => '', // Vowels with diacritic (Vietnamese) // unmarked - chr(198).chr(160) => 'O', chr(198).chr(161) => 'o', - chr(198).chr(175) => 'U', chr(198).chr(176) => 'u', + 'Æ ' => 'O', 'Æ¡' => 'o', + 'Ư' => 'U', 'Æ°' => 'u', // grave accent - chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a', - chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a', - chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e', - chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o', - chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o', - chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u', - chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y', + 'Ầ' => 'A', 'ầ' => 'a', + 'Ằ' => 'A', 'ằ' => 'a', + 'Ề' => 'E', 'ề' => 'e', + 'Ồ' => 'O', 'ồ' => 'o', + 'Ờ' => 'O', 'ờ' => 'o', + 'Ừ' => 'U', 'ừ' => 'u', + 'Ỳ' => 'Y', 'ỳ' => 'y', // hook - chr(225).chr(186).chr(162) => 'A', 
chr(225).chr(186).chr(163) => 'a', - chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a', - chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a', - chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e', - chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e', - chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i', - chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o', - chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o', - chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o', - chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u', - chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u', - chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y', + 'Ả' => 'A', 'ả' => 'a', + 'Ẩ' => 'A', 'ẩ' => 'a', + 'Ẳ' => 'A', 'ẳ' => 'a', + 'Ẻ' => 'E', 'ẻ' => 'e', + 'Ể' => 'E', 'ể' => 'e', + 'Ỉ' => 'I', 'ỉ' => 'i', + 'Ỏ' => 'O', 'ỏ' => 'o', + 'Ổ' => 'O', 'ổ' => 'o', + 'Ở' => 'O', 'ở' => 'o', + 'Ủ' => 'U', 'ủ' => 'u', + 'Ử' => 'U', 'á»­' => 'u', + 'Ỷ' => 'Y', 'á»·' => 'y', // tilde - chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a', - chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a', - chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e', - chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e', - chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o', - chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o', - chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u', - chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y', + 'Ẫ' => 'A', 'ẫ' => 'a', + 'Ẵ' => 'A', 'ẵ' => 'a', + 'Ẽ' => 'E', 'ẽ' => 'e', + 'Ễ' => 'E', 'ễ' => 'e', + 'Ỗ' => 'O', 'ỗ' => 'o', + 'á» ' => 'O', 'ỡ' => 'o', + 'á»®' => 'U', 'ữ' => 'u', + 'Ỹ' => 'Y', 'ỹ' => 'y', // acute accent - chr(225).chr(186).chr(164) => 'A', 
chr(225).chr(186).chr(165) => 'a', - chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a', - chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e', - chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o', - chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o', - chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u', + 'Ấ' => 'A', 'ấ' => 'a', + 'Ắ' => 'A', 'ắ' => 'a', + 'Ế' => 'E', 'ế' => 'e', + 'Ố' => 'O', 'ố' => 'o', + 'Ớ' => 'O', 'ớ' => 'o', + 'Ứ' => 'U', 'ứ' => 'u', // dot below - chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a', - chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a', - chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a', - chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e', - chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e', - chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i', - chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o', - chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o', - chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o', - chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u', - chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u', - chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y', + 'Ạ' => 'A', 'ạ' => 'a', + 'Ậ' => 'A', 'ậ' => 'a', + 'Ặ' => 'A', 'ặ' => 'a', + 'Ẹ' => 'E', 'ẹ' => 'e', + 'Ệ' => 'E', 'ệ' => 'e', + 'Ị' => 'I', 'ị' => 'i', + 'Ọ' => 'O', 'ọ' => 'o', + 'Ộ' => 'O', 'ộ' => 'o', + 'Ợ' => 'O', 'ợ' => 'o', + 'Ụ' => 'U', 'ụ' => 'u', + 'á»°' => 'U', 'á»±' => 'u', + 'á»´' => 'Y', 'ỵ' => 'y', // Vowels with diacritic (Chinese, Hanyu Pinyin) - chr(201).chr(145) => 'a', + 'ɑ' => 'a', // macron - chr(199).chr(149) => 'U', chr(199).chr(150) => 'u', + 'Ǖ' => 'U', 'ǖ' => 'u', // acute accent - chr(199).chr(151) => 'U', chr(199).chr(152) => 'u', + 
'Ǘ' => 'U', 'ǘ' => 'u', // caron - chr(199).chr(141) => 'A', chr(199).chr(142) => 'a', - chr(199).chr(143) => 'I', chr(199).chr(144) => 'i', - chr(199).chr(145) => 'O', chr(199).chr(146) => 'o', - chr(199).chr(147) => 'U', chr(199).chr(148) => 'u', - chr(199).chr(153) => 'U', chr(199).chr(154) => 'u', + 'Ǎ' => 'A', 'ǎ' => 'a', + 'Ǐ' => 'I', 'ǐ' => 'i', + 'Ǒ' => 'O', 'ǒ' => 'o', + 'Ǔ' => 'U', 'ǔ' => 'u', + 'Ǚ' => 'U', 'ǚ' => 'u', // grave accent - chr(199).chr(155) => 'U', chr(199).chr(156) => 'u', + 'Ǜ' => 'U', 'ǜ' => 'u', ); // Used for locale-specific rules $locale = get_locale(); - if ( 'de_DE' == $locale || 'de_DE_formal' == $locale ) { - $chars[ chr(195).chr(132) ] = 'Ae'; - $chars[ chr(195).chr(164) ] = 'ae'; - $chars[ chr(195).chr(150) ] = 'Oe'; - $chars[ chr(195).chr(182) ] = 'oe'; - $chars[ chr(195).chr(156) ] = 'Ue'; - $chars[ chr(195).chr(188) ] = 'ue'; - $chars[ chr(195).chr(159) ] = 'ss'; + if ( 'de_DE' == $locale || 'de_DE_formal' == $locale || 'de_CH' == $locale || 'de_CH_informal' == $locale ) { + $chars[ 'Ä' ] = 'Ae'; + $chars[ 'ä' ] = 'ae'; + $chars[ 'Ö' ] = 'Oe'; + $chars[ 'ö' ] = 'oe'; + $chars[ 'Ü' ] = 'Ue'; + $chars[ 'ü' ] = 'ue'; + $chars[ 'ß' ] = 'ss'; } elseif ( 'da_DK' === $locale ) { - $chars[ chr(195).chr(134) ] = 'Ae'; - $chars[ chr(195).chr(166) ] = 'ae'; - $chars[ chr(195).chr(152) ] = 'Oe'; - $chars[ chr(195).chr(184) ] = 'oe'; - $chars[ chr(195).chr(133) ] = 'Aa'; - $chars[ chr(195).chr(165) ] = 'aa'; + $chars[ 'Æ' ] = 'Ae'; + $chars[ 'æ' ] = 'ae'; + $chars[ 'Ø' ] = 'Oe'; + $chars[ 'ø' ] = 'oe'; + $chars[ 'Å' ] = 'Aa'; + $chars[ 'Ã¥' ] = 'aa'; + } elseif ( 'ca' === $locale ) { + $chars[ 'l·l' ] = 'll'; + } elseif ( 'sr_RS' === $locale ) { + $chars[ 'Đ' ] = 'DJ'; + $chars[ 'đ' ] = 'dj'; } $string = strtr($string, $chars); } else { $chars = array(); // Assume ISO-8859-1 if not UTF-8 - $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158) - .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194) - 
.chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202) - .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210) - .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218) - .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227) - .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235) - .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243) - .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251) - .chr(252).chr(253).chr(255); + $chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e" + ."\x9f\xa2\xa5\xb5\xc0\xc1\xc2" + ."\xc3\xc4\xc5\xc7\xc8\xc9\xca" + ."\xcb\xcc\xcd\xce\xcf\xd1\xd2" + ."\xd3\xd4\xd5\xd6\xd8\xd9\xda" + ."\xdb\xdc\xdd\xe0\xe1\xe2\xe3" + ."\xe4\xe5\xe7\xe8\xe9\xea\xeb" + ."\xec\xed\xee\xef\xf1\xf2\xf3" + ."\xf4\xf5\xf6\xf8\xf9\xfa\xfb" + ."\xfc\xfd\xff"; $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"; $string = strtr($string, $chars['in'], $chars['out']); $double_chars = array(); - $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254)); + $double_chars['in'] = array("\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe"); $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'); $string = str_replace($double_chars['in'], $double_chars['out'], $string); } @@ -1298,7 +1744,8 @@ function remove_accents( $string ) { * operating systems and special characters requiring special escaping * to manipulate at the command line. Replaces spaces and consecutive * dashes with a single dash. Trims period, dash and underscore from beginning - * and end of filename. + * and end of filename. It is not guaranteed that this function will return a + * filename that is allowed to be uploaded. 
* * @since 2.1.0 * @@ -1307,9 +1754,9 @@ function remove_accents( $string ) { */ function sanitize_file_name( $filename ) { $filename_raw = $filename; - $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0)); + $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", "%", "+", chr(0)); /** - * Filter the list of characters to remove from a filename. + * Filters the list of characters to remove from a filename. * * @since 2.8.0 * @@ -1323,13 +1770,21 @@ function sanitize_file_name( $filename ) { $filename = preg_replace( '/[\r\n\t -]+/', '-', $filename ); $filename = trim( $filename, '.-_' ); + if ( false === strpos( $filename, '.' ) ) { + $mime_types = wp_get_mime_types(); + $filetype = wp_check_filetype( 'test.' . $filename, $mime_types ); + if ( $filetype['ext'] === $filename ) { + $filename = 'unnamed-file.' . $filetype['ext']; + } + } + // Split the filename into a base and extension[s] $parts = explode('.', $filename); // Return if only one extension if ( count( $parts ) <= 2 ) { /** - * Filter a sanitized filename string. + * Filters a sanitized filename string. * * @since 2.8.0 * @@ -1375,7 +1830,7 @@ function sanitize_file_name( $filename ) { * Removes tags, octets, entities, and if strict is enabled, will only keep * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username, * raw username (the username in the parameter), and the value of $strict as - * parameters for the 'sanitize_user' filter. + * parameters for the {@see 'sanitize_user'} filter. * * @since 2.0.0 * @@ -1400,7 +1855,7 @@ function sanitize_user( $username, $strict = false ) { $username = preg_replace( '|\s+|', ' ', $username ); /** - * Filter a sanitized username string. + * Filters a sanitized username string. 
* * @since 2.0.1 * @@ -1427,7 +1882,7 @@ function sanitize_key( $key ) { $key = preg_replace( '/[^a-z0-9_\-]/', '', $key ); /** - * Filter a sanitized key string. + * Filters a sanitized key string. * * @since 3.0.0 * @@ -1458,7 +1913,7 @@ function sanitize_title( $title, $fallback_title = '', $context = 'save' ) { $title = remove_accents($title); /** - * Filter a sanitized title string. + * Filters a sanitized title string. * * @since 1.2.0 * @@ -1518,12 +1973,12 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa } $title = strtolower($title); - $title = preg_replace('/&.+?;/', '', $title); // kill entities - $title = str_replace('.', '-', $title); if ( 'save' == $context ) { // Convert nbsp, ndash and mdash to hyphens $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title ); + // Convert nbsp, ndash and mdash HTML entities to hyphens + $title = str_replace( array( ' ', ' ', '–', '–', '—', '—' ), '-', $title ); // Strip these characters entirely $title = str_replace( array( @@ -1546,6 +2001,9 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa $title = str_replace( '%c3%97', 'x', $title ); } + $title = preg_replace('/&.+?;/', '', $title); // kill entities + $title = str_replace('.', '-', $title); + $title = preg_replace('/[^%a-z0-9 _-]/', '', $title); $title = preg_replace('/\s+/', '-', $title); $title = preg_replace('|-+|', '-', $title); @@ -1596,11 +2054,11 @@ function sanitize_html_class( $class, $fallback = '' ) { //Limit to A-Z,a-z,0-9,_,- $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized ); - if ( '' == $sanitized ) - $sanitized = $fallback; - + if ( '' == $sanitized && $fallback ) { + return sanitize_html_class( $fallback ); + } /** - * Filter a sanitized HTML class string. + * Filters a sanitized HTML class string. * * @since 2.8.0 * @@ -1835,25 +2293,28 @@ function force_balance_tags( $text ) { * Acts on text which is about to be edited. 
* * The $content is run through esc_textarea(), which uses htmlspecialchars() - * to convert special characters to HTML entities. If $richedit is set to true, - * it is simply a holder for the 'format_to_edit' filter. + * to convert special characters to HTML entities. If `$richedit` is set to true, + * it is simply a holder for the {@see 'format_to_edit'} filter. * * @since 0.71 + * @since 4.4.0 The `$richedit` parameter was renamed to `$rich_text` for clarity. * - * @param string $content The text about to be edited. - * @param bool $richedit Whether the $content should not pass through htmlspecialchars(). Default false (meaning it will be passed). + * @param string $content The text about to be edited. + * @param bool $rich_text Optional. Whether `$content` should be considered rich text, + * in which case it would not be passed through esc_textarea(). + * Default false. * @return string The text after the filter (and possibly htmlspecialchars()) has been run. */ -function format_to_edit( $content, $richedit = false ) { +function format_to_edit( $content, $rich_text = false ) { /** - * Filter the text to be formatted for editing. + * Filters the text to be formatted for editing. * * @since 1.2.0 * * @param string $content The text, prior to formatting for editing. */ $content = apply_filters( 'format_to_edit', $content ); - if ( ! $richedit ) + if ( ! $rich_text ) $content = esc_textarea( $content ); return $content; } @@ -1930,7 +2391,7 @@ function untrailingslashit( $string ) { * Adds slashes to escape strings. * * Slashes will first be removed if magic_quotes_gpc is set, see {@link - * http://www.php.net/magic_quotes} for more details. + * https://secure.php.net/magic_quotes} for more details. * * @since 0.71 * @@ -1945,10 +2406,7 @@ function addslashes_gpc($gpc) { } /** - * Navigates through an array and removes slashes from the values. - * - * If an array is passed, the array_map() function causes a callback to pass the - * value back to the function. 
The slashes from this value will removed. + * Navigates through an array, object, or scalar, and removes slashes from the values. * * @since 2.0.0 * @@ -1956,43 +2414,55 @@ function addslashes_gpc($gpc) { * @return mixed Stripped value. */ function stripslashes_deep( $value ) { - if ( is_array($value) ) { - $value = array_map('stripslashes_deep', $value); - } elseif ( is_object($value) ) { - $vars = get_object_vars( $value ); - foreach ($vars as $key=>$data) { - $value->{$key} = stripslashes_deep( $data ); - } - } elseif ( is_string( $value ) ) { - $value = stripslashes($value); - } - - return $value; + return map_deep( $value, 'stripslashes_from_strings_only' ); } /** - * Navigates through an array and encodes the values to be used in a URL. + * Callback function for `stripslashes_deep()` which strips slashes from strings. * + * @since 4.4.0 + * + * @param mixed $value The array or string to be stripped. + * @return mixed $value The stripped value. + */ +function stripslashes_from_strings_only( $value ) { + return is_string( $value ) ? stripslashes( $value ) : $value; +} + +/** + * Navigates through an array, object, or scalar, and encodes the values to be used in a URL. * * @since 2.2.0 * - * @param array|string $value The array or string to be encoded. - * @return array|string $value The encoded array (or string from the callback). + * @param mixed $value The array or string to be encoded. + * @return mixed $value The encoded value. */ function urlencode_deep( $value ) { - return is_array( $value ) ? array_map( 'urlencode_deep', $value ) : urlencode( $value ); + return map_deep( $value, 'urlencode' ); } /** - * Navigates through an array and raw encodes the values to be used in a URL. + * Navigates through an array, object, or scalar, and raw-encodes the values to be used in a URL. * * @since 3.4.0 * - * @param array|string $value The array or string to be encoded. - * @return array|string $value The encoded array (or string from the callback). 
+ * @param mixed $value The array or string to be encoded. + * @return mixed $value The encoded value. */ function rawurlencode_deep( $value ) { - return is_array( $value ) ? array_map( 'rawurlencode_deep', $value ) : rawurlencode( $value ); + return map_deep( $value, 'rawurlencode' ); +} + +/** + * Navigates through an array, object, or scalar, and decodes URL-encoded values + * + * @since 4.4.0 + * + * @param mixed $value The array or string to be decoded. + * @return mixed $value The decoded value. + */ +function urldecode_deep( $value ) { + return map_deep( $value, 'urldecode' ); } /** @@ -2023,8 +2493,7 @@ function antispambot( $email_address, $hex_encoding = 0 ) { /** * Callback to convert URI match to HTML A element. * - * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link - * make_clickable()}. + * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable(). * * @since 2.3.2 * @access private @@ -2060,8 +2529,7 @@ function _make_url_clickable_cb( $matches ) { /** * Callback to convert URL match to HTML A element. * - * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link - * make_clickable()}. + * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable(). * * @since 2.3.2 * @access private @@ -2073,23 +2541,24 @@ function _make_web_ftp_clickable_cb( $matches ) { $ret = ''; $dest = $matches[2]; $dest = 'http://' . $dest; - $dest = esc_url($dest); - if ( empty($dest) ) - return $matches[0]; // removed trailing [.,;:)] from URL if ( in_array( substr($dest, -1), array('.', ',', ';', ':', ')') ) === true ) { $ret = substr($dest, -1); $dest = substr($dest, 0, strlen($dest)-1); } + + $dest = esc_url($dest); + if ( empty($dest) ) + return $matches[0]; + return $matches[1] . "$dest$ret"; } /** * Callback to convert email address match to HTML A element. * - * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link - * make_clickable()}. 
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable(). * * @since 2.3.2 * @access private @@ -2119,9 +2588,9 @@ function make_clickable( $text ) { $nested_code_pre = 0; // Keep track of how many levels link is nested inside
 or 
 	foreach ( $textarr as $piece ) {
 
-		if ( preg_match( '|^]|i', $piece ) || preg_match( '|^]|i', $piece ) )
+		if ( preg_match( '|^]|i', $piece ) || preg_match( '|^]|i', $piece ) || preg_match( '|^]|i', $piece ) || preg_match( '|^]|i', $piece ) )
 			$nested_code_pre++;
-		elseif ( ( '' === strtolower( $piece ) || '
' === strtolower( $piece ) ) && $nested_code_pre ) + elseif ( $nested_code_pre && ( '' === strtolower( $piece ) || '' === strtolower( $piece ) || '' === strtolower( $piece ) || '' === strtolower( $piece ) ) ) $nested_code_pre--; if ( $nested_code_pre || empty( $piece ) || ( $piece[0] === '<' && ! preg_match( '|^<\s*[\w]{1,20}+://|', $piece ) ) ) { @@ -2255,14 +2724,37 @@ function wp_rel_nofollow( $text ) { */ function wp_rel_nofollow_callback( $matches ) { $text = $matches[1]; - $text = str_replace(array(' rel="nofollow"', " rel='nofollow'"), '', $text); - return ""; + $atts = shortcode_parse_atts( $matches[1] ); + $rel = 'nofollow'; + + if ( preg_match( '%href=["\'](' . preg_quote( set_url_scheme( home_url(), 'http' ) ) . ')%i', $text ) || + preg_match( '%href=["\'](' . preg_quote( set_url_scheme( home_url(), 'https' ) ) . ')%i', $text ) + ) { + return ""; + } + + if ( ! empty( $atts['rel'] ) ) { + $parts = array_map( 'trim', explode( ' ', $atts['rel'] ) ); + if ( false === array_search( 'nofollow', $parts ) ) { + $parts[] = 'nofollow'; + } + $rel = implode( ' ', $parts ); + unset( $atts['rel'] ); + + $html = ''; + foreach ( $atts as $name => $value ) { + $html .= "{$name}=\"$value\" "; + } + $text = trim( $html ); + } + return ""; } /** * Convert one smiley code to the icon graphic file equivalent. * - * Callback handler for {@link convert_smilies()}. + * Callback handler for convert_smilies(). + * * Looks up one smiley code in the $wpsmiliestrans global array and returns an * `` string for that smiley. * @@ -2292,7 +2784,7 @@ function translate_smiley( $matches ) { } /** - * Filter the Smiley image URL before it's used in the image element. + * Filters the Smiley image URL before it's used in the image element. * * @since 2.9.0 * @@ -2370,12 +2862,12 @@ function convert_smilies( $text ) { */ function is_email( $email, $deprecated = false ) { if ( ! 
empty( $deprecated ) ) - _deprecated_argument( __FUNCTION__, '3.0' ); + _deprecated_argument( __FUNCTION__, '3.0.0' ); // Test for the minimum length the email can be if ( strlen( $email ) < 3 ) { /** - * Filter whether an email address is valid. + * Filters whether an email address is valid. * * This filter is evaluated under several different contexts, such as 'email_too_short', * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits', @@ -2385,7 +2877,6 @@ function is_email( $email, $deprecated = false ) { * * @param bool $is_email Whether the email address has passed the is_email() checks. Default false. * @param string $email The email address being checked. - * @param string $message An explanatory message to the user. * @param string $context Context under which the email was tested. */ return apply_filters( 'is_email', false, $email, 'email_too_short' ); @@ -2498,13 +2989,19 @@ function get_gmt_from_date( $string, $format = 'Y-m-d H:i:s' ) { $tz = get_option( 'timezone_string' ); if ( $tz ) { $datetime = date_create( $string, new DateTimeZone( $tz ) ); - if ( ! $datetime ) + if ( ! $datetime ) { return gmdate( $format, 0 ); + } $datetime->setTimezone( new DateTimeZone( 'UTC' ) ); $string_gmt = $datetime->format( $format ); } else { - if ( ! preg_match( '#([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2}) ([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})#', $string, $matches ) ) - return gmdate( $format, 0 ); + if ( ! 
preg_match( '#([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2}) ([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})#', $string, $matches ) ) { + $datetime = strtotime( $string ); + if ( false === $datetime ) { + return gmdate( $format, 0 ); + } + return gmdate( $format, $datetime ); + } $string_time = gmmktime( $matches[4], $matches[5], $matches[6], $matches[2], $matches[3], $matches[1] ); $string_gmt = gmdate( $format, $string_time - get_option( 'gmt_offset' ) * HOUR_IN_SECONDS ); } @@ -2568,7 +3065,7 @@ function iso8601_timezone_to_offset( $timezone ) { * * @since 1.5.0 * - * @param string $date_string Date and time in ISO 8601 format {@link http://en.wikipedia.org/wiki/ISO_8601}. + * @param string $date_string Date and time in ISO 8601 format {@link https://en.wikipedia.org/wiki/ISO_8601}. * @param string $timezone Optional. If set to GMT returns the time minus gmt_offset. Default is 'user'. * @return string The date and time in MySQL DateTime format - Y-m-d H:i:s. */ @@ -2595,23 +3092,6 @@ function iso8601_to_datetime( $date_string, $timezone = 'user' ) { } } -/** - * Adds a element attributes to open links in new windows. - * - * Comment text in popup windows should be filtered through this. Right now it's - * a moderately dumb function, ideally it would detect whether a target or rel - * attribute was already there and adjust its actions accordingly. - * - * @since 0.71 - * - * @param string $text Content to replace links to open in a new window. - * @return string Content that has filtered links. - */ -function popuplinks( $text ) { - $text = preg_replace('//i', "", $text); - return $text; -} - /** * Strips out all characters that are not allowable in an email. * @@ -2624,7 +3104,7 @@ function sanitize_email( $email ) { // Test for the minimum length the email can be if ( strlen( $email ) < 3 ) { /** - * Filter a sanitized email address. + * Filters a sanitized email address. 
* * This filter is evaluated under several contexts, including 'email_too_short', * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits', @@ -2737,37 +3217,42 @@ function human_time_diff( $from, $to = '' ) { $mins = round( $diff / MINUTE_IN_SECONDS ); if ( $mins <= 1 ) $mins = 1; - /* translators: min=minute */ + /* translators: Time difference between two dates, in minutes (min=minute). 1: Number of minutes */ $since = sprintf( _n( '%s min', '%s mins', $mins ), $mins ); } elseif ( $diff < DAY_IN_SECONDS && $diff >= HOUR_IN_SECONDS ) { $hours = round( $diff / HOUR_IN_SECONDS ); if ( $hours <= 1 ) $hours = 1; + /* translators: Time difference between two dates, in hours. 1: Number of hours */ $since = sprintf( _n( '%s hour', '%s hours', $hours ), $hours ); } elseif ( $diff < WEEK_IN_SECONDS && $diff >= DAY_IN_SECONDS ) { $days = round( $diff / DAY_IN_SECONDS ); if ( $days <= 1 ) $days = 1; + /* translators: Time difference between two dates, in days. 1: Number of days */ $since = sprintf( _n( '%s day', '%s days', $days ), $days ); - } elseif ( $diff < 30 * DAY_IN_SECONDS && $diff >= WEEK_IN_SECONDS ) { + } elseif ( $diff < MONTH_IN_SECONDS && $diff >= WEEK_IN_SECONDS ) { $weeks = round( $diff / WEEK_IN_SECONDS ); if ( $weeks <= 1 ) $weeks = 1; + /* translators: Time difference between two dates, in weeks. 1: Number of weeks */ $since = sprintf( _n( '%s week', '%s weeks', $weeks ), $weeks ); - } elseif ( $diff < YEAR_IN_SECONDS && $diff >= 30 * DAY_IN_SECONDS ) { - $months = round( $diff / ( 30 * DAY_IN_SECONDS ) ); + } elseif ( $diff < YEAR_IN_SECONDS && $diff >= MONTH_IN_SECONDS ) { + $months = round( $diff / MONTH_IN_SECONDS ); if ( $months <= 1 ) $months = 1; + /* translators: Time difference between two dates, in months. 
1: Number of months */ $since = sprintf( _n( '%s month', '%s months', $months ), $months ); } elseif ( $diff >= YEAR_IN_SECONDS ) { $years = round( $diff / YEAR_IN_SECONDS ); if ( $years <= 1 ) $years = 1; + /* translators: Time difference between two dates, in years. 1: Number of years */ $since = sprintf( _n( '%s year', '%s years', $years ), $years ); } /** - * Filter the human readable difference between two timestamps. + * Filters the human readable difference between two timestamps. * * @since 4.0.0 * @@ -2786,8 +3271,8 @@ function human_time_diff( $from, $to = '' ) { * that, then the string ' […]' will be appended to the excerpt. If the string * is less than 55 words, then the content will be returned as is. * - * The 55 word limit can be modified by plugins/themes using the excerpt_length filter - * The ' […]' string can be modified by plugins/themes using the excerpt_more filter + * The 55 word limit can be modified by plugins/themes using the {@see 'excerpt_length'} filter + * The ' […]' string can be modified by plugins/themes using the {@see 'excerpt_more'} filter * * @since 1.5.0 * @@ -2806,7 +3291,7 @@ function wp_trim_excerpt( $text = '' ) { $text = str_replace(']]>', ']]>', $text); /** - * Filter the number of words in an excerpt. + * Filters the number of words in an excerpt. * * @since 2.7.0 * @@ -2814,7 +3299,7 @@ function wp_trim_excerpt( $text = '' ) { */ $excerpt_length = apply_filters( 'excerpt_length', 55 ); /** - * Filter the string in the "more" link displayed after a trimmed excerpt. + * Filters the string in the "more" link displayed after a trimmed excerpt. * * @since 2.9.0 * @@ -2824,7 +3309,7 @@ function wp_trim_excerpt( $text = '' ) { $text = wp_trim_words( $text, $excerpt_length, $excerpt_more ); } /** - * Filter the trimmed excerpt string. + * Filters the trimmed excerpt string. 
* * @since 2.8.0 * @@ -2880,7 +3365,7 @@ function wp_trim_words( $text, $num_words = 55, $more = null ) { } /** - * Filter the text content after words have been trimmed. + * Filters the text content after words have been trimmed. * * @since 3.3.0 * @@ -2903,7 +3388,7 @@ function wp_trim_words( $text, $num_words = 55, $more = null ) { function ent2ncr( $text ) { /** - * Filter text before named entities are converted into numbered entities. + * Filters text before named entities are converted into numbered entities. * * A non-null string must be returned for the filter to be evaluated. * @@ -3189,7 +3674,11 @@ function ent2ncr( $text ) { * * @since 4.3.0 * - * @param string $text The text to be formatted. + * @see _WP_Editors::editor() + * + * @param string $text The text to be formatted. + * @param string $default_editor The default editor for the current user. + * It is usually either 'html' or 'tinymce'. * @return string The formatted text after filter is applied. */ function format_for_editor( $text, $default_editor = null ) { @@ -3198,11 +3687,13 @@ function format_for_editor( $text, $default_editor = null ) { } /** - * Filter the text after it is formatted for the editor. + * Filters the text after it is formatted for the editor. * * @since 4.3.0 * - * @param string $text The formatted text. + * @param string $text The formatted text. + * @param string $default_editor The default editor for the current user. + * It is usually either 'html' or 'tinymce'. */ return apply_filters( 'format_for_editor', $text, $default_editor ); } @@ -3242,7 +3733,7 @@ function _deep_replace( $search, $subject ) { * * @since 2.8.0 * - * @global wpdb $wpdb + * @global wpdb $wpdb WordPress database abstraction object. * * @param string|array $data Unescaped data * @return string|array Escaped data @@ -3256,7 +3747,7 @@ function esc_sql( $data ) { * Checks and cleans a URL. * * A number of characters are removed from the URL. 
If the URL is for displaying - * (the default behaviour) ampersands are also replaced. The 'clean_url' filter + * (the default behaviour) ampersands are also replaced. The {@see 'clean_url'} filter * is applied to the returned cleaned URL. * * @since 2.8.0 @@ -3265,21 +3756,29 @@ function esc_sql( $data ) { * @param array $protocols Optional. An array of acceptable protocols. * Defaults to return value of wp_allowed_protocols() * @param string $_context Private. Use esc_url_raw() for database usage. - * @return string The cleaned $url after the 'clean_url' filter is applied. + * @return string The cleaned $url after the {@see 'clean_url'} filter is applied. */ function esc_url( $url, $protocols = null, $_context = 'display' ) { $original_url = $url; if ( '' == $url ) return $url; - $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\\x80-\\xff]|i', '', $url); + + $url = str_replace( ' ', '%20', $url ); + $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url); + + if ( '' === $url ) { + return $url; + } + if ( 0 !== stripos( $url, 'mailto:' ) ) { $strip = array('%0d', '%0a', '%0D', '%0A'); $url = _deep_replace($strip, $url); } + $url = str_replace(';//', '://', $url); /* If the URL doesn't appear to contain a scheme, we - * presume it needs http:// appended (unless a relative + * presume it needs http:// prepended (unless a relative * link starting with /, # or ? or a php file). */ if ( strpos($url, ':') === false && ! in_array( $url[0], array( '/', '#', '?' ) ) && @@ -3293,6 +3792,43 @@ function esc_url( $url, $protocols = null, $_context = 'display' ) { $url = str_replace( "'", ''', $url ); } + if ( ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url, ']' ) ) ) { + + $parsed = wp_parse_url( $url ); + $front = ''; + + if ( isset( $parsed['scheme'] ) ) { + $front .= $parsed['scheme'] . 
'://'; + } elseif ( '/' === $url[0] ) { + $front .= '//'; + } + + if ( isset( $parsed['user'] ) ) { + $front .= $parsed['user']; + } + + if ( isset( $parsed['pass'] ) ) { + $front .= ':' . $parsed['pass']; + } + + if ( isset( $parsed['user'] ) || isset( $parsed['pass'] ) ) { + $front .= '@'; + } + + if ( isset( $parsed['host'] ) ) { + $front .= $parsed['host']; + } + + if ( isset( $parsed['port'] ) ) { + $front .= ':' . $parsed['port']; + } + + $end_dirty = str_replace( $front, '', $url ); + $end_clean = str_replace( array( '[', ']' ), array( '%5B', '%5D' ), $end_dirty ); + $url = str_replace( $end_dirty, $end_clean, $url ); + + } + if ( '/' === $url[0] ) { $good_protocol_url = $url; } else { @@ -3304,7 +3840,7 @@ function esc_url( $url, $protocols = null, $_context = 'display' ) { } /** - * Filter a string cleaned and escaped for output as a URL. + * Filters a string cleaned and escaped for output as a URL. * * @since 2.3.0 * @@ -3331,7 +3867,7 @@ function esc_url_raw( $url, $protocols = null ) { /** * Convert entities, while preserving already-encoded entities. * - * @link http://www.php.net/htmlentities Borrowed from the PHP Manual user notes. + * @link https://secure.php.net/htmlentities Borrowed from the PHP Manual user notes. * * @since 1.2.2 * @@ -3349,7 +3885,7 @@ function htmlentities2( $myHTML ) { * * Escapes text strings for echoing in JS. It is intended to be used for inline JS * (in a tag attribute, for example onclick="..."). Note that the strings have to - * be in single quotes. The filter 'js_escape' is also applied here. + * be in single quotes. The {@see 'js_escape'} filter is also applied here. * * @since 2.8.0 * @@ -3363,7 +3899,7 @@ function esc_js( $text ) { $safe_text = str_replace( "\r", '', $safe_text ); $safe_text = str_replace( "\n", '\\n', addslashes( $safe_text ) ); /** - * Filter a string cleaned and escaped for output in JavaScript. + * Filters a string cleaned and escaped for output in JavaScript. 
* * Text passed to esc_js() is stripped of invalid or special characters, * and properly slashed for output. @@ -3388,7 +3924,7 @@ function esc_html( $text ) { $safe_text = wp_check_invalid_utf8( $text ); $safe_text = _wp_specialchars( $safe_text, ENT_QUOTES ); /** - * Filter a string cleaned and escaped for output in HTML. + * Filters a string cleaned and escaped for output in HTML. * * Text passed to esc_html() is stripped of invalid or special characters * before output. @@ -3413,7 +3949,7 @@ function esc_attr( $text ) { $safe_text = wp_check_invalid_utf8( $text ); $safe_text = _wp_specialchars( $safe_text, ENT_QUOTES ); /** - * Filter a string cleaned and escaped for output in an HTML attribute. + * Filters a string cleaned and escaped for output in an HTML attribute. * * Text passed to esc_attr() is stripped of invalid or special characters * before output. @@ -3437,7 +3973,7 @@ function esc_attr( $text ) { function esc_textarea( $text ) { $safe_text = htmlspecialchars( $text, ENT_QUOTES, get_option( 'blog_charset' ) ); /** - * Filter a string cleaned and escaped for output in a textarea element. + * Filters a string cleaned and escaped for output in a textarea element. * * @since 3.1.0 * @@ -3458,7 +3994,7 @@ function esc_textarea( $text ) { function tag_escape( $tag_name ) { $safe_tag = strtolower( preg_replace('/[^a-zA-Z0-9_:]/', '', $tag_name) ); /** - * Filter a string cleaned and escaped for output as an HTML tag. + * Filters a string cleaned and escaped for output as an HTML tag. * * @since 2.8.0 * @@ -3481,7 +4017,7 @@ function tag_escape( $tag_name ) { * @return string Absolute path. */ function wp_make_link_relative( $link ) { - return preg_replace( '|^(https?:)?//[^/]+(/.*)|i', '$2', $link ); + return preg_replace( '|^(https?:)?//[^/]+(/?.*)|i', '$2', $link ); } /** @@ -3492,7 +4028,7 @@ function wp_make_link_relative( $link ) { * * @since 2.0.5 * - * @global wpdb $wpdb + * @global wpdb $wpdb WordPress database abstraction object. 
* * @param string $option The name of the option. * @param string $value The unsanitised value. @@ -3522,6 +4058,8 @@ function sanitize_option( $option, $value ) { case 'thumbnail_size_h': case 'medium_size_w': case 'medium_size_h': + case 'medium_large_size_w': + case 'medium_large_size_h': case 'large_size_w': case 'large_size_h': case 'mailserver_port': @@ -3560,10 +4098,13 @@ function sanitize_option( $option, $value ) { case 'blogdescription': case 'blogname': $value = $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', $value ); + if ( $value !== $original_value ) { + $value = $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', wp_encode_emoji( $original_value ) ); + } + if ( is_wp_error( $value ) ) { $error = $value->get_error_message(); } else { - $value = wp_kses_post( $value ); $value = esc_html( $value ); } break; @@ -3696,6 +4237,14 @@ function sanitize_option( $option, $value ) { $value = esc_url_raw( $value ); $value = str_replace( 'http://', '', $value ); } + + if ( 'permalink_structure' === $option && '' !== $value && ! preg_match( '/%[^\/%]+%/', $value ) ) { + $error = sprintf( + /* translators: %s: Codex URL */ + __( 'A structure tag is required when using custom permalinks. Learn more' ), + __( 'https://codex.wordpress.org/Using_Permalinks#Choosing_your_permalink_structure' ) + ); + } break; case 'default_role' : @@ -3725,7 +4274,7 @@ function sanitize_option( $option, $value ) { } /** - * Filter an option value following sanitization. + * Filters an option value following sanitization. * * @since 2.3.0 * @since 4.3.0 Added the `$original_value` parameter. @@ -3737,11 +4286,39 @@ function sanitize_option( $option, $value ) { return apply_filters( "sanitize_option_{$option}", $value, $option, $original_value ); } +/** + * Maps a function to all non-iterable elements of an array or an object. + * + * This is similar to `array_walk_recursive()` but acts upon objects too. 
+ * + * @since 4.4.0 + * + * @param mixed $value The array, object, or scalar. + * @param callable $callback The function to map onto $value. + * @return mixed The value with the callback applied to all non-arrays and non-objects inside it. + */ +function map_deep( $value, $callback ) { + if ( is_array( $value ) ) { + foreach ( $value as $index => $item ) { + $value[ $index ] = map_deep( $item, $callback ); + } + } elseif ( is_object( $value ) ) { + $object_vars = get_object_vars( $value ); + foreach ( $object_vars as $property_name => $property_value ) { + $value->$property_name = map_deep( $property_value, $callback ); + } + } else { + $value = call_user_func( $callback, $value ); + } + + return $value; +} + /** * Parses a string into variables to be stored in an array. * - * Uses {@link http://www.php.net/parse_str parse_str()} and stripslashes if - * {@link http://www.php.net/magic_quotes magic_quotes_gpc} is on. + * Uses {@link https://secure.php.net/parse_str parse_str()} and stripslashes if + * {@link https://secure.php.net/magic_quotes magic_quotes_gpc} is on. * * @since 2.2.1 * @@ -3753,7 +4330,7 @@ function wp_parse_str( $string, &$array ) { if ( get_magic_quotes_gpc() ) $array = stripslashes_deep( $array ); /** - * Filter the array of variables derived from a parsed string. + * Filters the array of variables derived from a parsed string. * * @since 2.3.0 * @@ -3794,7 +4371,7 @@ function wp_pre_kses_less_than_callback( $matches ) { * WordPress implementation of PHP sprintf() with filters. * * @since 2.5.0 - * @link http://www.php.net/sprintf + * @link https://secure.php.net/sprintf * * @param string $pattern The string which formatted args are inserted. * @param mixed $args ,... Arguments to be formatted into the $pattern string. @@ -3838,7 +4415,7 @@ function wp_sprintf( $pattern ) { } /** - * Filter a fragment from the pattern passed to wp_sprintf(). + * Filters a fragment from the pattern passed to wp_sprintf(). 
* * If the fragment is unchanged, then sprintf() will be run on the fragment. * @@ -3884,7 +4461,7 @@ function wp_sprintf_l( $pattern, $args ) { return ''; /** - * Filter the translated delimiters used by wp_sprintf_l(). + * Filters the translated delimiters used by wp_sprintf_l(). * Placeholders (%s) are included to assist translators and then * removed before the array of strings reaches the filter. * @@ -3985,7 +4562,7 @@ function _links_add_base( $m ) { return $m[1] . '=' . $m[2] . ( preg_match( '#^(\w{1,20}):#', $m[3], $protocol ) && in_array( $protocol[1], wp_allowed_protocols() ) ? $m[3] : - WP_HTTP::make_absolute_url( $m[3], $_links_add_base ) + WP_Http::make_absolute_url( $m[3], $_links_add_base ) ) . $m[2]; } @@ -4071,30 +4648,93 @@ function wp_strip_all_tags($string, $remove_breaks = false) { } /** - * Sanitize a string from user input or from the db + * Sanitizes a string from user input or from the database. * - * check for invalid UTF-8, - * Convert single < characters to entity, - * strip all tags, - * remove line breaks, tabs and extra white space, - * strip octets. + * - Checks for invalid UTF-8, + * - Converts single `<` characters to entities + * - Strips all tags + * - Removes line breaks, tabs, and extra whitespace + * - Strips octets * * @since 2.9.0 * - * @param string $str - * @return string + * @see sanitize_textarea_field() + * @see wp_check_invalid_utf8() + * @see wp_strip_all_tags() + * + * @param string $str String to sanitize. + * @return string Sanitized string. */ function sanitize_text_field( $str ) { + $filtered = _sanitize_text_fields( $str, false ); + + /** + * Filters a sanitized text field string. + * + * @since 2.9.0 + * + * @param string $filtered The sanitized string. + * @param string $str The string prior to being sanitized. + */ + return apply_filters( 'sanitize_text_field', $filtered, $str ); +} + +/** + * Sanitizes a multiline string from user input or from the database. 
+ * + * The function is like sanitize_text_field(), but preserves + * new lines (\n) and other whitespace, which are legitimate + * input in textarea elements. + * + * @see sanitize_text_field() + * + * @since 4.7.0 + * + * @param string $str String to sanitize. + * @return string Sanitized string. + */ +function sanitize_textarea_field( $str ) { + $filtered = _sanitize_text_fields( $str, true ); + + /** + * Filters a sanitized textarea field string. + * + * @since 4.7.0 + * + * @param string $filtered The sanitized string. + * @param string $str The string prior to being sanitized. + */ + return apply_filters( 'sanitize_textarea_field', $filtered, $str ); +} + +/** + * Internal helper function to sanitize a string from user input or from the db + * + * @since 4.7.0 + * @access private + * + * @param string $str String to sanitize. + * @param bool $keep_newlines optional Whether to keep newlines. Default: false. + * @return string Sanitized string. + */ +function _sanitize_text_fields( $str, $keep_newlines = false ) { $filtered = wp_check_invalid_utf8( $str ); if ( strpos($filtered, '<') !== false ) { $filtered = wp_pre_kses_less_than( $filtered ); // This will strip extra whitespace for us. - $filtered = wp_strip_all_tags( $filtered, true ); - } else { - $filtered = trim( preg_replace('/[\r\n\t ]+/', ' ', $filtered) ); + $filtered = wp_strip_all_tags( $filtered, false ); + + // Use html entities in a special case to make sure no later + // newline stripping stage could lead to a functional tag + $filtered = str_replace("<\n", "<\n", $filtered); } + if ( ! 
$keep_newlines ) { + $filtered = preg_replace( '/[\r\n\t ]+/', ' ', $filtered ); + } + $filtered = trim( $filtered ); + $found = false; while ( preg_match('/%[a-f0-9]{2}/i', $filtered, $match) ) { $filtered = str_replace($match[0], '', $filtered); @@ -4106,15 +4746,7 @@ function sanitize_text_field( $str ) { $filtered = trim( preg_replace('/ +/', ' ', $filtered) ); } - /** - * Filter a sanitized text field string. - * - * @since 2.9.0 - * - * @param string $filtered The sanitized string. - * @param string $str The string prior to being sanitized. - */ - return apply_filters( 'sanitize_text_field', $filtered, $str ); + return $filtered; } /** @@ -4138,6 +4770,9 @@ function wp_basename( $path, $suffix = '' ) { * @since 3.0.0 * * @staticvar string|false $dblq + * + * @param string $text The text to be modified. + * @return string The modified text. */ function capital_P_dangit( $text ) { // Simple replacement for titles @@ -4166,7 +4801,7 @@ function capital_P_dangit( $text ) { function sanitize_mime_type( $mime_type ) { $sani_mime_type = preg_replace( '/[^-+*.a-zA-Z0-9\/]/', '', $mime_type ); /** - * Filter a mime type following sanitization. + * Filters a mime type following sanitization. * * @since 3.1.3 * @@ -4193,7 +4828,7 @@ function sanitize_trackback_urls( $to_ping ) { $urls_to_ping = array_map( 'esc_url_raw', $urls_to_ping ); $urls_to_ping = implode( "\n", $urls_to_ping ); /** - * Filter a list of trackback URLs following sanitization. + * Filters a list of trackback URLs following sanitization. * * The string returned here consists of a space or carriage return-delimited list * of trackback URLs. @@ -4286,7 +4921,7 @@ function wp_spaces_regexp() { if ( empty( $spaces ) ) { /** - * Filter the regexp for common whitespace characters. + * Filters the regexp for common whitespace characters. * * This string is substituted for the \s sequence as needed in regular * expressions. 
For websites not written in English, different characters @@ -4337,12 +4972,12 @@ img.emoji { } /** + * Print the inline Emoji detection script if it is not already printed. * - * @global string $wp_version + * @since 4.2.0 * @staticvar bool $printed */ function print_emoji_detection_script() { - global $wp_version; static $printed = false; if ( $printed ) { @@ -4351,27 +4986,56 @@ function print_emoji_detection_script() { $printed = true; + _print_emoji_detection_script(); +} + +/** + * Prints inline Emoji dection script + * + * @ignore + * @since 4.6.0 + * @access private + */ +function _print_emoji_detection_script() { $settings = array( /** - * Filter the URL where emoji images are hosted. + * Filters the URL where emoji png images are hosted. * * @since 4.2.0 * - * @param string The emoji base URL. + * @param string The emoji base URL for png images. */ - 'baseUrl' => apply_filters( 'emoji_url', set_url_scheme( '//s.w.org/images/core/emoji/72x72/' ) ), + 'baseUrl' => apply_filters( 'emoji_url', 'https://s.w.org/images/core/emoji/2.2.1/72x72/' ), /** - * Filter the extension of the emoji files. + * Filters the extension of the emoji png files. * * @since 4.2.0 * - * @param string The emoji extension. Default .png. + * @param string The emoji extension for png files. Default .png. */ 'ext' => apply_filters( 'emoji_ext', '.png' ), + + /** + * Filters the URL where emoji SVG images are hosted. + * + * @since 4.6.0 + * + * @param string The emoji base URL for svg images. + */ + 'svgUrl' => apply_filters( 'emoji_svg_url', 'https://s.w.org/images/core/emoji/2.2.1/svg/' ), + + /** + * Filters the extension of the emoji SVG files. + * + * @since 4.6.0 + * + * @param string The emoji extension for svg files. Default .svg. + */ + 'svgExt' => apply_filters( 'emoji_svg_ext', '.svg' ), ); - $version = 'ver=' . $wp_version; + $version = 'ver=' . 
get_bloginfo( 'version' ); if ( SCRIPT_DEBUG ) { $settings['source'] = array( @@ -4406,7 +5070,7 @@ function print_emoji_detection_script() { ?> $length ) { + $short_url = substr( $short_url, 0, $length - 3 ) . '…'; + } + return $short_url; +} + +/** + * Sanitizes a hex color. + * + * Returns either '', a 3 or 6 digit hex color (with #), or nothing. + * For sanitizing values without a #, see sanitize_hex_color_no_hash(). + * + * @since 3.4.0 + * + * @param string $color + * @return string|void + */ +function sanitize_hex_color( $color ) { + if ( '' === $color ) { + return ''; + } + + // 3 or 6 hex digits, or the empty string. + if ( preg_match('|^#([A-Fa-f0-9]{3}){1,2}$|', $color ) ) { + return $color; + } +} + +/** + * Sanitizes a hex color without a hash. Use sanitize_hex_color() when possible. + * + * Saving hex colors without a hash puts the burden of adding the hash on the + * UI, which makes it difficult to use or upgrade to other color types such as + * rgba, hsl, rgb, and html color names. + * + * Returns either '', a 3 or 6 digit hex color (without a #), or null. + * + * @since 3.4.0 + * + * @param string $color + * @return string|null + */ +function sanitize_hex_color_no_hash( $color ) { + $color = ltrim( $color, '#' ); + + if ( '' === $color ) { + return ''; + } + + return sanitize_hex_color( '#' . $color ) ? $color : null; +} + +/** + * Ensures that any hex color is properly hashed. + * Otherwise, returns value untouched. + * + * This method should only be necessary if using sanitize_hex_color_no_hash(). + * + * @since 3.4.0 + * + * @param string $color + * @return string + */ +function maybe_hash_hex_color( $color ) { + if ( $unhashed = sanitize_hex_color_no_hash( $color ) ) { + return '#' . $unhashed; + } + + return $color; +}