*
* Code within certain HTML blocks is skipped.
*
- * Do not use this function before the 'init' action hook; everything will break.
+ * Do not use this function before the {@see 'init'} action hook; everything will break.
*
* @since 0.71
*
// Set up static variables. Run once only.
if ( $reset || ! isset( $static_characters ) ) {
/**
- * Filter whether to skip running wptexturize().
+ * Filters whether to skip running wptexturize().
*
* Passing false to the filter will effectively short-circuit wptexturize(),
* returning the original text passed to the function instead.
// Must do this every time in case plugins use these filters in a context-sensitive manner.
/**
- * Filter the list of HTML elements not to texturize.
+ * Filters the list of HTML elements not to texturize.
*
* @since 2.8.0
*
*/
$no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
/**
- * Filter the list of shortcodes not to texturize.
+ * Filters the list of shortcodes not to texturize.
*
* @since 2.8.0
*
// Look for shortcodes and HTML elements.
- preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches );
+ preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches );
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
$found_shortcodes = ! empty( $tagnames );
$shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : '';
continue;
} else {
// This is an HTML element delimiter.
+
+ // Replace each & with &#038; unless it already looks like an entity.
+ $curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl );
+
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
}
*/
function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) {
// Is it an opening tag or closing tag?
- if ( '/' !== $text[1] ) {
+ if ( isset( $text[1] ) && '/' !== $text[1] ) {
$opening_tag = true;
$name_offset = 1;
} elseif ( 0 == count( $stack ) ) {
$allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';
- // Add a single line break above block-level opening tags.
- $pee = preg_replace('!(<' . $allblocks . '[\s/>])!', "\n$1", $pee);
+ // Add a double line break above block-level opening tags.
+ $pee = preg_replace('!(<' . $allblocks . '[\s/>])!', "\n\n$1", $pee);
// Add a double line break below block-level closing tags.
$pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
$string = @htmlspecialchars( $string, $quote_style, $charset, $double_encode );
- // Backwards compatibility
+ // Back-compat.
if ( 'single' === $_quote_style )
$string = str_replace( "'", ''', $string );
*
* If there are no accent characters, then the string given is just returned.
*
+ * **Accent characters converted:**
+ *
+ * Currency signs:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | ------------------- |
+ * | U+00A3 | £ | (empty) | British Pound sign |
+ * | U+20AC | € | E | Euro sign |
+ *
+ * Decompositions for Latin-1 Supplement:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | ------- | ----- | ----------- | -------------------------------------- |
+ * | U+00AA | ª | a | Feminine ordinal indicator |
+ * | U+00BA | º | o | Masculine ordinal indicator |
+ * | U+00C0 | À | A | Latin capital letter A with grave |
+ * | U+00C1 | Á | A | Latin capital letter A with acute |
+ * | U+00C2 | Â | A | Latin capital letter A with circumflex |
+ * | U+00C3 | Ã | A | Latin capital letter A with tilde |
+ * | U+00C4 | Ä | A | Latin capital letter A with diaeresis |
+ * | U+00C5 | Å | A | Latin capital letter A with ring above |
+ * | U+00C6 | Æ | AE | Latin capital letter AE |
+ * | U+00C7 | Ç | C | Latin capital letter C with cedilla |
+ * | U+00C8 | È | E | Latin capital letter E with grave |
+ * | U+00C9 | É | E | Latin capital letter E with acute |
+ * | U+00CA | Ê | E | Latin capital letter E with circumflex |
+ * | U+00CB | Ë | E | Latin capital letter E with diaeresis |
+ * | U+00CC | Ì | I | Latin capital letter I with grave |
+ * | U+00CD | Í | I | Latin capital letter I with acute |
+ * | U+00CE | Î | I | Latin capital letter I with circumflex |
+ * | U+00CF | Ï | I | Latin capital letter I with diaeresis |
+ * | U+00D0 | Ð | D | Latin capital letter Eth |
+ * | U+00D1 | Ñ | N | Latin capital letter N with tilde |
+ * | U+00D2 | Ò | O | Latin capital letter O with grave |
+ * | U+00D3 | Ó | O | Latin capital letter O with acute |
+ * | U+00D4 | Ô | O | Latin capital letter O with circumflex |
+ * | U+00D5 | Õ | O | Latin capital letter O with tilde |
+ * | U+00D6 | Ö | O | Latin capital letter O with diaeresis |
+ * | U+00D8 | Ø | O | Latin capital letter O with stroke |
+ * | U+00D9 | Ù | U | Latin capital letter U with grave |
+ * | U+00DA | Ú | U | Latin capital letter U with acute |
+ * | U+00DB | Û | U | Latin capital letter U with circumflex |
+ * | U+00DC | Ü | U | Latin capital letter U with diaeresis |
+ * | U+00DD | Ý | Y | Latin capital letter Y with acute |
+ * | U+00DE | Þ | TH | Latin capital letter Thorn |
+ * | U+00DF | ß | s | Latin small letter sharp s |
+ * | U+00E0 | à | a | Latin small letter a with grave |
+ * | U+00E1 | á | a | Latin small letter a with acute |
+ * | U+00E2 | â | a | Latin small letter a with circumflex |
+ * | U+00E3 | ã | a | Latin small letter a with tilde |
+ * | U+00E4 | ä | a | Latin small letter a with diaeresis |
+ * | U+00E5 | å | a | Latin small letter a with ring above |
+ * | U+00E6 | æ | ae | Latin small letter ae |
+ * | U+00E7 | ç | c | Latin small letter c with cedilla |
+ * | U+00E8 | è | e | Latin small letter e with grave |
+ * | U+00E9 | é | e | Latin small letter e with acute |
+ * | U+00EA | ê | e | Latin small letter e with circumflex |
+ * | U+00EB | ë | e | Latin small letter e with diaeresis |
+ * | U+00EC | ì | i | Latin small letter i with grave |
+ * | U+00ED | í | i | Latin small letter i with acute |
+ * | U+00EE | î | i | Latin small letter i with circumflex |
+ * | U+00EF | ï | i | Latin small letter i with diaeresis |
+ * | U+00F0 | ð | d | Latin small letter Eth |
+ * | U+00F1 | ñ | n | Latin small letter n with tilde |
+ * | U+00F2 | ò | o | Latin small letter o with grave |
+ * | U+00F3 | ó | o | Latin small letter o with acute |
+ * | U+00F4 | ô | o | Latin small letter o with circumflex |
+ * | U+00F5 | õ | o | Latin small letter o with tilde |
+ * | U+00F6 | ö | o | Latin small letter o with diaeresis |
+ * | U+00F8 | ø | o | Latin small letter o with stroke |
+ * | U+00F9 | ù | u | Latin small letter u with grave |
+ * | U+00FA | ú | u | Latin small letter u with acute |
+ * | U+00FB | û | u | Latin small letter u with circumflex |
+ * | U+00FC | ü | u | Latin small letter u with diaeresis |
+ * | U+00FD | ý | y | Latin small letter y with acute |
+ * | U+00FE | þ | th | Latin small letter Thorn |
+ * | U+00FF | ÿ | y | Latin small letter y with diaeresis |
+ *
+ * Decompositions for Latin Extended-A:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | ------- | ----- | ----------- | ------------------------------------------------- |
+ * | U+0100 | Ā | A | Latin capital letter A with macron |
+ * | U+0101 | ā | a | Latin small letter a with macron |
+ * | U+0102 | Ă | A | Latin capital letter A with breve |
+ * | U+0103 | ă | a | Latin small letter a with breve |
+ * | U+0104 | Ą | A | Latin capital letter A with ogonek |
+ * | U+0105 | ą | a | Latin small letter a with ogonek |
+ * | U+0106 | Ć | C | Latin capital letter C with acute |
+ * | U+0107 | ć | c | Latin small letter c with acute |
+ * | U+0108 | Ĉ | C | Latin capital letter C with circumflex |
+ * | U+0109 | ĉ | c | Latin small letter c with circumflex |
+ * | U+010A | Ċ | C | Latin capital letter C with dot above |
+ * | U+010B | ċ | c | Latin small letter c with dot above |
+ * | U+010C | Č | C | Latin capital letter C with caron |
+ * | U+010D | č | c | Latin small letter c with caron |
+ * | U+010E | Ď | D | Latin capital letter D with caron |
+ * | U+010F | ď | d | Latin small letter d with caron |
+ * | U+0110 | Đ | D | Latin capital letter D with stroke |
+ * | U+0111 | đ | d | Latin small letter d with stroke |
+ * | U+0112 | Ē | E | Latin capital letter E with macron |
+ * | U+0113 | ē | e | Latin small letter e with macron |
+ * | U+0114 | Ĕ | E | Latin capital letter E with breve |
+ * | U+0115 | ĕ | e | Latin small letter e with breve |
+ * | U+0116 | Ė | E | Latin capital letter E with dot above |
+ * | U+0117 | ė | e | Latin small letter e with dot above |
+ * | U+0118 | Ę | E | Latin capital letter E with ogonek |
+ * | U+0119 | ę | e | Latin small letter e with ogonek |
+ * | U+011A | Ě | E | Latin capital letter E with caron |
+ * | U+011B | ě | e | Latin small letter e with caron |
+ * | U+011C | Ĝ | G | Latin capital letter G with circumflex |
+ * | U+011D | ĝ | g | Latin small letter g with circumflex |
+ * | U+011E | Ğ | G | Latin capital letter G with breve |
+ * | U+011F | ğ | g | Latin small letter g with breve |
+ * | U+0120 | Ġ | G | Latin capital letter G with dot above |
+ * | U+0121 | ġ | g | Latin small letter g with dot above |
+ * | U+0122 | Ģ | G | Latin capital letter G with cedilla |
+ * | U+0123 | ģ | g | Latin small letter g with cedilla |
+ * | U+0124 | Ĥ | H | Latin capital letter H with circumflex |
+ * | U+0125 | ĥ | h | Latin small letter h with circumflex |
+ * | U+0126 | Ħ | H | Latin capital letter H with stroke |
+ * | U+0127 | ħ | h | Latin small letter h with stroke |
+ * | U+0128 | Ĩ | I | Latin capital letter I with tilde |
+ * | U+0129 | ĩ | i | Latin small letter i with tilde |
+ * | U+012A | Ī | I | Latin capital letter I with macron |
+ * | U+012B | ī | i | Latin small letter i with macron |
+ * | U+012C | Ĭ | I | Latin capital letter I with breve |
+ * | U+012D | ĭ | i | Latin small letter i with breve |
+ * | U+012E | Į | I | Latin capital letter I with ogonek |
+ * | U+012F | į | i | Latin small letter i with ogonek |
+ * | U+0130 | İ | I | Latin capital letter I with dot above |
+ * | U+0131 | ı | i | Latin small letter dotless i |
+ * | U+0132 | IJ | IJ | Latin capital ligature IJ |
+ * | U+0133 | ij | ij | Latin small ligature ij |
+ * | U+0134 | Ĵ | J | Latin capital letter J with circumflex |
+ * | U+0135 | ĵ | j | Latin small letter j with circumflex |
+ * | U+0136 | Ķ | K | Latin capital letter K with cedilla |
+ * | U+0137 | ķ | k | Latin small letter k with cedilla |
+ * | U+0138 | ĸ | k | Latin small letter Kra |
+ * | U+0139 | Ĺ | L | Latin capital letter L with acute |
+ * | U+013A | ĺ | l | Latin small letter l with acute |
+ * | U+013B | Ļ | L | Latin capital letter L with cedilla |
+ * | U+013C | ļ | l | Latin small letter l with cedilla |
+ * | U+013D | Ľ | L | Latin capital letter L with caron |
+ * | U+013E | ľ | l | Latin small letter l with caron |
+ * | U+013F | Ŀ | L | Latin capital letter L with middle dot |
+ * | U+0140 | ŀ | l | Latin small letter l with middle dot |
+ * | U+0141 | Ł | L | Latin capital letter L with stroke |
+ * | U+0142 | ł | l | Latin small letter l with stroke |
+ * | U+0143 | Ń | N | Latin capital letter N with acute |
+ * | U+0144 | ń | n | Latin small letter n with acute |
+ * | U+0145 | Ņ | N | Latin capital letter N with cedilla |
+ * | U+0146 | ņ | n | Latin small letter n with cedilla |
+ * | U+0147 | Ň | N | Latin capital letter N with caron |
+ * | U+0148 | ň | n | Latin small letter n with caron |
+ * | U+0149 | ʼn | n | Latin small letter n preceded by apostrophe |
+ * | U+014A | Ŋ | N | Latin capital letter Eng |
+ * | U+014B | ŋ | n | Latin small letter Eng |
+ * | U+014C | Ō | O | Latin capital letter O with macron |
+ * | U+014D | ō | o | Latin small letter o with macron |
+ * | U+014E | Ŏ | O | Latin capital letter O with breve |
+ * | U+014F | ŏ | o | Latin small letter o with breve |
+ * | U+0150 | Ő | O | Latin capital letter O with double acute |
+ * | U+0151 | ő | o | Latin small letter o with double acute |
+ * | U+0152 | Œ | OE | Latin capital ligature OE |
+ * | U+0153 | œ | oe | Latin small ligature oe |
+ * | U+0154 | Ŕ | R | Latin capital letter R with acute |
+ * | U+0155 | ŕ | r | Latin small letter r with acute |
+ * | U+0156 | Ŗ | R | Latin capital letter R with cedilla |
+ * | U+0157 | ŗ | r | Latin small letter r with cedilla |
+ * | U+0158 | Ř | R | Latin capital letter R with caron |
+ * | U+0159 | ř | r | Latin small letter r with caron |
+ * | U+015A | Ś | S | Latin capital letter S with acute |
+ * | U+015B | ś | s | Latin small letter s with acute |
+ * | U+015C | Ŝ | S | Latin capital letter S with circumflex |
+ * | U+015D | ŝ | s | Latin small letter s with circumflex |
+ * | U+015E | Ş | S | Latin capital letter S with cedilla |
+ * | U+015F | ş | s | Latin small letter s with cedilla |
+ * | U+0160 | Š | S | Latin capital letter S with caron |
+ * | U+0161 | š | s | Latin small letter s with caron |
+ * | U+0162 | Ţ | T | Latin capital letter T with cedilla |
+ * | U+0163 | ţ | t | Latin small letter t with cedilla |
+ * | U+0164 | Ť | T | Latin capital letter T with caron |
+ * | U+0165 | ť | t | Latin small letter t with caron |
+ * | U+0166 | Ŧ | T | Latin capital letter T with stroke |
+ * | U+0167 | ŧ | t | Latin small letter t with stroke |
+ * | U+0168 | Ũ | U | Latin capital letter U with tilde |
+ * | U+0169 | ũ | u | Latin small letter u with tilde |
+ * | U+016A | Ū | U | Latin capital letter U with macron |
+ * | U+016B | ū | u | Latin small letter u with macron |
+ * | U+016C | Ŭ | U | Latin capital letter U with breve |
+ * | U+016D | ŭ | u | Latin small letter u with breve |
+ * | U+016E | Ů | U | Latin capital letter U with ring above |
+ * | U+016F | ů | u | Latin small letter u with ring above |
+ * | U+0170 | Ű | U | Latin capital letter U with double acute |
+ * | U+0171 | ű | u | Latin small letter u with double acute |
+ * | U+0172 | Ų | U | Latin capital letter U with ogonek |
+ * | U+0173 | ų | u | Latin small letter u with ogonek |
+ * | U+0174 | Ŵ | W | Latin capital letter W with circumflex |
+ * | U+0175 | ŵ | w | Latin small letter w with circumflex |
+ * | U+0176 | Ŷ | Y | Latin capital letter Y with circumflex |
+ * | U+0177 | ŷ | y | Latin small letter y with circumflex |
+ * | U+0178 | Ÿ | Y | Latin capital letter Y with diaeresis |
+ * | U+0179 | Ź | Z | Latin capital letter Z with acute |
+ * | U+017A | ź | z | Latin small letter z with acute |
+ * | U+017B | Ż | Z | Latin capital letter Z with dot above |
+ * | U+017C | ż | z | Latin small letter z with dot above |
+ * | U+017D | Ž | Z | Latin capital letter Z with caron |
+ * | U+017E | ž | z | Latin small letter z with caron |
+ * | U+017F | ſ | s | Latin small letter long s |
+ * | U+01A0 | Ơ | O | Latin capital letter O with horn |
+ * | U+01A1 | ơ | o | Latin small letter o with horn |
+ * | U+01AF | Ư | U | Latin capital letter U with horn |
+ * | U+01B0 | ư | u | Latin small letter u with horn |
+ * | U+01CD | Ǎ | A | Latin capital letter A with caron |
+ * | U+01CE | ǎ | a | Latin small letter a with caron |
+ * | U+01CF | Ǐ | I | Latin capital letter I with caron |
+ * | U+01D0 | ǐ | i | Latin small letter i with caron |
+ * | U+01D1 | Ǒ | O | Latin capital letter O with caron |
+ * | U+01D2 | ǒ | o | Latin small letter o with caron |
+ * | U+01D3 | Ǔ | U | Latin capital letter U with caron |
+ * | U+01D4 | ǔ | u | Latin small letter u with caron |
+ * | U+01D5 | Ǖ | U | Latin capital letter U with diaeresis and macron |
+ * | U+01D6 | ǖ | u | Latin small letter u with diaeresis and macron |
+ * | U+01D7 | Ǘ | U | Latin capital letter U with diaeresis and acute |
+ * | U+01D8 | ǘ | u | Latin small letter u with diaeresis and acute |
+ * | U+01D9 | Ǚ | U | Latin capital letter U with diaeresis and caron |
+ * | U+01DA | ǚ | u | Latin small letter u with diaeresis and caron |
+ * | U+01DB | Ǜ | U | Latin capital letter U with diaeresis and grave |
+ * | U+01DC | ǜ | u | Latin small letter u with diaeresis and grave |
+ *
+ * Decompositions for Latin Extended-B:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | ----------------------------------------- |
+ * | U+0218 | Ș | S | Latin capital letter S with comma below |
+ * | U+0219 | ș | s | Latin small letter s with comma below |
+ * | U+021A | Ț | T | Latin capital letter T with comma below |
+ * | U+021B | ț | t | Latin small letter t with comma below |
+ *
+ * Vowels with diacritic (Vietnamese; Chinese, Hanyu Pinyin):
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | ----------------------------------------------------- |
+ * | U+0251 | ɑ | a | Latin small letter alpha |
+ * | U+1EA0 | Ạ | A | Latin capital letter A with dot below |
+ * | U+1EA1 | ạ | a | Latin small letter a with dot below |
+ * | U+1EA2 | Ả | A | Latin capital letter A with hook above |
+ * | U+1EA3 | ả | a | Latin small letter a with hook above |
+ * | U+1EA4 | Ấ | A | Latin capital letter A with circumflex and acute |
+ * | U+1EA5 | ấ | a | Latin small letter a with circumflex and acute |
+ * | U+1EA6 | Ầ | A | Latin capital letter A with circumflex and grave |
+ * | U+1EA7 | ầ | a | Latin small letter a with circumflex and grave |
+ * | U+1EA8 | Ẩ | A | Latin capital letter A with circumflex and hook above |
+ * | U+1EA9 | ẩ | a | Latin small letter a with circumflex and hook above |
+ * | U+1EAA | Ẫ | A | Latin capital letter A with circumflex and tilde |
+ * | U+1EAB | ẫ | a | Latin small letter a with circumflex and tilde |
+ * | U+1EAC | Ậ | A | Latin capital letter A with circumflex and dot below |
+ * | U+1EAD | ậ | a | Latin small letter a with circumflex and dot below |
+ * | U+1EAE | Ắ | A | Latin capital letter A with breve and acute |
+ * | U+1EAF | ắ | a | Latin small letter a with breve and acute |
+ * | U+1EB0 | Ằ | A | Latin capital letter A with breve and grave |
+ * | U+1EB1 | ằ | a | Latin small letter a with breve and grave |
+ * | U+1EB2 | Ẳ | A | Latin capital letter A with breve and hook above |
+ * | U+1EB3 | ẳ | a | Latin small letter a with breve and hook above |
+ * | U+1EB4 | Ẵ | A | Latin capital letter A with breve and tilde |
+ * | U+1EB5 | ẵ | a | Latin small letter a with breve and tilde |
+ * | U+1EB6 | Ặ | A | Latin capital letter A with breve and dot below |
+ * | U+1EB7 | ặ | a | Latin small letter a with breve and dot below |
+ * | U+1EB8 | Ẹ | E | Latin capital letter E with dot below |
+ * | U+1EB9 | ẹ | e | Latin small letter e with dot below |
+ * | U+1EBA | Ẻ | E | Latin capital letter E with hook above |
+ * | U+1EBB | ẻ | e | Latin small letter e with hook above |
+ * | U+1EBC | Ẽ | E | Latin capital letter E with tilde |
+ * | U+1EBD | ẽ | e | Latin small letter e with tilde |
+ * | U+1EBE | Ế | E | Latin capital letter E with circumflex and acute |
+ * | U+1EBF | ế | e | Latin small letter e with circumflex and acute |
+ * | U+1EC0 | Ề | E | Latin capital letter E with circumflex and grave |
+ * | U+1EC1 | ề | e | Latin small letter e with circumflex and grave |
+ * | U+1EC2 | Ể | E | Latin capital letter E with circumflex and hook above |
+ * | U+1EC3 | ể | e | Latin small letter e with circumflex and hook above |
+ * | U+1EC4 | Ễ | E | Latin capital letter E with circumflex and tilde |
+ * | U+1EC5 | ễ | e | Latin small letter e with circumflex and tilde |
+ * | U+1EC6 | Ệ | E | Latin capital letter E with circumflex and dot below |
+ * | U+1EC7 | ệ | e | Latin small letter e with circumflex and dot below |
+ * | U+1EC8 | Ỉ | I | Latin capital letter I with hook above |
+ * | U+1EC9 | ỉ | i | Latin small letter i with hook above |
+ * | U+1ECA | Ị | I | Latin capital letter I with dot below |
+ * | U+1ECB | ị | i | Latin small letter i with dot below |
+ * | U+1ECC | Ọ | O | Latin capital letter O with dot below |
+ * | U+1ECD | ọ | o | Latin small letter o with dot below |
+ * | U+1ECE | Ỏ | O | Latin capital letter O with hook above |
+ * | U+1ECF | ỏ | o | Latin small letter o with hook above |
+ * | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute |
+ * | U+1ED1 | ố | o | Latin small letter o with circumflex and acute |
+ * | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave |
+ * | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave |
+ * | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above |
+ * | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above |
+ * | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde |
+ * | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde |
+ * | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below |
+ * | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below |
+ * | U+1EDA | Ớ | O | Latin capital letter O with horn and acute |
+ * | U+1EDB | ớ | o | Latin small letter o with horn and acute |
+ * | U+1EDC | Ờ | O | Latin capital letter O with horn and grave |
+ * | U+1EDD | ờ | o | Latin small letter o with horn and grave |
+ * | U+1EDE | Ở | O | Latin capital letter O with horn and hook above |
+ * | U+1EDF | ở | o | Latin small letter o with horn and hook above |
+ * | U+1EE0 | Ỡ | O | Latin capital letter O with horn and tilde |
+ * | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde |
+ * | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below |
+ * | U+1EE3 | ợ | o | Latin small letter o with horn and dot below |
+ * | U+1EE4 | Ụ | U | Latin capital letter U with dot below |
+ * | U+1EE5 | ụ | u | Latin small letter u with dot below |
+ * | U+1EE6 | Ủ | U | Latin capital letter U with hook above |
+ * | U+1EE7 | ủ | u | Latin small letter u with hook above |
+ * | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute |
+ * | U+1EE9 | ứ | u | Latin small letter u with horn and acute |
+ * | U+1EEA | Ừ | U | Latin capital letter U with horn and grave |
+ * | U+1EEB | ừ | u | Latin small letter u with horn and grave |
+ * | U+1EEC | Ử | U | Latin capital letter U with horn and hook above |
+ * | U+1EED | ử | u | Latin small letter u with horn and hook above |
+ * | U+1EEE | Ữ | U | Latin capital letter U with horn and tilde |
+ * | U+1EEF | ữ | u | Latin small letter u with horn and tilde |
+ * | U+1EF0 | Ự | U | Latin capital letter U with horn and dot below |
+ * | U+1EF1 | ự | u | Latin small letter u with horn and dot below |
+ * | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave |
+ * | U+1EF3 | ỳ | y | Latin small letter y with grave |
+ * | U+1EF4 | Ỵ | Y | Latin capital letter Y with dot below |
+ * | U+1EF5 | ỵ | y | Latin small letter y with dot below |
+ * | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above |
+ * | U+1EF7 | ỷ | y | Latin small letter y with hook above |
+ * | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde |
+ * | U+1EF9 | ỹ | y | Latin small letter y with tilde |
+ *
+ * German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`),
+ * and German (Switzerland) informal (`de_CH_informal`) locales:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis |
+ * | U+00E4 | ä | ae | Latin small letter a with diaeresis |
+ * | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis |
+ * | U+00F6 | ö | oe | Latin small letter o with diaeresis |
+ * | U+00DC | Ü | Ue | Latin capital letter U with diaeresis |
+ * | U+00FC | ü | ue | Latin small letter u with diaeresis |
+ * | U+00DF | ß | ss | Latin small letter sharp s |
+ *
+ * Danish (`da_DK`) locale:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+00C6 | Æ | Ae | Latin capital letter AE |
+ * | U+00E6 | æ | ae | Latin small letter ae |
+ * | U+00D8 | Ø | Oe | Latin capital letter O with stroke |
+ * | U+00F8 | ø | oe | Latin small letter o with stroke |
+ * | U+00C5 | Å | Aa | Latin capital letter A with ring above |
+ * | U+00E5 | å | aa | Latin small letter a with ring above |
+ *
+ * Catalan (`ca`) locale:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+00B7 | l·l | ll | Flown dot (between two Ls) |
+ *
+ * Serbian (`sr_RS`) locale:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+0110 | Đ | DJ | Latin capital letter D with stroke |
+ * | U+0111 | đ | dj | Latin small letter d with stroke |
+ *
* @since 1.2.1
+ * @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`.
+ * @since 4.7.0 Added locale support for `sr_RS`.
*
* @param string $string Text that might have accent characters.
* @return string Filtered string with replaced "nice" characters.
if (seems_utf8($string)) {
$chars = array(
// Decompositions for Latin-1 Supplement
- chr(194).chr(170) => 'a', chr(194).chr(186) => 'o',
- chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
- chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
- chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
- chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
- chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
- chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
- chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
- chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
- chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
- chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
- chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
- chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
- chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
- chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
- chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
- chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
- chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
- chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
- chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
- chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
- chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
- chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
- chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
- chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
- chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
- chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
- chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
- chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
- chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
- chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
- chr(195).chr(191) => 'y', chr(195).chr(152) => 'O',
+ 'ª' => 'a', 'º' => 'o',
+ 'À' => 'A', 'Á' => 'A',
+ 'Â' => 'A', 'Ã' => 'A',
+ 'Ä' => 'A', 'Å' => 'A',
+ 'Æ' => 'AE','Ç' => 'C',
+ 'È' => 'E', 'É' => 'E',
+ 'Ê' => 'E', 'Ë' => 'E',
+ 'Ì' => 'I', 'Í' => 'I',
+ 'Î' => 'I', 'Ï' => 'I',
+ 'Ð' => 'D', 'Ñ' => 'N',
+ 'Ò' => 'O', 'Ó' => 'O',
+ 'Ô' => 'O', 'Õ' => 'O',
+ 'Ö' => 'O', 'Ù' => 'U',
+ 'Ú' => 'U', 'Û' => 'U',
+ 'Ü' => 'U', 'Ý' => 'Y',
+ 'Þ' => 'TH','ß' => 's',
+ 'à' => 'a', 'á' => 'a',
+ 'â' => 'a', 'ã' => 'a',
+ 'ä' => 'a', 'å' => 'a',
+ 'æ' => 'ae','ç' => 'c',
+ 'è' => 'e', 'é' => 'e',
+ 'ê' => 'e', 'ë' => 'e',
+ 'ì' => 'i', 'í' => 'i',
+ 'î' => 'i', 'ï' => 'i',
+ 'ð' => 'd', 'ñ' => 'n',
+ 'ò' => 'o', 'ó' => 'o',
+ 'ô' => 'o', 'õ' => 'o',
+ 'ö' => 'o', 'ø' => 'o',
+ 'ù' => 'u', 'ú' => 'u',
+ 'û' => 'u', 'ü' => 'u',
+ 'ý' => 'y', 'þ' => 'th',
+ 'ÿ' => 'y', 'Ø' => 'O',
// Decompositions for Latin Extended-A
- chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
- chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
- chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
- chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
- chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
- chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
- chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
- chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
- chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
- chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
- chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
- chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
- chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
- chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
- chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
- chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
- chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
- chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
- chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
- chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
- chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
- chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
- chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
- chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
- chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
- chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
- chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
- chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
- chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
- chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
- chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
- chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
- chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
- chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
- chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
- chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
- chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
- chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
- chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
- chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
- chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
- chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
- chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
- chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
- chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
- chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
- chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
- chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
- chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
- chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
- chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
- chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
- chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
- chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
- chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
- chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
- chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
- chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
- chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
- chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
- chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
- chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
- chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
- chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
+ 'Ā' => 'A', 'ā' => 'a',
+ 'Ă' => 'A', 'ă' => 'a',
+ 'Ą' => 'A', 'ą' => 'a',
+ 'Ć' => 'C', 'ć' => 'c',
+ 'Ĉ' => 'C', 'ĉ' => 'c',
+ 'Ċ' => 'C', 'ċ' => 'c',
+ 'Č' => 'C', 'č' => 'c',
+ 'Ď' => 'D', 'ď' => 'd',
+ 'Đ' => 'D', 'đ' => 'd',
+ 'Ē' => 'E', 'ē' => 'e',
+ 'Ĕ' => 'E', 'ĕ' => 'e',
+ 'Ė' => 'E', 'ė' => 'e',
+ 'Ę' => 'E', 'ę' => 'e',
+ 'Ě' => 'E', 'ě' => 'e',
+ 'Ĝ' => 'G', 'ĝ' => 'g',
+ 'Ğ' => 'G', 'ğ' => 'g',
+ 'Ġ' => 'G', 'ġ' => 'g',
+ 'Ģ' => 'G', 'ģ' => 'g',
+ 'Ĥ' => 'H', 'ĥ' => 'h',
+ 'Ħ' => 'H', 'ħ' => 'h',
+ 'Ĩ' => 'I', 'ĩ' => 'i',
+ 'Ī' => 'I', 'ī' => 'i',
+ 'Ĭ' => 'I', 'ĭ' => 'i',
+ 'Į' => 'I', 'į' => 'i',
+ 'İ' => 'I', 'ı' => 'i',
+ 'IJ' => 'IJ','ij' => 'ij',
+ 'Ĵ' => 'J', 'ĵ' => 'j',
+ 'Ķ' => 'K', 'ķ' => 'k',
+ 'ĸ' => 'k', 'Ĺ' => 'L',
+ 'ĺ' => 'l', 'Ļ' => 'L',
+ 'ļ' => 'l', 'Ľ' => 'L',
+ 'ľ' => 'l', 'Ŀ' => 'L',
+ 'ŀ' => 'l', 'Ł' => 'L',
+ 'ł' => 'l', 'Ń' => 'N',
+ 'ń' => 'n', 'Ņ' => 'N',
+ 'ņ' => 'n', 'Ň' => 'N',
+ 'ň' => 'n', 'ʼn' => 'n',
+ 'Ŋ' => 'N', 'ŋ' => 'n',
+ 'Ō' => 'O', 'ō' => 'o',
+ 'Ŏ' => 'O', 'ŏ' => 'o',
+ 'Ő' => 'O', 'ő' => 'o',
+ 'Œ' => 'OE','œ' => 'oe',
+ 'Ŕ' => 'R','ŕ' => 'r',
+ 'Ŗ' => 'R','ŗ' => 'r',
+ 'Ř' => 'R','ř' => 'r',
+ 'Ś' => 'S','ś' => 's',
+ 'Ŝ' => 'S','ŝ' => 's',
+ 'Ş' => 'S','ş' => 's',
+ 'Š' => 'S', 'š' => 's',
+ 'Ţ' => 'T', 'ţ' => 't',
+ 'Ť' => 'T', 'ť' => 't',
+ 'Ŧ' => 'T', 'ŧ' => 't',
+ 'Ũ' => 'U', 'ũ' => 'u',
+ 'Ū' => 'U', 'ū' => 'u',
+ 'Ŭ' => 'U', 'ŭ' => 'u',
+ 'Ů' => 'U', 'ů' => 'u',
+ 'Ű' => 'U', 'ű' => 'u',
+ 'Ų' => 'U', 'ų' => 'u',
+ 'Ŵ' => 'W', 'ŵ' => 'w',
+ 'Ŷ' => 'Y', 'ŷ' => 'y',
+ 'Ÿ' => 'Y', 'Ź' => 'Z',
+ 'ź' => 'z', 'Ż' => 'Z',
+ 'ż' => 'z', 'Ž' => 'Z',
+ 'ž' => 'z', 'ſ' => 's',
// Decompositions for Latin Extended-B
- chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
- chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
+ 'Ș' => 'S', 'ș' => 's',
+ 'Ț' => 'T', 'ț' => 't',
// Euro Sign
- chr(226).chr(130).chr(172) => 'E',
+ '€' => 'E',
// GBP (Pound) Sign
- chr(194).chr(163) => '',
+ '£' => '',
// Vowels with diacritic (Vietnamese)
// unmarked
- chr(198).chr(160) => 'O', chr(198).chr(161) => 'o',
- chr(198).chr(175) => 'U', chr(198).chr(176) => 'u',
+ 'Ơ' => 'O', 'ơ' => 'o',
+ 'Ư' => 'U', 'ư' => 'u',
// grave accent
- chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a',
- chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a',
- chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e',
- chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o',
- chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o',
- chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u',
- chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y',
+ 'Ầ' => 'A', 'ầ' => 'a',
+ 'Ằ' => 'A', 'ằ' => 'a',
+ 'Ề' => 'E', 'ề' => 'e',
+ 'Ồ' => 'O', 'ồ' => 'o',
+ 'Ờ' => 'O', 'ờ' => 'o',
+ 'Ừ' => 'U', 'ừ' => 'u',
+ 'Ỳ' => 'Y', 'ỳ' => 'y',
// hook
- chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a',
- chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a',
- chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a',
- chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e',
- chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e',
- chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i',
- chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o',
- chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o',
- chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o',
- chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u',
- chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u',
- chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y',
+ 'Ả' => 'A', 'ả' => 'a',
+ 'Ẩ' => 'A', 'ẩ' => 'a',
+ 'Ẳ' => 'A', 'ẳ' => 'a',
+ 'Ẻ' => 'E', 'ẻ' => 'e',
+ 'Ể' => 'E', 'ể' => 'e',
+ 'Ỉ' => 'I', 'ỉ' => 'i',
+ 'Ỏ' => 'O', 'ỏ' => 'o',
+ 'Ổ' => 'O', 'ổ' => 'o',
+ 'Ở' => 'O', 'ở' => 'o',
+ 'Ủ' => 'U', 'ủ' => 'u',
+ 'Ử' => 'U', 'ử' => 'u',
+ 'Ỷ' => 'Y', 'ỷ' => 'y',
// tilde
- chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a',
- chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a',
- chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e',
- chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e',
- chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o',
- chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o',
- chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u',
- chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y',
+ 'Ẫ' => 'A', 'ẫ' => 'a',
+ 'Ẵ' => 'A', 'ẵ' => 'a',
+ 'Ẽ' => 'E', 'ẽ' => 'e',
+ 'Ễ' => 'E', 'ễ' => 'e',
+ 'Ỗ' => 'O', 'ỗ' => 'o',
+ 'Ỡ' => 'O', 'ỡ' => 'o',
+ 'Ữ' => 'U', 'ữ' => 'u',
+ 'Ỹ' => 'Y', 'ỹ' => 'y',
// acute accent
- chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a',
- chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a',
- chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e',
- chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o',
- chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o',
- chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u',
+ 'Ấ' => 'A', 'ấ' => 'a',
+ 'Ắ' => 'A', 'ắ' => 'a',
+ 'Ế' => 'E', 'ế' => 'e',
+ 'Ố' => 'O', 'ố' => 'o',
+ 'Ớ' => 'O', 'ớ' => 'o',
+ 'Ứ' => 'U', 'ứ' => 'u',
// dot below
- chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a',
- chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a',
- chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a',
- chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e',
- chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e',
- chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i',
- chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o',
- chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o',
- chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o',
- chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u',
- chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u',
- chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y',
+ 'Ạ' => 'A', 'ạ' => 'a',
+ 'Ậ' => 'A', 'ậ' => 'a',
+ 'Ặ' => 'A', 'ặ' => 'a',
+ 'Ẹ' => 'E', 'ẹ' => 'e',
+ 'Ệ' => 'E', 'ệ' => 'e',
+ 'Ị' => 'I', 'ị' => 'i',
+ 'Ọ' => 'O', 'ọ' => 'o',
+ 'Ộ' => 'O', 'ộ' => 'o',
+ 'Ợ' => 'O', 'ợ' => 'o',
+ 'Ụ' => 'U', 'ụ' => 'u',
+ 'Ự' => 'U', 'ự' => 'u',
+ 'Ỵ' => 'Y', 'ỵ' => 'y',
// Vowels with diacritic (Chinese, Hanyu Pinyin)
- chr(201).chr(145) => 'a',
+ 'ɑ' => 'a',
// macron
- chr(199).chr(149) => 'U', chr(199).chr(150) => 'u',
+ 'Ǖ' => 'U', 'ǖ' => 'u',
// acute accent
- chr(199).chr(151) => 'U', chr(199).chr(152) => 'u',
+ 'Ǘ' => 'U', 'ǘ' => 'u',
// caron
- chr(199).chr(141) => 'A', chr(199).chr(142) => 'a',
- chr(199).chr(143) => 'I', chr(199).chr(144) => 'i',
- chr(199).chr(145) => 'O', chr(199).chr(146) => 'o',
- chr(199).chr(147) => 'U', chr(199).chr(148) => 'u',
- chr(199).chr(153) => 'U', chr(199).chr(154) => 'u',
+ 'Ǎ' => 'A', 'ǎ' => 'a',
+ 'Ǐ' => 'I', 'ǐ' => 'i',
+ 'Ǒ' => 'O', 'ǒ' => 'o',
+ 'Ǔ' => 'U', 'ǔ' => 'u',
+ 'Ǚ' => 'U', 'ǚ' => 'u',
// grave accent
- chr(199).chr(155) => 'U', chr(199).chr(156) => 'u',
+ 'Ǜ' => 'U', 'ǜ' => 'u',
);
// Used for locale-specific rules
$locale = get_locale();
- if ( 'de_DE' == $locale || 'de_DE_formal' == $locale ) {
- $chars[ chr(195).chr(132) ] = 'Ae';
- $chars[ chr(195).chr(164) ] = 'ae';
- $chars[ chr(195).chr(150) ] = 'Oe';
- $chars[ chr(195).chr(182) ] = 'oe';
- $chars[ chr(195).chr(156) ] = 'Ue';
- $chars[ chr(195).chr(188) ] = 'ue';
- $chars[ chr(195).chr(159) ] = 'ss';
+ if ( 'de_DE' == $locale || 'de_DE_formal' == $locale || 'de_CH' == $locale || 'de_CH_informal' == $locale ) {
+ $chars[ 'Ä' ] = 'Ae';
+ $chars[ 'ä' ] = 'ae';
+ $chars[ 'Ö' ] = 'Oe';
+ $chars[ 'ö' ] = 'oe';
+ $chars[ 'Ü' ] = 'Ue';
+ $chars[ 'ü' ] = 'ue';
+ $chars[ 'ß' ] = 'ss';
} elseif ( 'da_DK' === $locale ) {
- $chars[ chr(195).chr(134) ] = 'Ae';
- $chars[ chr(195).chr(166) ] = 'ae';
- $chars[ chr(195).chr(152) ] = 'Oe';
- $chars[ chr(195).chr(184) ] = 'oe';
- $chars[ chr(195).chr(133) ] = 'Aa';
- $chars[ chr(195).chr(165) ] = 'aa';
+ $chars[ 'Æ' ] = 'Ae';
+ $chars[ 'æ' ] = 'ae';
+ $chars[ 'Ø' ] = 'Oe';
+ $chars[ 'ø' ] = 'oe';
+ $chars[ 'Å' ] = 'Aa';
+ $chars[ 'å' ] = 'aa';
+ } elseif ( 'ca' === $locale ) {
+ $chars[ 'l·l' ] = 'll';
+ } elseif ( 'sr_RS' === $locale ) {
+ $chars[ 'Đ' ] = 'DJ';
+ $chars[ 'đ' ] = 'dj';
}
$string = strtr($string, $chars);
} else {
$chars = array();
// Assume ISO-8859-1 if not UTF-8
- $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
- .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
- .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
- .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
- .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
- .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
- .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
- .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
- .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
- .chr(252).chr(253).chr(255);
+ $chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e"
+ ."\x9f\xa2\xa5\xb5\xc0\xc1\xc2"
+ ."\xc3\xc4\xc5\xc7\xc8\xc9\xca"
+ ."\xcb\xcc\xcd\xce\xcf\xd1\xd2"
+ ."\xd3\xd4\xd5\xd6\xd8\xd9\xda"
+ ."\xdb\xdc\xdd\xe0\xe1\xe2\xe3"
+ ."\xe4\xe5\xe7\xe8\xe9\xea\xeb"
+ ."\xec\xed\xee\xef\xf1\xf2\xf3"
+ ."\xf4\xf5\xf6\xf8\xf9\xfa\xfb"
+ ."\xfc\xfd\xff";
$chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
$string = strtr($string, $chars['in'], $chars['out']);
$double_chars = array();
- $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
+ $double_chars['in'] = array("\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe");
$double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
$string = str_replace($double_chars['in'], $double_chars['out'], $string);
}
* operating systems and special characters requiring special escaping
* to manipulate at the command line. Replaces spaces and consecutive
* dashes with a single dash. Trims period, dash and underscore from beginning
- * and end of filename.
+ * and end of filename. It is not guaranteed that this function will return a
+ * filename that is allowed to be uploaded.
*
* @since 2.1.0
*
$filename_raw = $filename;
$special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", "%", "+", chr(0));
/**
- * Filter the list of characters to remove from a filename.
+ * Filters the list of characters to remove from a filename.
*
* @since 2.8.0
*
$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );
$filename = trim( $filename, '.-_' );
+ if ( false === strpos( $filename, '.' ) ) {
+ $mime_types = wp_get_mime_types();
+ $filetype = wp_check_filetype( 'test.' . $filename, $mime_types );
+ if ( $filetype['ext'] === $filename ) {
+ $filename = 'unnamed-file.' . $filetype['ext'];
+ }
+ }
+
// Split the filename into a base and extension[s]
$parts = explode('.', $filename);
// Return if only one extension
if ( count( $parts ) <= 2 ) {
/**
- * Filter a sanitized filename string.
+ * Filters a sanitized filename string.
*
* @since 2.8.0
*
* Removes tags, octets, entities, and if strict is enabled, will only keep
* alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
* raw username (the username in the parameter), and the value of $strict as
- * parameters for the 'sanitize_user' filter.
+ * parameters for the {@see 'sanitize_user'} filter.
*
* @since 2.0.0
*
$username = preg_replace( '|\s+|', ' ', $username );
/**
- * Filter a sanitized username string.
+ * Filters a sanitized username string.
*
* @since 2.0.1
*
$key = preg_replace( '/[^a-z0-9_\-]/', '', $key );
/**
- * Filter a sanitized key string.
+ * Filters a sanitized key string.
*
* @since 3.0.0
*
$title = remove_accents($title);
/**
- * Filter a sanitized title string.
+ * Filters a sanitized title string.
*
* @since 1.2.0
*
}
$title = strtolower($title);
- $title = preg_replace('/&.+?;/', '', $title); // kill entities
- $title = str_replace('.', '-', $title);
if ( 'save' == $context ) {
// Convert nbsp, ndash and mdash to hyphens
$title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title );
+ // Convert nbsp, ndash and mdash HTML entities to hyphens
+ $title = str_replace( array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' ), '-', $title );
// Strip these characters entirely
$title = str_replace( array(
$title = str_replace( '%c3%97', 'x', $title );
}
+ $title = preg_replace('/&.+?;/', '', $title); // kill entities
+ $title = str_replace('.', '-', $title);
+
$title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
$title = preg_replace('/\s+/', '-', $title);
$title = preg_replace('|-+|', '-', $title);
return sanitize_html_class( $fallback );
}
/**
- * Filter a sanitized HTML class string.
+ * Filters a sanitized HTML class string.
*
* @since 2.8.0
*
* Acts on text which is about to be edited.
*
* The $content is run through esc_textarea(), which uses htmlspecialchars()
- * to convert special characters to HTML entities. If $richedit is set to true,
- * it is simply a holder for the 'format_to_edit' filter.
+ * to convert special characters to HTML entities. If `$richedit` is set to true,
+ * it is simply a holder for the {@see 'format_to_edit'} filter.
*
* @since 0.71
* @since 4.4.0 The `$richedit` parameter was renamed to `$rich_text` for clarity.
*/
function format_to_edit( $content, $rich_text = false ) {
/**
- * Filter the text to be formatted for editing.
+ * Filters the text to be formatted for editing.
*
* @since 1.2.0
*
* Adds slashes to escape strings.
*
* Slashes will first be removed if magic_quotes_gpc is set, see {@link
- * http://www.php.net/magic_quotes} for more details.
+ * https://secure.php.net/magic_quotes} for more details.
*
* @since 0.71
*
/**
* Callback to convert URI match to HTML A element.
*
- * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link
- * make_clickable()}.
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
*
* @since 2.3.2
* @access private
/**
* Callback to convert URL match to HTML A element.
*
- * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link
- * make_clickable()}.
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
*
* @since 2.3.2
* @access private
/**
* Callback to convert email address match to HTML A element.
*
- * This function was backported from 2.5.0 to 2.3.2. Regex callback for {@link
- * make_clickable()}.
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
*
* @since 2.3.2
* @access private
$nested_code_pre = 0; // Keep track of how many levels link is nested inside <pre> or <code>
foreach ( $textarr as $piece ) {
- if ( preg_match( '|^<code[\s>]|i', $piece ) || preg_match( '|^<pre[\s>]|i', $piece ) )
+ if ( preg_match( '|^<code[\s>]|i', $piece ) || preg_match( '|^<pre[\s>]|i', $piece ) || preg_match( '|^<script[\s>]|i', $piece ) || preg_match( '|^<style[\s>]|i', $piece ) )
$nested_code_pre++;
- elseif ( ( '</code>' === strtolower( $piece ) || '</pre>' === strtolower( $piece ) ) && $nested_code_pre )
+ elseif ( $nested_code_pre && ( '</code>' === strtolower( $piece ) || '</pre>' === strtolower( $piece ) || '</script>' === strtolower( $piece ) || '</style>' === strtolower( $piece ) ) )
$nested_code_pre--;
if ( $nested_code_pre || empty( $piece ) || ( $piece[0] === '<' && ! preg_match( '|^<\s*[\w]{1,20}+://|', $piece ) ) ) {
function wp_rel_nofollow_callback( $matches ) {
$text = $matches[1];
$atts = shortcode_parse_atts( $matches[1] );
- $rel = 'nofollow';
+ $rel = 'nofollow';
+
+ if ( preg_match( '%href=["\'](' . preg_quote( set_url_scheme( home_url(), 'http' ) ) . ')%i', $text ) ||
+ preg_match( '%href=["\'](' . preg_quote( set_url_scheme( home_url(), 'https' ) ) . ')%i', $text )
+ ) {
+ return "<a $text>";
+ }
+
if ( ! empty( $atts['rel'] ) ) {
$parts = array_map( 'trim', explode( ' ', $atts['rel'] ) );
if ( false === array_search( 'nofollow', $parts ) ) {
/**
* Convert one smiley code to the icon graphic file equivalent.
*
- * Callback handler for {@link convert_smilies()}.
+ * Callback handler for convert_smilies().
+ *
* Looks up one smiley code in the $wpsmiliestrans global array and returns an
* `<img>` string for that smiley.
*
}
/**
- * Filter the Smiley image URL before it's used in the image element.
+ * Filters the Smiley image URL before it's used in the image element.
*
* @since 2.9.0
*
*/
function is_email( $email, $deprecated = false ) {
if ( ! empty( $deprecated ) )
- _deprecated_argument( __FUNCTION__, '3.0' );
+ _deprecated_argument( __FUNCTION__, '3.0.0' );
// Test for the minimum length the email can be
if ( strlen( $email ) < 3 ) {
/**
- * Filter whether an email address is valid.
+ * Filters whether an email address is valid.
*
* This filter is evaluated under several different contexts, such as 'email_too_short',
* 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
*
* @since 1.5.0
*
- * @param string $date_string Date and time in ISO 8601 format {@link http://en.wikipedia.org/wiki/ISO_8601}.
+ * @param string $date_string Date and time in ISO 8601 format {@link https://en.wikipedia.org/wiki/ISO_8601}.
* @param string $timezone Optional. If set to GMT returns the time minus gmt_offset. Default is 'user'.
* @return string The date and time in MySQL DateTime format - Y-m-d H:i:s.
*/
}
}
-/**
- * Adds a element attributes to open links in new windows.
- *
- * Comment text in popup windows should be filtered through this. Right now it's
- * a moderately dumb function, ideally it would detect whether a target or rel
- * attribute was already there and adjust its actions accordingly.
- *
- * @since 0.71
- *
- * @param string $text Content to replace links to open in a new window.
- * @return string Content that has filtered links.
- */
-function popuplinks( $text ) {
- $text = preg_replace('/<a (.+?)>/i', "<a $1 target='_blank' rel='external'>", $text);
- return $text;
-}
-
/**
* Strips out all characters that are not allowable in an email.
*
// Test for the minimum length the email can be
if ( strlen( $email ) < 3 ) {
/**
- * Filter a sanitized email address.
+ * Filters a sanitized email address.
*
* This filter is evaluated under several contexts, including 'email_too_short',
* 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
$mins = round( $diff / MINUTE_IN_SECONDS );
if ( $mins <= 1 )
$mins = 1;
- /* translators: min=minute */
+ /* translators: Time difference between two dates, in minutes (min=minute). 1: Number of minutes */
$since = sprintf( _n( '%s min', '%s mins', $mins ), $mins );
} elseif ( $diff < DAY_IN_SECONDS && $diff >= HOUR_IN_SECONDS ) {
$hours = round( $diff / HOUR_IN_SECONDS );
if ( $hours <= 1 )
$hours = 1;
+ /* translators: Time difference between two dates, in hours. 1: Number of hours */
$since = sprintf( _n( '%s hour', '%s hours', $hours ), $hours );
} elseif ( $diff < WEEK_IN_SECONDS && $diff >= DAY_IN_SECONDS ) {
$days = round( $diff / DAY_IN_SECONDS );
if ( $days <= 1 )
$days = 1;
+ /* translators: Time difference between two dates, in days. 1: Number of days */
$since = sprintf( _n( '%s day', '%s days', $days ), $days );
} elseif ( $diff < MONTH_IN_SECONDS && $diff >= WEEK_IN_SECONDS ) {
$weeks = round( $diff / WEEK_IN_SECONDS );
if ( $weeks <= 1 )
$weeks = 1;
+ /* translators: Time difference between two dates, in weeks. 1: Number of weeks */
$since = sprintf( _n( '%s week', '%s weeks', $weeks ), $weeks );
} elseif ( $diff < YEAR_IN_SECONDS && $diff >= MONTH_IN_SECONDS ) {
$months = round( $diff / MONTH_IN_SECONDS );
if ( $months <= 1 )
$months = 1;
+ /* translators: Time difference between two dates, in months. 1: Number of months */
$since = sprintf( _n( '%s month', '%s months', $months ), $months );
} elseif ( $diff >= YEAR_IN_SECONDS ) {
$years = round( $diff / YEAR_IN_SECONDS );
if ( $years <= 1 )
$years = 1;
+ /* translators: Time difference between two dates, in years. 1: Number of years */
$since = sprintf( _n( '%s year', '%s years', $years ), $years );
}
/**
- * Filter the human readable difference between two timestamps.
+ * Filters the human readable difference between two timestamps.
*
* @since 4.0.0
*
* that, then the string ' […]' will be appended to the excerpt. If the string
* is less than 55 words, then the content will be returned as is.
*
- * The 55 word limit can be modified by plugins/themes using the excerpt_length filter
- * The ' […]' string can be modified by plugins/themes using the excerpt_more filter
+ * The 55 word limit can be modified by plugins/themes using the {@see 'excerpt_length'} filter.
+ * The ' […]' string can be modified by plugins/themes using the {@see 'excerpt_more'} filter.
*
* @since 1.5.0
*
$text = str_replace(']]>', ']]&gt;', $text);
/**
- * Filter the number of words in an excerpt.
+ * Filters the number of words in an excerpt.
*
* @since 2.7.0
*
*/
$excerpt_length = apply_filters( 'excerpt_length', 55 );
/**
- * Filter the string in the "more" link displayed after a trimmed excerpt.
+ * Filters the string in the "more" link displayed after a trimmed excerpt.
*
* @since 2.9.0
*
$text = wp_trim_words( $text, $excerpt_length, $excerpt_more );
}
/**
- * Filter the trimmed excerpt string.
+ * Filters the trimmed excerpt string.
*
* @since 2.8.0
*
}
/**
- * Filter the text content after words have been trimmed.
+ * Filters the text content after words have been trimmed.
*
* @since 3.3.0
*
function ent2ncr( $text ) {
/**
- * Filter text before named entities are converted into numbered entities.
+ * Filters text before named entities are converted into numbered entities.
*
* A non-null string must be returned for the filter to be evaluated.
*
*
* @since 4.3.0
*
- * @param string $text The text to be formatted.
+ * @see _WP_Editors::editor()
+ *
+ * @param string $text The text to be formatted.
+ * @param string $default_editor The default editor for the current user.
+ * It is usually either 'html' or 'tinymce'.
* @return string The formatted text after filter is applied.
*/
function format_for_editor( $text, $default_editor = null ) {
}
/**
- * Filter the text after it is formatted for the editor.
+ * Filters the text after it is formatted for the editor.
*
* @since 4.3.0
*
- * @param string $text The formatted text.
+ * @param string $text The formatted text.
+ * @param string $default_editor The default editor for the current user.
+ * It is usually either 'html' or 'tinymce'.
*/
return apply_filters( 'format_for_editor', $text, $default_editor );
}
* Checks and cleans a URL.
*
* A number of characters are removed from the URL. If the URL is for displaying
- * (the default behaviour) ampersands are also replaced. The 'clean_url' filter
+ * (the default behaviour) ampersands are also replaced. The {@see 'clean_url'} filter
* is applied to the returned cleaned URL.
*
* @since 2.8.0
* @param array $protocols Optional. An array of acceptable protocols.
* Defaults to return value of wp_allowed_protocols()
* @param string $_context Private. Use esc_url_raw() for database usage.
- * @return string The cleaned $url after the 'clean_url' filter is applied.
+ * @return string The cleaned $url after the {@see 'clean_url'} filter is applied.
*/
function esc_url( $url, $protocols = null, $_context = 'display' ) {
$original_url = $url;
}
/**
- * Filter a string cleaned and escaped for output as a URL.
+ * Filters a string cleaned and escaped for output as a URL.
*
* @since 2.3.0
*
/**
* Convert entities, while preserving already-encoded entities.
*
- * @link http://www.php.net/htmlentities Borrowed from the PHP Manual user notes.
+ * @link https://secure.php.net/htmlentities Borrowed from the PHP Manual user notes.
*
* @since 1.2.2
*
*
* Escapes text strings for echoing in JS. It is intended to be used for inline JS
* (in a tag attribute, for example onclick="..."). Note that the strings have to
- * be in single quotes. The filter 'js_escape' is also applied here.
+ * be in single quotes. The {@see 'js_escape'} filter is also applied here.
*
* @since 2.8.0
*
$safe_text = str_replace( "\r", '', $safe_text );
$safe_text = str_replace( "\n", '\\n', addslashes( $safe_text ) );
/**
- * Filter a string cleaned and escaped for output in JavaScript.
+ * Filters a string cleaned and escaped for output in JavaScript.
*
* Text passed to esc_js() is stripped of invalid or special characters,
* and properly slashed for output.
$safe_text = wp_check_invalid_utf8( $text );
$safe_text = _wp_specialchars( $safe_text, ENT_QUOTES );
/**
- * Filter a string cleaned and escaped for output in HTML.
+ * Filters a string cleaned and escaped for output in HTML.
*
* Text passed to esc_html() is stripped of invalid or special characters
* before output.
$safe_text = wp_check_invalid_utf8( $text );
$safe_text = _wp_specialchars( $safe_text, ENT_QUOTES );
/**
- * Filter a string cleaned and escaped for output in an HTML attribute.
+ * Filters a string cleaned and escaped for output in an HTML attribute.
*
* Text passed to esc_attr() is stripped of invalid or special characters
* before output.
function esc_textarea( $text ) {
$safe_text = htmlspecialchars( $text, ENT_QUOTES, get_option( 'blog_charset' ) );
/**
- * Filter a string cleaned and escaped for output in a textarea element.
+ * Filters a string cleaned and escaped for output in a textarea element.
*
* @since 3.1.0
*
function tag_escape( $tag_name ) {
$safe_tag = strtolower( preg_replace('/[^a-zA-Z0-9_:]/', '', $tag_name) );
/**
- * Filter a string cleaned and escaped for output as an HTML tag.
+ * Filters a string cleaned and escaped for output as an HTML tag.
*
* @since 2.8.0
*
case 'blogdescription':
case 'blogname':
$value = $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', $value );
+ if ( $value !== $original_value ) {
+ $value = $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', wp_encode_emoji( $original_value ) );
+ }
+
if ( is_wp_error( $value ) ) {
$error = $value->get_error_message();
} else {
- $value = wp_kses_post( $value );
$value = esc_html( $value );
}
break;
$value = esc_url_raw( $value );
$value = str_replace( 'http://', '', $value );
}
+
+ if ( 'permalink_structure' === $option && '' !== $value && ! preg_match( '/%[^\/%]+%/', $value ) ) {
+ $error = sprintf(
+ /* translators: %s: Codex URL */
+ __( 'A structure tag is required when using custom permalinks. <a href="%s">Learn more</a>' ),
+ __( 'https://codex.wordpress.org/Using_Permalinks#Choosing_your_permalink_structure' )
+ );
+ }
break;
case 'default_role' :
}
/**
- * Filter an option value following sanitization.
+ * Filters an option value following sanitization.
*
* @since 2.3.0
* @since 4.3.0 Added the `$original_value` parameter.
*
* @param mixed $value The array, object, or scalar.
* @param callable $callback The function to map onto $value.
- * @return The value with the callback applied to all non-arrays and non-objects inside it.
+ * @return mixed The value with the callback applied to all non-arrays and non-objects inside it.
*/
function map_deep( $value, $callback ) {
- if ( is_array( $value ) || is_object( $value ) ) {
- foreach ( $value as &$item ) {
- $item = map_deep( $item, $callback );
+ if ( is_array( $value ) ) {
+ foreach ( $value as $index => $item ) {
+ $value[ $index ] = map_deep( $item, $callback );
+ }
+ } elseif ( is_object( $value ) ) {
+ $object_vars = get_object_vars( $value );
+ foreach ( $object_vars as $property_name => $property_value ) {
+ $value->$property_name = map_deep( $property_value, $callback );
}
- return $value;
} else {
- return call_user_func( $callback, $value );
+ $value = call_user_func( $callback, $value );
}
+
+ return $value;
}
/**
* Parses a string into variables to be stored in an array.
*
- * Uses {@link http://www.php.net/parse_str parse_str()} and stripslashes if
- * {@link http://www.php.net/magic_quotes magic_quotes_gpc} is on.
+ * Uses {@link https://secure.php.net/parse_str parse_str()} and stripslashes if
+ * {@link https://secure.php.net/magic_quotes magic_quotes_gpc} is on.
*
* @since 2.2.1
*
if ( get_magic_quotes_gpc() )
$array = stripslashes_deep( $array );
/**
- * Filter the array of variables derived from a parsed string.
+ * Filters the array of variables derived from a parsed string.
*
* @since 2.3.0
*
* WordPress implementation of PHP sprintf() with filters.
*
* @since 2.5.0
- * @link http://www.php.net/sprintf
+ * @link https://secure.php.net/sprintf
*
* @param string $pattern The string which formatted args are inserted.
* @param mixed $args ,... Arguments to be formatted into the $pattern string.
}
/**
- * Filter a fragment from the pattern passed to wp_sprintf().
+ * Filters a fragment from the pattern passed to wp_sprintf().
*
* If the fragment is unchanged, then sprintf() will be run on the fragment.
*
return '';
/**
- * Filter the translated delimiters used by wp_sprintf_l().
+ * Filters the translated delimiters used by wp_sprintf_l().
* Placeholders (%s) are included to assist translators and then
* removed before the array of strings reaches the filter.
*
}
/**
- * Sanitize a string from user input or from the db
+ * Sanitizes a string from user input or from the database.
*
- * check for invalid UTF-8,
- * Convert single < characters to entity,
- * strip all tags,
- * remove line breaks, tabs and extra white space,
- * strip octets.
+ * - Checks for invalid UTF-8
+ * - Converts single `<` characters to entities
+ * - Strips all tags
+ * - Removes line breaks, tabs, and extra whitespace
+ * - Strips octets
*
* @since 2.9.0
*
- * @param string $str
- * @return string
+ * @see sanitize_textarea_field()
+ * @see wp_check_invalid_utf8()
+ * @see wp_strip_all_tags()
+ *
+ * @param string $str String to sanitize.
+ * @return string Sanitized string.
*/
function sanitize_text_field( $str ) {
+ $filtered = _sanitize_text_fields( $str, false );
+
+ /**
+ * Filters a sanitized text field string.
+ *
+ * @since 2.9.0
+ *
+ * @param string $filtered The sanitized string.
+ * @param string $str The string prior to being sanitized.
+ */
+ return apply_filters( 'sanitize_text_field', $filtered, $str );
+}
+
+/**
+ * Sanitizes a multiline string from user input or from the database.
+ *
+ * The function is like sanitize_text_field(), but preserves
+ * new lines (\n) and other whitespace, which are legitimate
+ * input in textarea elements.
+ *
+ * @see sanitize_text_field()
+ *
+ * @since 4.7.0
+ *
+ * @param string $str String to sanitize.
+ * @return string Sanitized string.
+ */
+function sanitize_textarea_field( $str ) {
+ $filtered = _sanitize_text_fields( $str, true );
+
+ /**
+ * Filters a sanitized textarea field string.
+ *
+ * @since 4.7.0
+ *
+ * @param string $filtered The sanitized string.
+ * @param string $str The string prior to being sanitized.
+ */
+ return apply_filters( 'sanitize_textarea_field', $filtered, $str );
+}
+
+/**
+ * Internal helper function to sanitize a string from user input or from the database.
+ *
+ * @since 4.7.0
+ * @access private
+ *
+ * @param string $str String to sanitize.
+ * @param bool $keep_newlines Optional. Whether to keep newlines. Default false.
+ * @return string Sanitized string.
+ */
+function _sanitize_text_fields( $str, $keep_newlines = false ) {
$filtered = wp_check_invalid_utf8( $str );
if ( strpos($filtered, '<') !== false ) {
$filtered = wp_pre_kses_less_than( $filtered );
// This will strip extra whitespace for us.
- $filtered = wp_strip_all_tags( $filtered, true );
- } else {
- $filtered = trim( preg_replace('/[\r\n\t ]+/', ' ', $filtered) );
+ $filtered = wp_strip_all_tags( $filtered, false );
+
+ // Use html entities in a special case to make sure no later
+ // newline stripping stage could lead to a functional tag
+ $filtered = str_replace("<\n", "&lt;\n", $filtered);
}
+ if ( ! $keep_newlines ) {
+ $filtered = preg_replace( '/[\r\n\t ]+/', ' ', $filtered );
+ }
+ $filtered = trim( $filtered );
+
$found = false;
while ( preg_match('/%[a-f0-9]{2}/i', $filtered, $match) ) {
$filtered = str_replace($match[0], '', $filtered);
$filtered = trim( preg_replace('/ +/', ' ', $filtered) );
}
- /**
- * Filter a sanitized text field string.
- *
- * @since 2.9.0
- *
- * @param string $filtered The sanitized string.
- * @param string $str The string prior to being sanitized.
- */
- return apply_filters( 'sanitize_text_field', $filtered, $str );
+ return $filtered;
}
/**
* @since 3.0.0
*
* @staticvar string|false $dblq
+ *
+ * @param string $text The text to be modified.
+ * @return string The modified text.
*/
function capital_P_dangit( $text ) {
// Simple replacement for titles
function sanitize_mime_type( $mime_type ) {
$sani_mime_type = preg_replace( '/[^-+*.a-zA-Z0-9\/]/', '', $mime_type );
/**
- * Filter a mime type following sanitization.
+ * Filters a mime type following sanitization.
*
* @since 3.1.3
*
$urls_to_ping = array_map( 'esc_url_raw', $urls_to_ping );
$urls_to_ping = implode( "\n", $urls_to_ping );
/**
- * Filter a list of trackback URLs following sanitization.
+ * Filters a list of trackback URLs following sanitization.
*
* The string returned here consists of a space or carriage return-delimited list
* of trackback URLs.
if ( empty( $spaces ) ) {
/**
- * Filter the regexp for common whitespace characters.
+ * Filters the regexp for common whitespace characters.
*
* This string is substituted for the \s sequence as needed in regular
* expressions. For websites not written in English, different characters
}
/**
+ * Print the inline Emoji detection script if it is not already printed.
*
- * @global string $wp_version
+ * @since 4.2.0
* @staticvar bool $printed
*/
function print_emoji_detection_script() {
- global $wp_version;
static $printed = false;
if ( $printed ) {
$printed = true;
+ _print_emoji_detection_script();
+}
+
+/**
+ * Prints the inline Emoji detection script.
+ *
+ * @ignore
+ * @since 4.6.0
+ * @access private
+ */
+function _print_emoji_detection_script() {
$settings = array(
/**
- * Filter the URL where emoji images are hosted.
+ * Filters the URL where emoji png images are hosted.
*
* @since 4.2.0
*
- * @param string The emoji base URL.
+ * @param string The emoji base URL for png images.
*/
- 'baseUrl' => apply_filters( 'emoji_url', set_url_scheme( '//s.w.org/images/core/emoji/72x72/' ) ),
+ 'baseUrl' => apply_filters( 'emoji_url', 'https://s.w.org/images/core/emoji/2.2.1/72x72/' ),
/**
- * Filter the extension of the emoji files.
+ * Filters the extension of the emoji png files.
*
* @since 4.2.0
*
- * @param string The emoji extension. Default .png.
+ * @param string The emoji extension for png files. Default .png.
*/
'ext' => apply_filters( 'emoji_ext', '.png' ),
+
+ /**
+ * Filters the URL where emoji SVG images are hosted.
+ *
+ * @since 4.6.0
+ *
+ * @param string The emoji base URL for svg images.
+ */
+ 'svgUrl' => apply_filters( 'emoji_svg_url', 'https://s.w.org/images/core/emoji/2.2.1/svg/' ),
+
+ /**
+ * Filters the extension of the emoji SVG files.
+ *
+ * @since 4.6.0
+ *
+ * @param string The emoji extension for svg files. Default .svg.
+ */
+ 'svgExt' => apply_filters( 'emoji_svg_ext', '.svg' ),
);
- $version = 'ver=' . $wp_version;
+ $version = 'ver=' . get_bloginfo( 'version' );
if ( SCRIPT_DEBUG ) {
$settings['source'] = array(
?>
<script type="text/javascript">
window._wpemojiSettings = <?php echo wp_json_encode( $settings ); ?>;
- !function(a,b,c){function d(a){var c=b.createElement("canvas"),d=c.getContext&&c.getContext("2d");return d&&d.fillText?(d.textBaseline="top",d.font="600 32px Arial","flag"===a?(d.fillText(String.fromCharCode(55356,56806,55356,56826),0,0),c.toDataURL().length>3e3):("simple"===a?d.fillText(String.fromCharCode(55357,56835),0,0):d.fillText(String.fromCharCode(55356,57135),0,0),0!==d.getImageData(16,16,1,1).data[0])):!1}function e(a){var c=b.createElement("script");c.src=a,c.type="text/javascript",b.getElementsByTagName("head")[0].appendChild(c)}var f,g;c.supports={simple:d("simple"),flag:d("flag"),unicode8:d("unicode8")},c.DOMReady=!1,c.readyCallback=function(){c.DOMReady=!0},c.supports.simple&&c.supports.flag&&c.supports.unicode8||(g=function(){c.readyCallback()},b.addEventListener?(b.addEventListener("DOMContentLoaded",g,!1),a.addEventListener("load",g,!1)):(a.attachEvent("onload",g),b.attachEvent("onreadystatechange",function(){"complete"===b.readyState&&c.readyCallback()})),f=c.source||{},f.concatemoji?e(f.concatemoji):f.wpemoji&&f.twemoji&&(e(f.twemoji),e(f.wpemoji)))}(window,document,window._wpemojiSettings);
+ !function(a,b,c){function d(a){var b,c,d,e,f=String.fromCharCode;if(!k||!k.fillText)return!1;switch(k.clearRect(0,0,j.width,j.height),k.textBaseline="top",k.font="600 32px Arial",a){case"flag":return k.fillText(f(55356,56826,55356,56819),0,0),!(j.toDataURL().length<3e3)&&(k.clearRect(0,0,j.width,j.height),k.fillText(f(55356,57331,65039,8205,55356,57096),0,0),b=j.toDataURL(),k.clearRect(0,0,j.width,j.height),k.fillText(f(55356,57331,55356,57096),0,0),c=j.toDataURL(),b!==c);case"emoji4":return k.fillText(f(55357,56425,55356,57341,8205,55357,56507),0,0),d=j.toDataURL(),k.clearRect(0,0,j.width,j.height),k.fillText(f(55357,56425,55356,57341,55357,56507),0,0),e=j.toDataURL(),d!==e}return!1}function e(a){var c=b.createElement("script");c.src=a,c.defer=c.type="text/javascript",b.getElementsByTagName("head")[0].appendChild(c)}var f,g,h,i,j=b.createElement("canvas"),k=j.getContext&&j.getContext("2d");for(i=Array("flag","emoji4"),c.supports={everything:!0,everythingExceptFlag:!0},h=0;h<i.length;h++)c.supports[i[h]]=d(i[h]),c.supports.everything=c.supports.everything&&c.supports[i[h]],"flag"!==i[h]&&(c.supports.everythingExceptFlag=c.supports.everythingExceptFlag&&c.supports[i[h]]);c.supports.everythingExceptFlag=c.supports.everythingExceptFlag&&!c.supports.flag,c.DOMReady=!1,c.readyCallback=function(){c.DOMReady=!0},c.supports.everything||(g=function(){c.readyCallback()},b.addEventListener?(b.addEventListener("DOMContentLoaded",g,!1),a.addEventListener("load",g,!1)):(a.attachEvent("onload",g),b.attachEvent("onreadystatechange",function(){"complete"===b.readyState&&c.readyCallback()})),f=c.source||{},f.concatemoji?e(f.concatemoji):f.wpemoji&&f.twemoji&&(e(f.twemoji),e(f.wpemoji)))}(window,document,window._wpemojiSettings);
</script>
<?php
}
$text = wp_encode_emoji( $text );
/** This filter is documented in wp-includes/formatting.php */
- $cdn_url = apply_filters( 'emoji_url', set_url_scheme( '//s.w.org/images/core/emoji/72x72/' ) );
+ $cdn_url = apply_filters( 'emoji_url', 'https://s.w.org/images/core/emoji/2.2.1/72x72/' );
/** This filter is documented in wp-includes/formatting.php */
$ext = apply_filters( 'emoji_ext', '.png' );
}
/**
- * Shorten an URL, to be used as link text.
+ * Shorten a URL, to be used as link text.
*
* @since 1.2.0
* @since 4.4.0 Moved to wp-includes/formatting.php from wp-admin/includes/misc.php and added $length param.
$short_url = substr( $short_url, 0, $length - 3 ) . '…';
}
return $short_url;
-}
\ No newline at end of file
+}
+
+/**
+ * Sanitizes a hex color.
+ *
+ * Returns either '', a 3 or 6 digit hex color (with #), or nothing.
+ * For sanitizing values without a #, see sanitize_hex_color_no_hash().
+ *
+ * @since 3.4.0
+ *
+ * @param string $color Color value to check; a valid value starts with '#'.
+ * @return string|void The unchanged $color when it is '' or a valid hex color,
+ *                     otherwise nothing (null).
+ */
+function sanitize_hex_color( $color ) {
+	if ( '' === $color ) {
+		return '';
+	}
+
+	// 3 or 6 hex digits after the hash. The D modifier makes `$` match only at
+	// the very end of the subject, so a value with a trailing newline is rejected.
+	if ( preg_match( '|^#([A-Fa-f0-9]{3}){1,2}$|D', $color ) ) {
+		return $color;
+	}
+}
+
+/**
+ * Sanitizes a hex color that is stored without its leading hash.
+ *
+ * Prefer sanitize_hex_color() when possible: storing colors without a hash
+ * leaves the UI responsible for adding it back, which makes it harder to use
+ * or upgrade to other color types such as rgba, hsl, rgb, and html color names.
+ *
+ * Returns either '', a 3 or 6 digit hex color (without a #), or null.
+ *
+ * @since 3.4.0
+ *
+ * @param string $color Color value; any leading '#' characters are ignored.
+ * @return string|null
+ */
+function sanitize_hex_color_no_hash( $color ) {
+	$bare = ltrim( $color, '#' );
+
+	// Nothing left once the hashes are removed: hand back the empty string.
+	if ( '' === $bare ) {
+		return '';
+	}
+
+	// Validate by re-attaching a single hash and delegating to sanitize_hex_color().
+	if ( sanitize_hex_color( '#' . $bare ) ) {
+		return $bare;
+	}
+
+	return null;
+}
+
+/**
+ * Prefixes a valid hex color with a hash; any other value is returned untouched.
+ *
+ * This should only be necessary when values were stored through
+ * sanitize_hex_color_no_hash().
+ *
+ * @since 3.4.0
+ *
+ * @param string $color Color value, with or without a leading '#'.
+ * @return string The hashed hex color, or the original $color when it is not one.
+ */
+function maybe_hash_hex_color( $color ) {
+	$unhashed = sanitize_hex_color_no_hash( $color );
+
+	// A truthy result means $color was a valid hex color once its hash was stripped.
+	return $unhashed ? '#' . $unhashed : $color;
+}