+/**
+ * Percent-encodes the non-ASCII bytes of a UTF-8 string so it can be used in a URI.
+ *
+ * Bytes below 128 are copied to the output unchanged. Any other byte is buffered
+ * until the sequence announced by its lead byte (2, 3 or 4 octets) is complete,
+ * and the whole sequence is then written as lowercase `%xx` escapes. A sequence
+ * that is still incomplete when the input ends is not written.
+ *
+ * @since 1.5.0
+ *
+ * @param string $utf8_string String to encode.
+ * @param int    $length      Optional. Maximum length of the encoded result.
+ *                            A falsy value (default 0) disables the limit.
+ * @return string String with Unicode encoded for URI.
+ */
+function utf8_uri_encode( $utf8_string, $length = 0 ) {
+	$encoded      = '';
+	$encoded_size = 0;
+	$pending      = array(); // High bytes collected for the sequence in progress.
+	$expected     = 1;       // Octet count announced by the current lead byte.
+
+	// Measure the input in bytes even when mbstring overloads strlen().
+	mbstring_binary_safe_encoding();
+	$byte_count = strlen( $utf8_string );
+	reset_mbstring_encoding();
+
+	$index = 0;
+	while ( $index < $byte_count ) {
+		$byte = ord( $utf8_string[ $index ] );
+		$index++;
+
+		// Plain ASCII: pass through, one output character per input byte.
+		if ( $byte < 128 ) {
+			if ( $length && $encoded_size >= $length ) {
+				break;
+			}
+
+			$encoded .= chr( $byte );
+			$encoded_size++;
+			continue;
+		}
+
+		// First high byte of a sequence decides how many octets to collect.
+		if ( ! $pending ) {
+			$expected = $byte < 224 ? 2 : ( $byte < 240 ? 3 : 4 );
+		}
+
+		$pending[] = $byte;
+
+		// Each octet costs three output characters ("%xx"); stop if the whole
+		// sequence would not fit within the requested length.
+		if ( $length && ( $encoded_size + $expected * 3 ) > $length ) {
+			break;
+		}
+
+		if ( count( $pending ) != $expected ) {
+			continue;
+		}
+
+		// Sequence complete: flush it as percent-escapes and start over.
+		foreach ( $pending as $octet ) {
+			$encoded .= '%' . dechex( $octet );
+		}
+
+		$encoded_size += $expected * 3;
+		$pending       = array();
+		$expected      = 1;
+	}
+
+	return $encoded;
+}
+
+/**
+ * Converts all accent characters to ASCII characters.
+ *
+ * If there are no accent characters, then the string given is just returned.
+ *
+ * **Accent characters converted:**
+ *
+ * Currency signs:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | ------------------- |
+ * | U+00A3 | £ | (empty) | British Pound sign |
+ * | U+20AC | € | E | Euro sign |
+ *
+ * Decompositions for Latin-1 Supplement:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | ------- | ----- | ----------- | -------------------------------------- |
+ * | U+00AA | ª | a | Feminine ordinal indicator |
+ * | U+00BA | º | o | Masculine ordinal indicator |
+ * | U+00C0 | À | A | Latin capital letter A with grave |
+ * | U+00C1 | Á | A | Latin capital letter A with acute |
+ * | U+00C2 | Â | A | Latin capital letter A with circumflex |
+ * | U+00C3 | Ã | A | Latin capital letter A with tilde |
+ * | U+00C4 | Ä | A | Latin capital letter A with diaeresis |
+ * | U+00C5 | Å | A | Latin capital letter A with ring above |
+ * | U+00C6 | Æ | AE | Latin capital letter AE |
+ * | U+00C7 | Ç | C | Latin capital letter C with cedilla |
+ * | U+00C8 | È | E | Latin capital letter E with grave |
+ * | U+00C9 | É | E | Latin capital letter E with acute |
+ * | U+00CA | Ê | E | Latin capital letter E with circumflex |
+ * | U+00CB | Ë | E | Latin capital letter E with diaeresis |
+ * | U+00CC | Ì | I | Latin capital letter I with grave |
+ * | U+00CD | Í | I | Latin capital letter I with acute |
+ * | U+00CE | Î | I | Latin capital letter I with circumflex |
+ * | U+00CF | Ï | I | Latin capital letter I with diaeresis |
+ * | U+00D0 | Ð | D | Latin capital letter Eth |
+ * | U+00D1 | Ñ | N | Latin capital letter N with tilde |
+ * | U+00D2 | Ò | O | Latin capital letter O with grave |
+ * | U+00D3 | Ó | O | Latin capital letter O with acute |
+ * | U+00D4 | Ô | O | Latin capital letter O with circumflex |
+ * | U+00D5 | Õ | O | Latin capital letter O with tilde |
+ * | U+00D6 | Ö | O | Latin capital letter O with diaeresis |
+ * | U+00D8 | Ø | O | Latin capital letter O with stroke |
+ * | U+00D9 | Ù | U | Latin capital letter U with grave |
+ * | U+00DA | Ú | U | Latin capital letter U with acute |
+ * | U+00DB | Û | U | Latin capital letter U with circumflex |
+ * | U+00DC | Ü | U | Latin capital letter U with diaeresis |
+ * | U+00DD | Ý | Y | Latin capital letter Y with acute |
+ * | U+00DE | Þ | TH | Latin capital letter Thorn |
+ * | U+00DF | ß | s | Latin small letter sharp s |
+ * | U+00E0 | à | a | Latin small letter a with grave |
+ * | U+00E1 | á | a | Latin small letter a with acute |
+ * | U+00E2 | â | a | Latin small letter a with circumflex |
+ * | U+00E3 | ã | a | Latin small letter a with tilde |
+ * | U+00E4 | ä | a | Latin small letter a with diaeresis |
+ * | U+00E5 | å | a | Latin small letter a with ring above |
+ * | U+00E6 | æ | ae | Latin small letter ae |
+ * | U+00E7 | ç | c | Latin small letter c with cedilla |
+ * | U+00E8 | è | e | Latin small letter e with grave |
+ * | U+00E9 | é | e | Latin small letter e with acute |
+ * | U+00EA | ê | e | Latin small letter e with circumflex |
+ * | U+00EB | ë | e | Latin small letter e with diaeresis |
+ * | U+00EC | ì | i | Latin small letter i with grave |
+ * | U+00ED | í | i | Latin small letter i with acute |
+ * | U+00EE | î | i | Latin small letter i with circumflex |
+ * | U+00EF | ï | i | Latin small letter i with diaeresis |
+ * | U+00F0 | ð | d | Latin small letter Eth |
+ * | U+00F1 | ñ | n | Latin small letter n with tilde |
+ * | U+00F2 | ò | o | Latin small letter o with grave |
+ * | U+00F3 | ó | o | Latin small letter o with acute |
+ * | U+00F4 | ô | o | Latin small letter o with circumflex |
+ * | U+00F5 | õ | o | Latin small letter o with tilde |
+ * | U+00F6 | ö | o | Latin small letter o with diaeresis |
+ * | U+00F8 | ø | o | Latin small letter o with stroke |
+ * | U+00F9 | ù | u | Latin small letter u with grave |
+ * | U+00FA | ú | u | Latin small letter u with acute |
+ * | U+00FB | û | u | Latin small letter u with circumflex |
+ * | U+00FC | ü | u | Latin small letter u with diaeresis |
+ * | U+00FD | ý | y | Latin small letter y with acute |
+ * | U+00FE | þ | th | Latin small letter Thorn |
+ * | U+00FF | ÿ | y | Latin small letter y with diaeresis |
+ *
+ * Decompositions for Latin Extended-A:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | ------- | ----- | ----------- | ------------------------------------------------- |
+ * | U+0100 | Ā | A | Latin capital letter A with macron |
+ * | U+0101 | ā | a | Latin small letter a with macron |
+ * | U+0102 | Ă | A | Latin capital letter A with breve |
+ * | U+0103 | ă | a | Latin small letter a with breve |
+ * | U+0104 | Ą | A | Latin capital letter A with ogonek |
+ * | U+0105 | ą | a | Latin small letter a with ogonek |
+ * | U+0106 | Ć | C | Latin capital letter C with acute |
+ * | U+0107 | ć | c | Latin small letter c with acute |
+ * | U+0108 | Ĉ | C | Latin capital letter C with circumflex |
+ * | U+0109 | ĉ | c | Latin small letter c with circumflex |
+ * | U+010A | Ċ | C | Latin capital letter C with dot above |
+ * | U+010B | ċ | c | Latin small letter c with dot above |
+ * | U+010C | Č | C | Latin capital letter C with caron |
+ * | U+010D | č | c | Latin small letter c with caron |
+ * | U+010E | Ď | D | Latin capital letter D with caron |
+ * | U+010F | ď | d | Latin small letter d with caron |
+ * | U+0110 | Đ | D | Latin capital letter D with stroke |
+ * | U+0111 | đ | d | Latin small letter d with stroke |
+ * | U+0112 | Ē | E | Latin capital letter E with macron |
+ * | U+0113 | ē | e | Latin small letter e with macron |
+ * | U+0114 | Ĕ | E | Latin capital letter E with breve |
+ * | U+0115 | ĕ | e | Latin small letter e with breve |
+ * | U+0116 | Ė | E | Latin capital letter E with dot above |
+ * | U+0117 | ė | e | Latin small letter e with dot above |
+ * | U+0118 | Ę | E | Latin capital letter E with ogonek |
+ * | U+0119 | ę | e | Latin small letter e with ogonek |
+ * | U+011A | Ě | E | Latin capital letter E with caron |
+ * | U+011B | ě | e | Latin small letter e with caron |
+ * | U+011C | Ĝ | G | Latin capital letter G with circumflex |
+ * | U+011D | ĝ | g | Latin small letter g with circumflex |
+ * | U+011E | Ğ | G | Latin capital letter G with breve |
+ * | U+011F | ğ | g | Latin small letter g with breve |
+ * | U+0120 | Ġ | G | Latin capital letter G with dot above |
+ * | U+0121 | ġ | g | Latin small letter g with dot above |
+ * | U+0122 | Ģ | G | Latin capital letter G with cedilla |
+ * | U+0123 | ģ | g | Latin small letter g with cedilla |
+ * | U+0124 | Ĥ | H | Latin capital letter H with circumflex |
+ * | U+0125 | ĥ | h | Latin small letter h with circumflex |
+ * | U+0126 | Ħ | H | Latin capital letter H with stroke |
+ * | U+0127 | ħ | h | Latin small letter h with stroke |
+ * | U+0128 | Ĩ | I | Latin capital letter I with tilde |
+ * | U+0129 | ĩ | i | Latin small letter i with tilde |
+ * | U+012A | Ī | I | Latin capital letter I with macron |
+ * | U+012B | ī | i | Latin small letter i with macron |
+ * | U+012C | Ĭ | I | Latin capital letter I with breve |
+ * | U+012D | ĭ | i | Latin small letter i with breve |
+ * | U+012E | Į | I | Latin capital letter I with ogonek |
+ * | U+012F | į | i | Latin small letter i with ogonek |
+ * | U+0130 | İ | I | Latin capital letter I with dot above |
+ * | U+0131 | ı | i | Latin small letter dotless i |
+ * | U+0132 | IJ | IJ | Latin capital ligature IJ |
+ * | U+0133 | ij | ij | Latin small ligature ij |
+ * | U+0134 | Ĵ | J | Latin capital letter J with circumflex |
+ * | U+0135 | ĵ | j | Latin small letter j with circumflex |
+ * | U+0136 | Ķ | K | Latin capital letter K with cedilla |
+ * | U+0137 | ķ | k | Latin small letter k with cedilla |
+ * | U+0138 | ĸ | k | Latin small letter Kra |
+ * | U+0139 | Ĺ | L | Latin capital letter L with acute |
+ * | U+013A | ĺ | l | Latin small letter l with acute |
+ * | U+013B | Ļ | L | Latin capital letter L with cedilla |
+ * | U+013C | ļ | l | Latin small letter l with cedilla |
+ * | U+013D | Ľ | L | Latin capital letter L with caron |
+ * | U+013E | ľ | l | Latin small letter l with caron |
+ * | U+013F | Ŀ | L | Latin capital letter L with middle dot |
+ * | U+0140 | ŀ | l | Latin small letter l with middle dot |
+ * | U+0141 | Ł | L | Latin capital letter L with stroke |
+ * | U+0142 | ł | l | Latin small letter l with stroke |
+ * | U+0143 | Ń | N | Latin capital letter N with acute |
+ * | U+0144 | ń | n | Latin small letter n with acute |
+ * | U+0145 | Ņ | N | Latin capital letter N with cedilla |
+ * | U+0146 | ņ | n | Latin small letter n with cedilla |
+ * | U+0147 | Ň | N | Latin capital letter N with caron |
+ * | U+0148 | ň | n | Latin small letter n with caron |
+ * | U+0149 | ʼn | N | Latin small letter n preceded by apostrophe |
+ * | U+014A | Ŋ | n | Latin capital letter Eng |
+ * | U+014B | ŋ | N | Latin small letter Eng |
+ * | U+014C | Ō | O | Latin capital letter O with macron |
+ * | U+014D | ō | o | Latin small letter o with macron |
+ * | U+014E | Ŏ | O | Latin capital letter O with breve |
+ * | U+014F | ŏ | o | Latin small letter o with breve |
+ * | U+0150 | Ő | O | Latin capital letter O with double acute |
+ * | U+0151 | ő | o | Latin small letter o with double acute |
+ * | U+0152 | Œ | OE | Latin capital ligature OE |
+ * | U+0153 | œ | oe | Latin small ligature oe |
+ * | U+0154 | Ŕ | R | Latin capital letter R with acute |
+ * | U+0155 | ŕ | r | Latin small letter r with acute |
+ * | U+0156 | Ŗ | R | Latin capital letter R with cedilla |
+ * | U+0157 | ŗ | r | Latin small letter r with cedilla |
+ * | U+0158 | Ř | R | Latin capital letter R with caron |
+ * | U+0159 | ř | r | Latin small letter r with caron |
+ * | U+015A | Ś | S | Latin capital letter S with acute |
+ * | U+015B | ś | s | Latin small letter s with acute |
+ * | U+015C | Ŝ | S | Latin capital letter S with circumflex |
+ * | U+015D | ŝ | s | Latin small letter s with circumflex |
+ * | U+015E | Ş | S | Latin capital letter S with cedilla |
+ * | U+015F | ş | s | Latin small letter s with cedilla |
+ * | U+0160 | Š | S | Latin capital letter S with caron |
+ * | U+0161 | š | s | Latin small letter s with caron |
+ * | U+0162 | Ţ | T | Latin capital letter T with cedilla |
+ * | U+0163 | ţ | t | Latin small letter t with cedilla |
+ * | U+0164 | Ť | T | Latin capital letter T with caron |
+ * | U+0165 | ť | t | Latin small letter t with caron |
+ * | U+0166 | Ŧ | T | Latin capital letter T with stroke |
+ * | U+0167 | ŧ | t | Latin small letter t with stroke |
+ * | U+0168 | Ũ | U | Latin capital letter U with tilde |
+ * | U+0169 | ũ | u | Latin small letter u with tilde |
+ * | U+016A | Ū | U | Latin capital letter U with macron |
+ * | U+016B | ū | u | Latin small letter u with macron |
+ * | U+016C | Ŭ | U | Latin capital letter U with breve |
+ * | U+016D | ŭ | u | Latin small letter u with breve |
+ * | U+016E | Ů | U | Latin capital letter U with ring above |
+ * | U+016F | ů | u | Latin small letter u with ring above |
+ * | U+0170 | Ű | U | Latin capital letter U with double acute |
+ * | U+0171 | ű | u | Latin small letter u with double acute |
+ * | U+0172 | Ų | U | Latin capital letter U with ogonek |
+ * | U+0173 | ų | u | Latin small letter u with ogonek |
+ * | U+0174 | Ŵ | W | Latin capital letter W with circumflex |
+ * | U+0175 | ŵ | w | Latin small letter w with circumflex |
+ * | U+0176 | Ŷ | Y | Latin capital letter Y with circumflex |
+ * | U+0177 | ŷ | y | Latin small letter y with circumflex |
+ * | U+0178 | Ÿ | Y | Latin capital letter Y with diaeresis |
+ * | U+0179 | Ź | Z | Latin capital letter Z with acute |
+ * | U+017A | ź | z | Latin small letter z with acute |
+ * | U+017B | Ż | Z | Latin capital letter Z with dot above |
+ * | U+017C | ż | z | Latin small letter z with dot above |
+ * | U+017D | Ž | Z | Latin capital letter Z with caron |
+ * | U+017E | ž | z | Latin small letter z with caron |
+ * | U+017F | ſ | s | Latin small letter long s |
+ * | U+01A0 | Ơ | O | Latin capital letter O with horn |
+ * | U+01A1 | ơ | o | Latin small letter o with horn |
+ * | U+01AF | Ư | U | Latin capital letter U with horn |
+ * | U+01B0 | ư | u | Latin small letter u with horn |
+ * | U+01CD | Ǎ | A | Latin capital letter A with caron |
+ * | U+01CE | ǎ | a | Latin small letter a with caron |
+ * | U+01CF | Ǐ | I | Latin capital letter I with caron |
+ * | U+01D0 | ǐ | i | Latin small letter i with caron |
+ * | U+01D1 | Ǒ | O | Latin capital letter O with caron |
+ * | U+01D2 | ǒ | o | Latin small letter o with caron |
+ * | U+01D3 | Ǔ | U | Latin capital letter U with caron |
+ * | U+01D4 | ǔ | u | Latin small letter u with caron |
+ * | U+01D5 | Ǖ | U | Latin capital letter U with diaeresis and macron |
+ * | U+01D6 | ǖ | u | Latin small letter u with diaeresis and macron |
+ * | U+01D7 | Ǘ | U | Latin capital letter U with diaeresis and acute |
+ * | U+01D8 | ǘ | u | Latin small letter u with diaeresis and acute |
+ * | U+01D9 | Ǚ | U | Latin capital letter U with diaeresis and caron |
+ * | U+01DA | ǚ | u | Latin small letter u with diaeresis and caron |
+ * | U+01DB | Ǜ | U | Latin capital letter U with diaeresis and grave |
+ * | U+01DC | ǜ | u | Latin small letter u with diaeresis and grave |
+ *
+ * Decompositions for Latin Extended-B:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | ----------------------------------------- |
+ * | U+0218 | Ș | S | Latin capital letter S with comma below |
+ * | U+0219 | ș | s | Latin small letter s with comma below |
+ * | U+021A | Ț | T | Latin capital letter T with comma below |
+ * | U+021B | ț | t | Latin small letter t with comma below |
+ *
+ * Vowels with diacritic (Vietnamese and Chinese, Hanyu Pinyin):
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | ----------------------------------------------------- |
+ * | U+0251 | ɑ | a | Latin small letter alpha |
+ * | U+1EA0 | Ạ | A | Latin capital letter A with dot below |
+ * | U+1EA1 | ạ | a | Latin small letter a with dot below |
+ * | U+1EA2 | Ả | A | Latin capital letter A with hook above |
+ * | U+1EA3 | ả | a | Latin small letter a with hook above |
+ * | U+1EA4 | Ấ | A | Latin capital letter A with circumflex and acute |
+ * | U+1EA5 | ấ | a | Latin small letter a with circumflex and acute |
+ * | U+1EA6 | Ầ | A | Latin capital letter A with circumflex and grave |
+ * | U+1EA7 | ầ | a | Latin small letter a with circumflex and grave |
+ * | U+1EA8 | Ẩ | A | Latin capital letter A with circumflex and hook above |
+ * | U+1EA9 | ẩ | a | Latin small letter a with circumflex and hook above |
+ * | U+1EAA | Ẫ | A | Latin capital letter A with circumflex and tilde |
+ * | U+1EAB | ẫ | a | Latin small letter a with circumflex and tilde |
+ * | U+1EAC | Ậ | A | Latin capital letter A with circumflex and dot below |
+ * | U+1EAD | ậ | a | Latin small letter a with circumflex and dot below |
+ * | U+1EAE | Ắ | A | Latin capital letter A with breve and acute |
+ * | U+1EAF | ắ | a | Latin small letter a with breve and acute |
+ * | U+1EB0 | Ằ | A | Latin capital letter A with breve and grave |
+ * | U+1EB1 | ằ | a | Latin small letter a with breve and grave |
+ * | U+1EB2 | Ẳ | A | Latin capital letter A with breve and hook above |
+ * | U+1EB3 | ẳ | a | Latin small letter a with breve and hook above |
+ * | U+1EB4 | Ẵ | A | Latin capital letter A with breve and tilde |
+ * | U+1EB5 | ẵ | a | Latin small letter a with breve and tilde |
+ * | U+1EB6 | Ặ | A | Latin capital letter A with breve and dot below |
+ * | U+1EB7 | ặ | a | Latin small letter a with breve and dot below |
+ * | U+1EB8 | Ẹ | E | Latin capital letter E with dot below |
+ * | U+1EB9 | ẹ | e | Latin small letter e with dot below |
+ * | U+1EBA | Ẻ | E | Latin capital letter E with hook above |
+ * | U+1EBB | ẻ | e | Latin small letter e with hook above |
+ * | U+1EBC | Ẽ | E | Latin capital letter E with tilde |
+ * | U+1EBD | ẽ | e | Latin small letter e with tilde |
+ * | U+1EBE | Ế | E | Latin capital letter E with circumflex and acute |
+ * | U+1EBF | ế | e | Latin small letter e with circumflex and acute |
+ * | U+1EC0 | Ề | E | Latin capital letter E with circumflex and grave |
+ * | U+1EC1 | ề | e | Latin small letter e with circumflex and grave |
+ * | U+1EC2 | Ể | E | Latin capital letter E with circumflex and hook above |
+ * | U+1EC3 | ể | e | Latin small letter e with circumflex and hook above |
+ * | U+1EC4 | Ễ | E | Latin capital letter E with circumflex and tilde |
+ * | U+1EC5 | ễ | e | Latin small letter e with circumflex and tilde |
+ * | U+1EC6 | Ệ | E | Latin capital letter E with circumflex and dot below |
+ * | U+1EC7 | ệ | e | Latin small letter e with circumflex and dot below |
+ * | U+1EC8 | Ỉ | I | Latin capital letter I with hook above |
+ * | U+1EC9 | ỉ | i | Latin small letter i with hook above |
+ * | U+1ECA | Ị | I | Latin capital letter I with dot below |
+ * | U+1ECB | ị | i | Latin small letter i with dot below |
+ * | U+1ECC | Ọ | O | Latin capital letter O with dot below |
+ * | U+1ECD | ọ | o | Latin small letter o with dot below |
+ * | U+1ECE | Ỏ | O | Latin capital letter O with hook above |
+ * | U+1ECF | ỏ | o | Latin small letter o with hook above |
+ * | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute |
+ * | U+1ED1 | ố | o | Latin small letter o with circumflex and acute |
+ * | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave |
+ * | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave |
+ * | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above |
+ * | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above |
+ * | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde |
+ * | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde |
+ * | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below |
+ * | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below |
+ * | U+1EDA | Ớ | O | Latin capital letter O with horn and acute |
+ * | U+1EDB | ớ | o | Latin small letter o with horn and acute |
+ * | U+1EDC | Ờ | O | Latin capital letter O with horn and grave |
+ * | U+1EDD | ờ | o | Latin small letter o with horn and grave |
+ * | U+1EDE | Ở | O | Latin capital letter O with horn and hook above |
+ * | U+1EDF | ở | o | Latin small letter o with horn and hook above |
+ * | U+1EE0 | Ỡ | O | Latin capital letter O with horn and tilde |
+ * | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde |
+ * | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below |
+ * | U+1EE3 | ợ | o | Latin small letter o with horn and dot below |
+ * | U+1EE4 | Ụ | U | Latin capital letter U with dot below |
+ * | U+1EE5 | ụ | u | Latin small letter u with dot below |
+ * | U+1EE6 | Ủ | U | Latin capital letter U with hook above |
+ * | U+1EE7 | ủ | u | Latin small letter u with hook above |
+ * | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute |
+ * | U+1EE9 | ứ | u | Latin small letter u with horn and acute |
+ * | U+1EEA | Ừ | U | Latin capital letter U with horn and grave |
+ * | U+1EEB | ừ | u | Latin small letter u with horn and grave |
+ * | U+1EEC | Ử | U | Latin capital letter U with horn and hook above |
+ * | U+1EED | ử | u | Latin small letter u with horn and hook above |
+ * | U+1EEE | Ữ | U | Latin capital letter U with horn and tilde |
+ * | U+1EEF | ữ | u | Latin small letter u with horn and tilde |
+ * | U+1EF0 | Ự | U | Latin capital letter U with horn and dot below |
+ * | U+1EF1 | ự | u | Latin small letter u with horn and dot below |
+ * | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave |
+ * | U+1EF3 | ỳ | y | Latin small letter y with grave |
+ * | U+1EF4 | Ỵ | Y | Latin capital letter Y with dot below |
+ * | U+1EF5 | ỵ | y | Latin small letter y with dot below |
+ * | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above |
+ * | U+1EF7 | ỷ | y | Latin small letter y with hook above |
+ * | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde |
+ * | U+1EF9 | ỹ | y | Latin small letter y with tilde |
+ *
+ * German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`),
+ * and German (Switzerland) informal (`de_CH_informal`) locales:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis |
+ * | U+00E4 | ä | ae | Latin small letter a with diaeresis |
+ * | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis |
+ * | U+00F6 | ö | oe | Latin small letter o with diaeresis |
+ * | U+00DC | Ü | Ue | Latin capital letter U with diaeresis |
+ * | U+00FC | ü | ue | Latin small letter u with diaeresis |
+ * | U+00DF | ß | ss | Latin small letter sharp s |
+ *
+ * Danish (`da_DK`) locale:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+00C6 | Æ | Ae | Latin capital letter AE |
+ * | U+00E6 | æ | ae | Latin small letter ae |
+ * | U+00D8 | Ø | Oe | Latin capital letter O with stroke |
+ * | U+00F8 | ø | oe | Latin small letter o with stroke |
+ * | U+00C5 | Å | Aa | Latin capital letter A with ring above |
+ * | U+00E5 | å | aa | Latin small letter a with ring above |
+ *
+ * Catalan (`ca`) locale:
+ *
+ * | Code | Glyph | Replacement | Description |
+ * | -------- | ----- | ----------- | --------------------------------------- |
+ * | U+00B7 | l·l | ll | Flown dot (between two Ls) |
+ *
+ * @since 1.2.1
+ * @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`.
+ *
+ * @param string $string Text that might have accent characters
+ * @return string Filtered string with replaced "nice" characters.
+ */
+function remove_accents( $string ) {
+ if ( !preg_match('/[\x80-\xff]/', $string) )
+ return $string;
+
+ if (seems_utf8($string)) {
+ $chars = array(
+ // Decompositions for Latin-1 Supplement
+ chr(194).chr(170) => 'a', chr(194).chr(186) => 'o',
+ chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
+ chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
+ chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
+ chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
+ chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
+ chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
+ chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
+ chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
+ chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
+ chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
+ chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
+ chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
+ chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
+ chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
+ chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
+ chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
+ chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
+ chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
+ chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
+ chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
+ chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
+ chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
+ chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
+ chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
+ chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
+ chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
+ chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
+ chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
+ chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
+ chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
+ chr(195).chr(191) => 'y', chr(195).chr(152) => 'O',
+ // Decompositions for Latin Extended-A
+ chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
+ chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
+ chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
+ chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
+ chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
+ chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
+ chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
+ chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
+ chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
+ chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
+ chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
+ chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
+ chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
+ chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
+ chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
+ chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
+ chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
+ chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
+ chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
+ chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
+ chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
+ chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
+ chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
+ chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
+ chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
+ chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
+ chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
+ chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
+ chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
+ chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
+ chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
+ chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
+ chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
+ chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
+ chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
+ chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
+ chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
+ chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
+ chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
+ chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
+ chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
+ chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
+ chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
+ chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
+ chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
+ chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
+ chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
+ chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
+ chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
+ chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
+ chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
+ chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
+ chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
+ chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
+ chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
+ chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
+ chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
+ chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
+ chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
+ chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
+ chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
+ chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
+ chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
+ chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
+ // Decompositions for Latin Extended-B
+ chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
+ chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
+ // Euro Sign
+ chr(226).chr(130).chr(172) => 'E',
+ // GBP (Pound) Sign
+ chr(194).chr(163) => '',
+ // Vowels with diacritic (Vietnamese)
+ // unmarked
+ chr(198).chr(160) => 'O', chr(198).chr(161) => 'o',
+ chr(198).chr(175) => 'U', chr(198).chr(176) => 'u',
+ // grave accent
+ chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a',
+ chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a',
+ chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e',
+ chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o',
+ chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o',
+ chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u',
+ chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y',
+ // hook
+ chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a',
+ chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a',
+ chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a',
+ chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e',
+ chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e',
+ chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i',
+ chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o',
+ chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o',
+ chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o',
+ chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u',
+ chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u',
+ chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y',
+ // tilde
+ chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a',
+ chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a',
+ chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e',
+ chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e',
+ chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o',
+ chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o',
+ chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u',
+ chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y',
+ // acute accent
+ chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a',
+ chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a',
+ chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e',
+ chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o',
+ chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o',
+ chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u',
+ // dot below
+ chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a',
+ chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a',
+ chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a',
+ chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e',
+ chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e',
+ chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i',
+ chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o',
+ chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o',
+ chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o',
+ chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u',
+ chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u',
+ chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y',
+ // Vowels with diacritic (Chinese, Hanyu Pinyin)
+ chr(201).chr(145) => 'a',
+ // macron
+ chr(199).chr(149) => 'U', chr(199).chr(150) => 'u',
+ // acute accent
+ chr(199).chr(151) => 'U', chr(199).chr(152) => 'u',
+ // caron
+ chr(199).chr(141) => 'A', chr(199).chr(142) => 'a',
+ chr(199).chr(143) => 'I', chr(199).chr(144) => 'i',
+ chr(199).chr(145) => 'O', chr(199).chr(146) => 'o',
+ chr(199).chr(147) => 'U', chr(199).chr(148) => 'u',
+ chr(199).chr(153) => 'U', chr(199).chr(154) => 'u',
+ // grave accent
+ chr(199).chr(155) => 'U', chr(199).chr(156) => 'u',
+ );
+
+ // Used for locale-specific rules
+ $locale = get_locale();
+
+ if ( 'de_DE' == $locale || 'de_DE_formal' == $locale || 'de_CH' == $locale || 'de_CH_informal' == $locale ) {
+ $chars[ chr(195).chr(132) ] = 'Ae';
+ $chars[ chr(195).chr(164) ] = 'ae';
+ $chars[ chr(195).chr(150) ] = 'Oe';
+ $chars[ chr(195).chr(182) ] = 'oe';
+ $chars[ chr(195).chr(156) ] = 'Ue';
+ $chars[ chr(195).chr(188) ] = 'ue';
+ $chars[ chr(195).chr(159) ] = 'ss';
+ } elseif ( 'da_DK' === $locale ) {
+ $chars[ chr(195).chr(134) ] = 'Ae';
+ $chars[ chr(195).chr(166) ] = 'ae';
+ $chars[ chr(195).chr(152) ] = 'Oe';
+ $chars[ chr(195).chr(184) ] = 'oe';
+ $chars[ chr(195).chr(133) ] = 'Aa';
+ $chars[ chr(195).chr(165) ] = 'aa';
+ } elseif ( 'ca' === $locale ) {
+ $chars[ chr(108).chr(194).chr(183).chr(108) ] = 'll';
+ }
+
+ $string = strtr($string, $chars);
+ } else {
+ $chars = array();
+ // Assume ISO-8859-1 if not UTF-8
+ $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
+ .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
+ .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
+ .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
+ .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
+ .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
+ .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
+ .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
+ .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
+ .chr(252).chr(253).chr(255);
+
+ $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
+
+ $string = strtr($string, $chars['in'], $chars['out']);
+ $double_chars = array();
+ $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
+ $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
+ $string = str_replace($double_chars['in'], $double_chars['out'], $string);
+ }
+
+ return $string;
+}
+
+/**
+ * Sanitizes a filename, replacing whitespace with dashes.
+ *
+ * Strips characters that are illegal in filenames on certain operating
+ * systems or that need escaping on the command line, collapses runs of
+ * whitespace and dashes into a single dash, and trims periods, dashes and
+ * underscores from both ends. When the name has more than one extension,
+ * each intermediate extension that looks like a file type but is not matched
+ * by the allowed MIME type list gets a trailing underscore appended.
+ * It is not guaranteed that this function will return a filename that is
+ * allowed to be uploaded.
+ *
+ * @since 2.1.0
+ *
+ * @param string $filename The filename to be sanitized
+ * @return string The sanitized filename
+ */
+function sanitize_file_name( $filename ) {
+	$original_name = $filename;
+	$stripped      = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", "%", "+", chr(0));
+	/**
+	 * Filters the list of characters to remove from a filename.
+	 *
+	 * @since 2.8.0
+	 *
+	 * @param array  $special_chars Characters to remove.
+	 * @param string $filename_raw  Filename as it was passed into sanitize_file_name().
+	 */
+	$stripped = apply_filters( 'sanitize_file_name_chars', $stripped, $original_name );
+
+	// Non-breaking spaces become plain spaces so the whitespace rule below catches them.
+	$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
+	$filename = str_replace( $stripped, '', $filename );
+	$filename = str_replace( array( '%20', '+' ), '-', $filename );
+	$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );
+	$filename = trim( $filename, '.-_' );
+
+	// A dot-less name that is itself a known extension gets a placeholder base name.
+	if ( false === strpos( $filename, '.' ) ) {
+		$type_info = wp_check_filetype( 'test.' . $filename, wp_get_mime_types() );
+		if ( $type_info['ext'] === $filename ) {
+			$filename = 'unnamed-file.' . $type_info['ext'];
+		}
+	}
+
+	// Split the filename into a base and extension[s].
+	$segments = explode( '.', $filename );
+
+	// Only names with more than one extension need the intermediate-extension pass.
+	if ( count( $segments ) > 2 ) {
+		$rebuilt   = array_shift( $segments );
+		$last_ext  = array_pop( $segments );
+		$whitelist = get_allowed_mime_types();
+
+		/*
+		 * Walk the intermediate extensions. Any 2 - 5 letter alpha string (optionally
+		 * followed by one digit) that no allowed extension pattern matches is
+		 * postfixed with an underscore.
+		 */
+		foreach ( $segments as $segment ) {
+			$rebuilt .= '.' . $segment;
+
+			if ( ! preg_match( "/^[a-zA-Z]{2,5}\d?$/", $segment ) ) {
+				continue;
+			}
+
+			$is_known = false;
+			foreach ( $whitelist as $ext_pattern => $unused_mime ) {
+				if ( preg_match( '!^(' . $ext_pattern . ')$!i', $segment ) ) {
+					$is_known = true;
+					break;
+				}
+			}
+
+			if ( ! $is_known ) {
+				$rebuilt .= '_';
+			}
+		}
+
+		$filename = $rebuilt . '.' . $last_ext;
+	}
+
+	/**
+	 * Filters a sanitized filename string.
+	 *
+	 * @since 2.8.0
+	 *
+	 * @param string $filename     Sanitized filename.
+	 * @param string $filename_raw The filename prior to sanitization.
+	 */
+	return apply_filters( 'sanitize_file_name', $filename, $original_name );
+}
+
+/**
+ * Sanitizes a username, stripping out unsafe characters.
+ *
+ * Tags, accents, percent-encoded octets and HTML entities are removed. In
+ * strict mode only alphanumeric characters, _, space, ., -, and @ survive.
+ * The result is trimmed, runs of whitespace are reduced to a single space,
+ * and the value is passed through the {@see 'sanitize_user'} filter together
+ * with the raw username and the value of $strict.
+ *
+ * @since 2.0.0
+ *
+ * @param string $username The username to be sanitized.
+ * @param bool   $strict   If set limits $username to specific characters. Default false.
+ * @return string The sanitized username, after passing through filters.
+ */
+function sanitize_user( $username, $strict = false ) {
+	$unfiltered = $username;
+
+	$clean = remove_accents( wp_strip_all_tags( $username ) );
+
+	// Kill octets first, then entities.
+	$clean = preg_replace(
+		array( '|%([a-fA-F0-9][a-fA-F0-9])|', '/&.+?;/' ),
+		'',
+		$clean
+	);
+
+	// If strict, reduce to ASCII for max portability.
+	if ( $strict ) {
+		$clean = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $clean );
+	}
+
+	// Trim, then consolidate contiguous whitespace.
+	$clean = preg_replace( '|\s+|', ' ', trim( $clean ) );
+
+	/**
+	 * Filters a sanitized username string.
+	 *
+	 * @since 2.0.1
+	 *
+	 * @param string $username     Sanitized username.
+	 * @param string $raw_username The username prior to sanitization.
+	 * @param bool   $strict       Whether to limit the sanitization to specific characters. Default false.
+	 */
+	return apply_filters( 'sanitize_user', $clean, $unfiltered, $strict );
+}
+
+/**
+ * Sanitizes a string key.
+ *
+ * Keys are used as internal identifiers. Lowercase alphanumeric characters, dashes and underscores are allowed.
+ * Non-scalar input (arrays, objects, null) cannot be a key and sanitizes to an
+ * empty string instead of being handed to strtolower().
+ *
+ * @since 3.0.0
+ *
+ * @param string $key String key
+ * @return string Sanitized key
+ */
+function sanitize_key( $key ) {
+	$sanitized_key = '';
+
+	// strtolower() warns on arrays/objects (and throws a TypeError on PHP 8),
+	// so only scalar values are lowercased and filtered.
+	if ( is_scalar( $key ) ) {
+		$sanitized_key = preg_replace( '/[^a-z0-9_\-]/', '', strtolower( $key ) );
+	}
+
+	/**
+	 * Filters a sanitized key string.
+	 *
+	 * @since 3.0.0
+	 *
+	 * @param string $key     Sanitized key.
+	 * @param string $raw_key The key prior to sanitization.
+	 */
+	return apply_filters( 'sanitize_key', $sanitized_key, $key );
+}
+
+/**
+ * Sanitizes a title, or returns a fallback title.
+ *
+ * In the 'save' context accents are removed first. The value is then passed
+ * through the {@see 'sanitize_title'} filter, where further sanitization can
+ * be attached via the plugin API. If the filtered title is an empty string
+ * or false, $fallback_title is returned instead.
+ *
+ * @since 1.0.0
+ *
+ * @param string $title          The string to be sanitized.
+ * @param string $fallback_title Optional. A title to use if $title is empty.
+ * @param string $context        Optional. The operation for which the string is sanitized
+ * @return string The sanitized string.
+ */
+function sanitize_title( $title, $fallback_title = '', $context = 'save' ) {
+	$candidate = ( 'save' == $context ) ? remove_accents( $title ) : $title;
+
+	/**
+	 * Filters a sanitized title string.
+	 *
+	 * @since 1.2.0
+	 *
+	 * @param string $title     Sanitized title.
+	 * @param string $raw_title The title prior to sanitization.
+	 * @param string $context   The context for which the title is being sanitized.
+	 */
+	$candidate = apply_filters( 'sanitize_title', $candidate, $title, $context );
+
+	// Only an empty string or boolean false triggers the fallback.
+	if ( in_array( $candidate, array( '', false ), true ) ) {
+		return $fallback_title;
+	}
+
+	return $candidate;
+}
+
+/**
+ * Sanitizes a title with the 'query' context.
+ *
+ * Thin wrapper around sanitize_title() with an empty fallback title, used
+ * when querying the database for a value taken from a URL.
+ *
+ * @since 3.1.0
+ *
+ * @param string $title The string to be sanitized.
+ * @return string The sanitized string.
+ */
+function sanitize_title_for_query( $title ) {
+	$no_fallback = '';
+
+	return sanitize_title( $title, $no_fallback, 'query' );
+}
+
+/**
+ * Sanitizes a title, replacing whitespace and a few other characters with dashes.
+ *
+ * Limits the output to alphanumeric characters, underscore (_) and dash (-),
+ * plus percent-encoded octets. Whitespace becomes a dash.
+ *
+ * In the 'save' context, non-breaking spaces and en/em dashes (both as
+ * percent-encoded UTF-8 and as HTML entities) are turned into hyphens, a set
+ * of typographic characters is stripped, and the multiplication sign becomes
+ * the letter x.
+ *
+ * @since 1.2.0
+ *
+ * @param string $title     The title to be sanitized.
+ * @param string $raw_title Optional. Not used.
+ * @param string $context   Optional. The operation for which the string is sanitized.
+ * @return string The sanitized title.
+ */
+function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) {
+	$title = strip_tags( $title );
+	// Preserve escaped octets.
+	$title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title );
+	// Remove percent signs that are not part of an octet.
+	$title = str_replace( '%', '', $title );
+	// Restore octets.
+	$title = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title );
+
+	if ( seems_utf8( $title ) ) {
+		if ( function_exists( 'mb_strtolower' ) ) {
+			$title = mb_strtolower( $title, 'UTF-8' );
+		}
+		$title = utf8_uri_encode( $title, 200 );
+	}
+
+	// From here on everything (octets and entity names included) is lowercase.
+	$title = strtolower( $title );
+
+	if ( 'save' == $context ) {
+		// Convert nbsp, ndash and mdash to hyphens
+		$title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title );
+		// Convert nbsp, ndash and mdash HTML entities to hyphens.
+		// These must be the entity strings themselves, not the characters they stand for.
+		$title = str_replace( array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' ), '-', $title );
+
+		// Strip these characters entirely
+		$title = str_replace( array(
+			// iexcl and iquest
+			'%c2%a1', '%c2%bf',
+			// angle quotes
+			'%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba',
+			// curly quotes
+			'%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d',
+			'%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f',
+			// copy, reg, deg, hellip and trade
+			'%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2',
+			// acute accents
+			'%c2%b4', '%cb%8a', '%cc%81', '%cd%81',
+			// grave accent, macron, caron
+			'%cc%80', '%cc%84', '%cc%8c',
+		), '', $title );
+
+		// Convert times to x
+		$title = str_replace( '%c3%97', 'x', $title );
+	}
+
+	$title = preg_replace( '/&.+?;/', '', $title ); // kill entities
+	$title = str_replace( '.', '-', $title );
+
+	$title = preg_replace( '/[^%a-z0-9 _-]/', '', $title );
+	$title = preg_replace( '/\s+/', '-', $title );
+	$title = preg_replace( '|-+|', '-', $title );
+	$title = trim( $title, '-' );
+
+	return $title;
+}
+
+/**
+ * Ensures a string is a valid SQL 'order by' clause.
+ *
+ * Accepts one or more columns (optionally backtick-quoted), each with or
+ * without a sort order (ASC / DESC), e.g. 'column_1', 'column_1, column_2',
+ * 'column_1 ASC, column_2 DESC' etc.
+ *
+ * Also accepts 'RAND()'.
+ *
+ * @since 2.5.1
+ *
+ * @param string $orderby Order by clause to be validated.
+ * @return string|false Returns $orderby if valid, false otherwise.
+ */
+function sanitize_sql_orderby( $orderby ) {
+	// A comma separated list of column names, each optionally followed by ASC or DESC.
+	if ( preg_match( '/^\s*(([a-z0-9_]+|`[a-z0-9_]+`)(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_`])|$))+$/i', $orderby ) ) {
+		return $orderby;
+	}
+
+	// The random-order function call.
+	if ( preg_match( '/^\s*RAND\(\s*\)\s*$/i', $orderby ) ) {
+		return $orderby;
+	}
+
+	return false;
+}
+
+/**
+ * Sanitizes an HTML classname to ensure it only contains valid characters.
+ *
+ * Percent-encoded octets are removed first, then everything outside
+ * A-Z,a-z,0-9,_,- is stripped. If nothing is left and a fallback was
+ * supplied, the sanitized fallback is returned instead.
+ *
+ * @todo Expand to support the full range of CDATA that a class attribute can contain.
+ *
+ * @since 2.8.0
+ *
+ * @param string $class    The classname to be sanitized
+ * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
+ *                         Defaults to an empty string.
+ * @return string The sanitized value
+ */
+function sanitize_html_class( $class, $fallback = '' ) {
+	$cleaned = preg_replace(
+		array(
+			// Strip out any % encoded octets
+			'|%[a-fA-F0-9][a-fA-F0-9]|',
+			// Limit to A-Z,a-z,0-9,_,-
+			'/[^A-Za-z0-9_-]/',
+		),
+		'',
+		$class
+	);
+
+	if ( $fallback && '' == $cleaned ) {
+		return sanitize_html_class( $fallback );
+	}
+
+	/**
+	 * Filters a sanitized HTML class string.
+	 *
+	 * @since 2.8.0
+	 *
+	 * @param string $sanitized The sanitized HTML class.
+	 * @param string $class     HTML class before sanitization.
+	 * @param string $fallback  The fallback string.
+	 */
+	return apply_filters( 'sanitize_html_class', $cleaned, $class, $fallback );
+}
+
+/**
+ * Converts lone & characters into `&#038;` (a.k.a. `&amp;`)
+ *
+ * An ampersand is left alone when it starts a numeric character reference
+ * (it is followed by `#`) or something shaped like a named entity (one
+ * further character, then up to eight letters or the digits 1-4, then a
+ * semicolon).
+ *
+ * @since 0.71
+ *
+ * @param string $content    String of characters to be converted.
+ * @param string $deprecated Not used.
+ * @return string Converted string.
+ */
+function convert_chars( $content, $deprecated = '' ) {
+	if ( ! empty( $deprecated ) ) {
+		_deprecated_argument( __FUNCTION__, '0.71' );
+	}
+
+	// Skip the regex entirely when there is no ampersand to look at.
+	if ( strpos( $content, '&' ) !== false ) {
+		// The replacement has to be the numeric entity; a bare "&" would make this a no-op.
+		$content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content );
+	}
+
+	return $content;
+}
+
+/**
+ * Converts invalid Unicode references range to valid range.
+ *
+ * Numeric character references 128-159 are not valid Unicode references,
+ * but they are commonly produced from Windows CP1252 text. Each one is
+ * rewritten to the reference of the character it stands for in CP1252;
+ * the five positions that CP1252 leaves undefined (129, 141, 143, 144, 157)
+ * are removed.
+ *
+ * @since 4.3.0
+ *
+ * @param string $content String with entities that need converting.
+ * @return string Converted string.
+ */
+function convert_invalid_entities( $content ) {
+	// Keys and values must stay as entity strings: decoding them to raw
+	// characters turns the map into identity pairs with duplicate keys.
+	$wp_htmltranswinuni = array(
+		'&#128;' => '&#8364;', // the Euro sign
+		'&#129;' => '',
+		'&#130;' => '&#8218;', // these are Windows CP1252 specific characters
+		'&#131;' => '&#402;',  // they would look weird on non-Windows browsers
+		'&#132;' => '&#8222;',
+		'&#133;' => '&#8230;',
+		'&#134;' => '&#8224;',
+		'&#135;' => '&#8225;',
+		'&#136;' => '&#710;',
+		'&#137;' => '&#8240;',
+		'&#138;' => '&#352;',
+		'&#139;' => '&#8249;',
+		'&#140;' => '&#338;',
+		'&#141;' => '',
+		'&#142;' => '&#381;',
+		'&#143;' => '',
+		'&#144;' => '',
+		'&#145;' => '&#8216;',
+		'&#146;' => '&#8217;',
+		'&#147;' => '&#8220;',
+		'&#148;' => '&#8221;',
+		'&#149;' => '&#8226;',
+		'&#150;' => '&#8211;',
+		'&#151;' => '&#8212;',
+		'&#152;' => '&#732;',
+		'&#153;' => '&#8482;',
+		'&#154;' => '&#353;',
+		'&#155;' => '&#8250;',
+		'&#156;' => '&#339;',
+		'&#157;' => '',
+		'&#158;' => '&#382;',
+		'&#159;' => '&#376;'
+	);
+
+	// Every key starts with "&#1", so content without that prefix cannot need translating.
+	if ( strpos( $content, '&#1' ) !== false ) {
+		$content = strtr( $content, $wp_htmltranswinuni );
+	}
+
+	return $content;
+}
+
+/**
+ * Balances tags if forced to, or if the 'use_balanceTags' option is set to true.
+ *
+ * The option is only consulted when $force is false.
+ *
+ * @since 0.71
+ *
+ * @param string $text  Text to be balanced
+ * @param bool   $force If true, forces balancing, ignoring the value of the option. Default false.
+ * @return string Balanced text
+ */
+function balanceTags( $text, $force = false ) {
+	$should_balance = $force || 1 == get_option( 'use_balanceTags' );
+
+	return $should_balance ? force_balance_tags( $text ) : $text;
+}
+
+/**
+ * Balances tags of string using a modified stack.
+ *
+ * Walks the text tag by tag. Opening tags are pushed on a stack, closing
+ * tags pop it (closing any tags left open in between), stray closing tags
+ * are dropped, known single-entity tags are made self-closing, and whatever
+ * is still open at the end is closed.
+ *
+ * HTML comments pass through the tag matcher and come out as "< !--", which
+ * is turned back into "<!--" at the end. To keep a "< !--" that was typed on
+ * purpose from being rewritten as well, it is swapped for a wider placeholder
+ * before parsing and restored afterwards.
+ *
+ * @since 2.0.4
+ *
+ * @author Leonard Lin <leonard@acm.org>
+ * @license GPL
+ * @copyright November 4, 2001
+ * @version 1.1
+ * @todo Make better - change loop condition to $text in 1.2
+ * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
+ *      1.1  Fixed handling of append/stack pop order of end text
+ *           Added Cleaning Hooks
+ *      1.0  First Version
+ *
+ * @param string $text Text to be balanced.
+ * @return string Balanced text.
+ */
+function force_balance_tags( $text ) {
+	$tagstack  = array();
+	$stacksize = 0;
+	$tagqueue  = '';
+	$newtext   = '';
+	// Known single-entity/self-closing tags
+	$single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source' );
+	// Tags that can be immediately nested within themselves
+	$nestable_tags = array( 'blockquote', 'div', 'object', 'q', 'span' );
+
+	// What a comment opener looks like after the tag rebuild below puts a space in front of the attributes.
+	$spaced_comment = '< !--';
+	// Placeholder for a literal "< !--" in the input. It must differ from $spaced_comment,
+	// so the four spaces are generated rather than typed to survive whitespace-collapsing tools.
+	$guarded_comment = '<' . str_repeat( ' ', 4 ) . '!--';
+
+	// WP bug fix for comments - in case you REALLY meant to type '< !--'
+	$text = str_replace( $spaced_comment, $guarded_comment, $text );
+	// WP bug fix for LOVE <3 (and other situations with '<' before a number)
+	$text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text );
+
+	while ( preg_match( "/<(\/?[\w:]*)\s*([^>]*)>/", $text, $regex ) ) {
+		$newtext .= $tagqueue;
+
+		$i = strpos( $text, $regex[0] );
+		$l = strlen( $regex[0] );
+
+		// clear the shifter
+		$tagqueue = '';
+		// Pop or Push
+		if ( isset( $regex[1][0] ) && '/' == $regex[1][0] ) { // End Tag
+			$tag = strtolower( substr( $regex[1], 1 ) );
+			// if too many closing tags
+			if ( $stacksize <= 0 ) {
+				$tag = '';
+				// or close to be safe $tag = '/' . $tag;
+			}
+			// if stacktop value = tag close value then pop
+			elseif ( $tagstack[ $stacksize - 1 ] == $tag ) { // found closing tag
+				$tag = '</' . $tag . '>'; // Close Tag
+				// Pop
+				array_pop( $tagstack );
+				$stacksize--;
+			} else { // closing tag not at top, search for it
+				for ( $j = $stacksize - 1; $j >= 0; $j-- ) {
+					if ( $tagstack[ $j ] == $tag ) {
+						// add tag to tagqueue: close everything from the top of the stack down to the match
+						for ( $k = $stacksize - 1; $k >= $j; $k-- ) {
+							$tagqueue .= '</' . array_pop( $tagstack ) . '>';
+							$stacksize--;
+						}
+						break;
+					}
+				}
+				$tag = '';
+			}
+		} else { // Begin Tag
+			$tag = strtolower( $regex[1] );
+
+			// Tag Cleaning
+
+			// If it's an empty tag "< >", do nothing
+			if ( '' == $tag ) {
+				// do nothing
+			}
+			// ElseIf it presents itself as a self-closing tag...
+			elseif ( substr( $regex[2], -1 ) == '/' ) {
+				// ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and
+				// immediately close it with a closing tag (the tag will encapsulate no text as a result)
+				if ( ! in_array( $tag, $single_tags ) ) {
+					$regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag";
+				}
+			}
+			// ElseIf it's a known single-entity tag but it doesn't close itself, do so
+			elseif ( in_array( $tag, $single_tags ) ) {
+				$regex[2] .= '/';
+			}
+			// Else it's not a single-entity tag
+			else {
+				// If the top of the stack is the same as the tag we want to push, close previous tag
+				if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags ) && $tagstack[ $stacksize - 1 ] == $tag ) {
+					$tagqueue = '</' . array_pop( $tagstack ) . '>';
+					$stacksize--;
+				}
+				$stacksize = array_push( $tagstack, $tag );
+			}
+
+			// Attributes
+			$attributes = $regex[2];
+			if ( ! empty( $attributes ) && $attributes[0] != '>' ) {
+				$attributes = ' ' . $attributes;
+			}
+
+			$tag = '<' . $tag . $attributes . '>';
+			// If already queuing a close tag, then put this tag on, too
+			if ( ! empty( $tagqueue ) ) {
+				$tagqueue .= $tag;
+				$tag = '';
+			}
+		}
+		$newtext .= substr( $text, 0, $i ) . $tag;
+		$text = substr( $text, $i + $l );
+	}
+
+	// Clear Tag Queue
+	$newtext .= $tagqueue;
+
+	// Add Remaining text
+	$newtext .= $text;
+
+	// Empty Stack
+	while ( $x = array_pop( $tagstack ) ) {
+		$newtext .= '</' . $x . '>'; // Add remaining tags to close
+	}
+
+	// WP fix for the bug with HTML comments: undo the space added to comment
+	// openers first, then bring back any literal "< !--" from its placeholder.
+	$newtext = str_replace( $spaced_comment, '<!--', $newtext );
+	$newtext = str_replace( $guarded_comment, $spaced_comment, $newtext );
+
+	return $newtext;
+}
+
+/**
+ * Acts on text which is about to be edited.
+ *
+ * The $content is first passed through the {@see 'format_to_edit'} filter.
+ * Unless `$rich_text` is true, the filtered text is then run through
+ * esc_textarea(); for rich text the filtered value is returned as is.
+ *
+ * @since 0.71
+ * @since 4.4.0 The `$richedit` parameter was renamed to `$rich_text` for clarity.
+ *
+ * @param string $content   The text about to be edited.
+ * @param bool   $rich_text Optional. Whether `$content` should be considered rich text,
+ *                          in which case it would not be passed through esc_textarea().
+ *                          Default false.
+ * @return string The text after the filter (and possibly esc_textarea()) has been run.
+ */
+function format_to_edit( $content, $rich_text = false ) {
+	/**
+	 * Filters the text to be formatted for editing.
+	 *
+	 * @since 1.2.0
+	 *
+	 * @param string $content The text, prior to formatting for editing.
+	 */
+	$filtered = apply_filters( 'format_to_edit', $content );
+
+	return $rich_text ? $filtered : esc_textarea( $filtered );
+}
+
+/**
+ * Add leading zeros when necessary.
+ *
+ * If you set the threshold to '4' and the number is '10', then you will get
+ * back '0010'. If you set the threshold to '4' and the number is '5000', then you
+ * will get back '5000'.
+ *
+ * Uses sprintf with a zero-padded string conversion whose width is the
+ * $threshold parameter. If the number already has at least that many
+ * characters, no zeros are added.
+ *
+ * @since 0.71
+ *
+ * @param int $number    Number to prepend zeros to if not greater than threshold.
+ * @param int $threshold Digit places number needs to be to not have zeros added.
+ * @return string Adds leading zeros to number if needed.
+ */
+function zeroise( $number, $threshold ) {
+	$zero_padded_format = '%0' . $threshold . 's';
+
+	return sprintf( $zero_padded_format, $number );
+}
+
+/**
+ * Adds backslashes before letters and before a number at the start of a string.
+ *
+ * A string that begins with a digit is prefixed with two backslashes; every
+ * ASCII letter in the string is then escaped with a single backslash.
+ *
+ * @since 0.71
+ *
+ * @param string $string Value to which backslashes will be added.
+ * @return string String with backslashes inserted.
+ */
+function backslashit( $string ) {
+	$leads_with_digit = isset( $string[0] ) && '0' <= $string[0] && '9' >= $string[0];
+	$prefixed         = $leads_with_digit ? '\\\\' . $string : $string;
+
+	return addcslashes( $prefixed, 'A..Za..z' );
+}
+
+/**
+ * Appends a trailing slash.
+ *
+ * Any trailing forward slashes and backslashes are removed first (via
+ * untrailingslashit()) and a single forward slash is then added, so a string
+ * or path never ends up double slashed.
+ *
+ * The primary use of this is for paths and thus should be used for paths. It is
+ * not restricted to paths and offers no specific path support.
+ *
+ * @since 1.2.0
+ *
+ * @param string $string What to add the trailing slash to.
+ * @return string String with trailing slash added.
+ */
+function trailingslashit( $string ) {
+	$without_slash = untrailingslashit( $string );
+
+	return $without_slash . '/';
+}
+
+/**
+ * Removes trailing forward slashes and backslashes if they exist.
+ *
+ * The primary use of this is for paths and thus should be used for paths. It is
+ * not restricted to paths and offers no specific path support.
+ *
+ * @since 2.2.0
+ *
+ * @param string $string What to remove the trailing slashes from.
+ * @return string String without the trailing slashes.
+ */
+function untrailingslashit( $string ) {
+	// Both slash flavours are trimmed, in any mix and any number.
+	$slash_characters = '/\\';
+
+	return rtrim( $string, $slash_characters );
+}
+
+/**
+ * Adds slashes to escape strings.
+ *
+ * Slashes will first be removed if magic_quotes_gpc is set, see {@link
+ * https://secure.php.net/magic_quotes} for more details. On PHP versions
+ * that no longer provide get_magic_quotes_gpc() the removal step is skipped.
+ *
+ * @since 0.71
+ *
+ * @param string $gpc The string returned from HTTP request data.
+ * @return string Returns a string escaped with slashes.
+ */
+function addslashes_gpc( $gpc ) {
+	// get_magic_quotes_gpc() was removed in PHP 8.0; calling it unguarded is a fatal error there.
+	if ( function_exists( 'get_magic_quotes_gpc' ) && get_magic_quotes_gpc() ) {
+		$gpc = stripslashes( $gpc );
+	}
+
+	return wp_slash( $gpc );
+}
+
+/**
+ * Navigates through an array, object, or scalar, and removes slashes from the values.
+ *
+ * Delegates the traversal to map_deep(), using
+ * stripslashes_from_strings_only() as the per-value callback.
+ *
+ * @since 2.0.0
+ *
+ * @param mixed $value The value to be stripped.
+ * @return mixed Stripped value.
+ */
+function stripslashes_deep( $value ) {
+	$stripped = map_deep( $value, 'stripslashes_from_strings_only' );
+
+	return $stripped;
+}
+
+/**
+ * Callback function for `stripslashes_deep()` which strips slashes from strings.
+ *
+ * Anything that is not a string is handed back unchanged.
+ *
+ * @since 4.4.0
+ *
+ * @param mixed $value The array or string to be stripped.
+ * @return mixed $value The stripped value.
+ */
+function stripslashes_from_strings_only( $value ) {
+	if ( ! is_string( $value ) ) {
+		return $value;
+	}
+
+	return stripslashes( $value );
+}
+
+/**
+ * Navigates through an array, object, or scalar, and encodes the values to be used in a URL.
+ *
+ * Delegates the traversal to map_deep(), using urlencode() as the per-value callback.
+ *
+ * @since 2.2.0
+ *
+ * @param mixed $value The array or string to be encoded.
+ * @return mixed $value The encoded value.
+ */
+function urlencode_deep( $value ) {
+	$encoded = map_deep( $value, 'urlencode' );
+
+	return $encoded;
+}
+
+/**
+ * Navigates through an array, object, or scalar, and raw-encodes the values to be used in a URL.
+ *
+ * Delegates the traversal to map_deep(), using rawurlencode() as the per-value callback.
+ *
+ * @since 3.4.0
+ *
+ * @param mixed $value The array or string to be encoded.
+ * @return mixed $value The encoded value.
+ */
+function rawurlencode_deep( $value ) {
+	$encoded = map_deep( $value, 'rawurlencode' );
+
+	return $encoded;
+}
+
+/**
+ * Navigates through an array, object, or scalar, and decodes URL-encoded values
+ *
+ * Delegates the traversal to map_deep(), using urldecode() as the per-value callback.
+ *
+ * @since 4.4.0
+ *
+ * @param mixed $value The array or string to be decoded.
+ * @return mixed $value The decoded value.
+ */
+function urldecode_deep( $value ) {
+	$decoded = map_deep( $value, 'urldecode' );
+
+	return $decoded;
+}
+
+/**
+ * Converts email addresses characters to HTML entities to block spam bots.
+ *
+ * Each character is randomly either written as a decimal character
+ * reference, left as is, or (only when $hex_encoding is enabled) written as
+ * a percent-encoded hex value. Any "@" that was left as is gets replaced by
+ * its character reference at the end, so the output never contains a
+ * literal "@".
+ *
+ * @since 0.71
+ *
+ * @param string $email_address Email address.
+ * @param int    $hex_encoding  Optional. Set to 1 to enable hex encoding.
+ * @return string Converted email address.
+ */
+function antispambot( $email_address, $hex_encoding = 0 ) {
+	$email_no_spam_address = '';
+	for ( $i = 0, $len = strlen( $email_address ); $i < $len; $i++ ) {
+		// 0: character reference, 1: unchanged, 2: percent-encoded (reachable only with $hex_encoding).
+		$j = rand( 0, 1 + $hex_encoding );
+		if ( $j == 0 ) {
+			$email_no_spam_address .= '&#' . ord( $email_address[ $i ] ) . ';';
+		} elseif ( $j == 1 ) {
+			$email_no_spam_address .= $email_address[ $i ];
+		} elseif ( $j == 2 ) {
+			$email_no_spam_address .= '%' . zeroise( dechex( ord( $email_address[ $i ] ) ), 2 );
+		}
+	}
+
+	// The replacement must be the entity string; replacing "@" with "@" would leave the address exposed.
+	return str_replace( '@', '&#64;', $email_no_spam_address );
+}
+
+/**
+ * Callback to convert URI match to HTML A element.
+ *
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
+ *
+ * Reads the leading character from $matches[1], the URL from $matches[2] and
+ * an optional trailing closing parenthesis from $matches[3]. Closing
+ * parentheses are kept in the URL only while they are paired with an opening
+ * one; the rest is emitted after the link. If esc_url() rejects the URL the
+ * whole match is returned unchanged.
+ *
+ * @since 2.3.2
+ * @access private
+ *
+ * @param array $matches Single Regex Match.
+ * @return string HTML A element with URI address.
+ */
+function _make_url_clickable_cb( $matches ) {
+	$link  = $matches[2];
+	$trail = $matches[3];
+
+	// If the trailing character is a closing parenthesis, and the URL has an opening parenthesis in it,
+	// add the closing parenthesis to the URL. Then we can let the parenthesis balancer do its thing below.
+	if ( ')' == $trail && strpos( $link, '(' ) ) {
+		$link .= $trail;
+		$trail = '';
+	}
+
+	// Include parentheses in the URL only if paired: move everything from the
+	// last ")" onwards out of the URL until the counts no longer favour ")".
+	while ( substr_count( $link, ')' ) > substr_count( $link, '(' ) ) {
+		$cut   = strrpos( $link, ')' );
+		$trail = substr( $link, $cut ) . $trail;
+		$link  = substr( $link, 0, $cut );
+	}
+
+	$link = esc_url( $link );
+	if ( empty( $link ) ) {
+		return $matches[0];
+	}
+
+	return $matches[1] . '<a href="' . $link . '" rel="nofollow">' . $link . '</a>' . $trail;
+}
+
+/**
+ * Callback to convert URL match to HTML A element.
+ *
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
+ *
+ * Prefixes the matched address in $matches[2] with "http://", moves a single
+ * trailing ., ,, ;, : or ) out of the link, and returns the whole match
+ * unchanged if esc_url() rejects the result.
+ *
+ * @since 2.3.2
+ * @access private
+ *
+ * @param array $matches Single Regex Match.
+ * @return string HTML A element with URL address.
+ */
+function _make_web_ftp_clickable_cb( $matches ) {
+	$target       = 'http://' . $matches[2];
+	$after_link   = '';
+	$last_char    = substr( $target, -1 );
+	$not_url_tail = array( '.', ',', ';', ':', ')' );
+
+	// removed trailing [.,;:)] from URL
+	if ( in_array( $last_char, $not_url_tail ) ) {
+		$after_link = $last_char;
+		$target     = substr( $target, 0, -1 );
+	}
+
+	$target = esc_url( $target );
+	if ( empty( $target ) ) {
+		return $matches[0];
+	}
+
+	return $matches[1] . '<a href="' . $target . '" rel="nofollow">' . $target . '</a>' . $after_link;
+}
+
+/**
+ * Callback to convert email address match to HTML A element.
+ *
+ * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
+ *
+ * Joins $matches[2] and $matches[3] with an "@" to form the address and
+ * keeps $matches[1] in front of the generated mailto link.
+ *
+ * @since 2.3.2
+ * @access private
+ *
+ * @param array $matches Single Regex Match.
+ * @return string HTML A element with email address.
+ */
+function _make_email_clickable_cb( $matches ) {
+	$address = $matches[2] . '@' . $matches[3];
+
+	return $matches[1] . '<a href="mailto:' . $address . '">' . $address . '</a>';
+}
+
+/**
+ * Convert plaintext URI to HTML links.
+ *
+ * Converts URI, www and ftp, and email addresses. Finishes by fixing links
+ * within links.
+ *
+ * The text is split on HTML tags. Pieces that sit inside code, pre, script
+ * or style elements, and tag pieces that do not start with a scheme, are
+ * passed through untouched. Pieces longer than 10000 bytes are broken up with
+ * _split_str_by_whitespace() and each chunk is processed recursively, except
+ * chunks that are still longer than 2101 bytes, which are passed through.
+ *
+ * @since 0.71
+ *
+ * @param string $text Content to convert URIs.
+ * @return string Content with converted URIs.
+ */
+function make_clickable( $text ) {
+ $r = '';
+ $textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // Split out HTML tags; the tags themselves are kept as pieces.
+ $nested_code_pre = 0; // Keep track of how many levels deep we are inside <code>, <pre>, <script> or <style>.
+ foreach ( $textarr as $piece ) {
+
+ if ( preg_match( '|^<code[\s>]|i', $piece ) || preg_match( '|^<pre[\s>]|i', $piece ) || preg_match( '|^<script[\s>]|i', $piece ) || preg_match( '|^<style[\s>]|i', $piece ) )
+ $nested_code_pre++;
+ elseif ( $nested_code_pre && ( '</code>' === strtolower( $piece ) || '</pre>' === strtolower( $piece ) || '</script>' === strtolower( $piece ) || '</style>' === strtolower( $piece ) ) )
+ $nested_code_pre--;
+
+ if ( $nested_code_pre || empty( $piece ) || ( $piece[0] === '<' && ! preg_match( '|^<\s*[\w]{1,20}+://|', $piece ) ) ) {
+ $r .= $piece;
+ continue;
+ }
+
+ // Long strings might contain expensive edge cases ...
+ if ( 10000 < strlen( $piece ) ) {
+ // ... break it up
+ foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing parentheses
+ if ( 2101 < strlen( $chunk ) ) {
+ $r .= $chunk; // Too big, no whitespace: bail.
+ } else {
+ $r .= make_clickable( $chunk );
+ }
+ }
+ } else {
+ $ret = " $piece "; // Pad with whitespace to simplify the regexes
+
+ $url_clickable = '~
+ ([\\s(<.,;:!?]) # 1: Leading whitespace, or punctuation
+ ( # 2: URL
+ [\\w]{1,20}+:// # Scheme and hier-part prefix
+ (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long
+ [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character
+ (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
+ [\'.,;:!?)] # Punctuation URL character
+ [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character
+ )*
+ )
+ (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing)
+ ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
+ // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
+
+ $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
+
+ $ret = preg_replace_callback( '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret );
+ $ret = preg_replace_callback( '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret );
+
+ $ret = substr( $ret, 1, -1 ); // Remove our whitespace padding.
+ $r .= $ret;
+ }
+ }
+
+ // Cleanup of accidental links within links: when a generated link ended up directly inside an existing one, keep only the outer anchor.
+ return preg_replace( '#(<a([ \r\n\t]+[^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', "$1$3</a>", $r );
+}
+
+/**
+ * Breaks a string into chunks by splitting at whitespace characters.
+ * The length of each returned chunk is as close to the specified length goal as possible,
+ * with the caveat that each chunk includes its trailing delimiter.
+ * Chunks longer than the goal are guaranteed to not have any inner whitespace.
+ *
+ * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
+ * This includes a final chunk that consists only of the character "0".
+ *
+ * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters)
+ *
+ *     _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234567890a 1 ", 10 ) ==
+ *     array (
+ *         0 => '1234 67890 ',  // 11 characters: Perfect split
+ *         1 => '1234 ',        //  5 characters: '1234 67890a' was too long
+ *         2 => '67890a cd ',   // 10 characters: '67890a cd 1' was too long
+ *         3 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split
+ *         4 => '1 ',           //  2 characters: End of $string
+ *     );
+ *
+ * @since 3.4.0
+ * @access private
+ *
+ * @param string $string The string to split.
+ * @param int    $goal   The desired chunk length.
+ * @return array Numeric array of chunks.
+ */
+function _split_str_by_whitespace( $string, $goal ) {
+	$chunks = array();
+
+	// Shadow copy with every whitespace character turned into a null byte, so a
+	// single strpos()/strrpos() call can find "any whitespace". Offsets are identical
+	// in both strings because the translation is one byte for one byte.
+	$string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );
+
+	while ( $goal < strlen( $string_nullspace ) ) {
+		// Last whitespace within the first $goal + 1 bytes...
+		$pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );
+
+		if ( false === $pos ) {
+			// ...or, failing that, the first whitespace after them.
+			$pos = strpos( $string_nullspace, "\000", $goal + 1 );
+			if ( false === $pos ) {
+				break;
+			}
+		}
+
+		$chunks[]         = substr( $string, 0, $pos + 1 );
+		$string           = substr( $string, $pos + 1 );
+		$string_nullspace = substr( $string_nullspace, $pos + 1 );
+	}
+
+	// Compare by length rather than truthiness: a remainder of "0" is falsy in PHP
+	// but is still real content. The cast covers substr() returning false on older PHP.
+	if ( 0 < strlen( (string) $string ) ) {
+		$chunks[] = $string;
+	}
+
+	return $chunks;
+}
+
+/**
+ * Adds a rel="nofollow" attribute to every HTML A element found in the content.
+ *
+ * The text is unslashed, each opening `<a ...>` tag is handed to
+ * wp_rel_nofollow_callback(), and the result is slashed again.
+ *
+ * @since 1.5.0
+ *
+ * @param string $text Content that may contain HTML A elements.
+ * @return string Converted content.
+ */
+function wp_rel_nofollow( $text ) {
+	// This is a pre save filter, so the incoming text is already escaped (slashed).
+	$unslashed = stripslashes( $text );
+
+	// Rewrite every opening anchor tag; the callback receives the tag's attribute string.
+	$rewritten = preg_replace_callback( '|<a (.+?)>|i', 'wp_rel_nofollow_callback', $unslashed );
+
+	return wp_slash( $rewritten );
+}
+
+/**
+ * Callback to add rel=nofollow string to HTML A element.
+ *
+ * Links that point at this site's home URL (over http or https) are returned
+ * unchanged. For every other link, an existing rel attribute is merged with
+ * 'nofollow' (without duplicating it) and the attribute list is rebuilt so
+ * that the element carries exactly one rel attribute.
+ *
+ * @since 2.3.0
+ *
+ * @param array $matches Single match; index 1 holds the attribute string of the A element.
+ * @return string HTML A Element with rel nofollow.
+ */
+function wp_rel_nofollow_callback( $matches ) {
+	$text = $matches[1];
+	$atts = shortcode_parse_atts( $matches[1] );
+	$rel  = 'nofollow';
+
+	// Leave links to this site alone, whichever scheme they use.
+	foreach ( array( 'http', 'https' ) as $scheme ) {
+		// Quote for the '%' delimiter used below, and drop any trailing slash so
+		// the boundary look-ahead works for both "http://host" and "http://host/".
+		$home = preg_quote( rtrim( set_url_scheme( home_url(), $scheme ), '/' ), '%' );
+
+		// The look-ahead requires the home URL to end at a URL boundary. A bare
+		// prefix match would also accept "http://example.com.evil.test" as internal.
+		if ( preg_match( '%href=["\']' . $home . '(?=[/?#"\'])%i', $text ) ) {
+			return "<a $text>";
+		}
+	}
+
+	if ( is_array( $atts ) && ! empty( $atts['rel'] ) ) {
+		$parts = array_map( 'trim', explode( ' ', $atts['rel'] ) );
+		if ( false === array_search( 'nofollow', $parts, true ) ) {
+			$parts[] = 'nofollow';
+		}
+		$rel = implode( ' ', $parts );
+		unset( $atts['rel'] );
+
+		// Rebuild the remaining attributes. Values are re-quoted with double
+		// quotes, so they must be escaped: a value that was originally
+		// single-quoted may legitimately contain a double quote.
+		$html = '';
+		foreach ( $atts as $name => $value ) {
+			$html .= "{$name}=\"" . esc_attr( $value ) . "\" ";
+		}
+		$text = trim( $html );
+	}
+
+	return "<a $text rel=\"" . esc_attr( $rel ) . "\">";
+}
+
+/**
+ * Convert one smiley code to the icon graphic file equivalent.
+ *
+ * Callback handler for convert_smilies().
+ *
+ * Looks the smiley code up in the $wpsmiliestrans global array. When the mapped
+ * value ends in an image file extension an `<img>` string is returned; any
+ * other mapped value is returned as is.
+ *
+ * @since 2.8.0
+ *
+ * @global array $wpsmiliestrans
+ *
+ * @param array $matches Single match. Smiley code to convert to image.
+ * @return string Image string for smiley.
+ */
+function translate_smiley( $matches ) {
+	global $wpsmiliestrans;
+
+	if ( 0 == count( $matches ) ) {
+		return '';
+	}
+
+	// The first element of the match is the smiley code, possibly padded with whitespace.
+	$smiley = trim( reset( $matches ) );
+	$img    = $wpsmiliestrans[ $smiley ];
+
+	// Lower-cased text after the last dot, or false when there is none.
+	$ext       = false;
+	$ext_match = array();
+	if ( preg_match( '/\.([^.]+)$/', $img, $ext_match ) ) {
+		$ext = strtolower( $ext_match[1] );
+	}
+
+	// Don't convert smilies that aren't images - they're probably emoji.
+	if ( ! in_array( $ext, array( 'jpg', 'jpeg', 'jpe', 'gif', 'png' ) ) ) {
+		return $img;
+	}
+
+	$default_src = includes_url( "images/smilies/$img" );
+
+	/**
+	 * Filters the Smiley image URL before it's used in the image element.
+	 *
+	 * @since 2.9.0
+	 *
+	 * @param string $smiley_url URL for the smiley image.
+	 * @param string $img        Filename for the smiley image.
+	 * @param string $site_url   Site URL, as returned by site_url().
+	 */
+	$src_url = apply_filters( 'smilies_src', $default_src, $img, site_url() );
+
+	return sprintf(
+		'<img src="%s" alt="%s" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
+		esc_url( $src_url ),
+		esc_attr( $smiley )
+	);
+}
+
+/**
+ * Convert text equivalent of smilies to images.
+ *
+ * Will only convert smilies if the option 'use_smilies' is true and the global
+ * used in the function isn't empty. Text inside code, pre, style, script and
+ * textarea elements (with or without attributes on the opening tag) is left
+ * untouched, as are the HTML tags themselves.
+ *
+ * @since 0.71
+ *
+ * @global string|array $wp_smiliessearch
+ *
+ * @param string $text Content to convert smilies from text.
+ * @return string Converted content with text smilies replaced with images.
+ */
+function convert_smilies( $text ) {
+	global $wp_smiliessearch;
+
+	// Nothing to do when smilies are switched off or no search pattern is available.
+	if ( ! get_option( 'use_smilies' ) || empty( $wp_smiliessearch ) ) {
+		return $text;
+	}
+
+	// Split into alternating text / tag pieces, keeping the tags themselves.
+	$textarr = preg_split( '/(<.*>)/U', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+	if ( false === $textarr ) {
+		// The split failed; hand the content back untouched rather than losing it.
+		return $text;
+	}
+
+	// Elements whose content must not be processed.
+	$tags_to_ignore       = 'code|pre|style|script|textarea';
+	$ignore_block_element = '';
+	$output               = '';
+
+	foreach ( $textarr as $content ) {
+		// Entering an ignore block? The tag name must be followed by whitespace or '>',
+		// so that `<pre class="x">` is recognised but `<prefix>` is not.
+		if ( '' == $ignore_block_element && preg_match( '/^<(' . $tags_to_ignore . ')(?=[\s>])[^>]*>/', $content, $matches ) ) {
+			$ignore_block_element = $matches[1];
+		}
+
+		// If it's not a tag and not in an ignore block, convert the smilies.
+		if ( '' == $ignore_block_element && strlen( $content ) > 0 && '<' != $content[0] ) {
+			$content = preg_replace_callback( $wp_smiliessearch, 'translate_smiley', $content );
+		}
+
+		// Leaving the ignore block once its closing tag shows up.
+		if ( '' != $ignore_block_element && '</' . $ignore_block_element . '>' == $content ) {
+			$ignore_block_element = '';
+		}
+
+		$output .= $content;
+	}
+
+	return $output;
+}
+
+/**
+ * Verifies that an email is valid.
+ *
+ * Does not grok i18n domains. Not RFC compliant.
+ *
+ * The checks run in a fixed order and stop at the first one that fails; the
+ * name of that check is handed to the {@see 'is_email'} filter as its context.
+ *
+ * @since 0.71
+ *
+ * @param string $email      Email address to verify.
+ * @param bool   $deprecated Deprecated.
+ * @return string|bool Either false or the valid email address.
+ */
+function is_email( $email, $deprecated = false ) {
+	if ( ! empty( $deprecated ) ) {
+		_deprecated_argument( __FUNCTION__, '3.0.0' );
+	}
+
+	// Context of the first failed check; stays null when every check passes.
+	$failed_check = null;
+
+	if ( strlen( $email ) < 3 ) {
+		// Shorter than the minimum length an email can be.
+		$failed_check = 'email_too_short';
+	} elseif ( false === strpos( $email, '@', 1 ) ) {
+		// No @ character after the first position.
+		$failed_check = 'email_no_at';
+	} else {
+		// Split out the local and domain parts, and the domain into its subs.
+		$halves = explode( '@', $email, 2 );
+		$local  = $halves[0];
+		$domain = $halves[1];
+		$subs   = explode( '.', $domain );
+
+		if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) {
+			// LOCAL PART: invalid characters.
+			$failed_check = 'local_invalid_chars';
+		} elseif ( preg_match( '/\.{2,}/', $domain ) ) {
+			// DOMAIN PART: sequences of periods.
+			$failed_check = 'domain_period_sequence';
+		} elseif ( $domain !== trim( $domain, " \t\n\r\0\x0B." ) ) {
+			// Leading or trailing periods or whitespace.
+			$failed_check = 'domain_period_limits';
+		} elseif ( count( $subs ) < 2 ) {
+			// The domain is assumed to have at least two subs.
+			$failed_check = 'domain_no_periods';
+		} else {
+			foreach ( $subs as $sub ) {
+				// Leading or trailing hyphens or whitespace.
+				if ( $sub !== trim( $sub, " \t\n\r\0\x0B-" ) ) {
+					$failed_check = 'sub_hyphen_limits';
+					break;
+				}
+
+				// Invalid characters.
+				if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) {
+					$failed_check = 'sub_invalid_chars';
+					break;
+				}
+			}
+		}
+	}
+
+	if ( null !== $failed_check ) {
+		/**
+		 * Filters whether an email address is valid.
+		 *
+		 * This filter is evaluated under several different contexts, such as 'email_too_short',
+		 * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
+		 * 'domain_no_periods', 'sub_hyphen_limits', 'sub_invalid_chars', or no specific context.
+		 *
+		 * @since 2.8.0
+		 *
+		 * @param bool   $is_email Whether the email address has passed the is_email() checks. Default false.
+		 * @param string $email    The email address being checked.
+		 * @param string $context  Context under which the email was tested.
+		 */
+		return apply_filters( 'is_email', false, $email, $failed_check );
+	}
+
+	// Congratulations your email made it!
+	/** This filter is documented in wp-includes/formatting.php */
+	return apply_filters( 'is_email', $email, $email, null );
+}
+
+/**
+ * Convert to ASCII from email subjects.
+ *
+ * Looks for a `=?charset?Q?text?=` encoded word in the subject. When one is
+ * found, underscores in its text become spaces and every `=XX` hex escape is
+ * decoded through _wp_iso_convert(). A subject without such a word is returned
+ * unchanged.
+ *
+ * @since 1.2.0
+ *
+ * @param string $string Subject line
+ * @return string Converted string to ASCII
+ */
+function wp_iso_descrambler( $string ) {
+	/* this may only work with iso-8859-1, I'm afraid */
+	$found = preg_match( '#\=\?(.+)\?Q\?(.+)\?\=#i', $string, $matches );
+
+	if ( ! $found ) {
+		return $string;
+	}
+
+	// Second capture group is the encoded text; underscores in it stand for spaces.
+	$subject = str_replace( '_', ' ', $matches[2] );
+
+	return preg_replace_callback( '#\=([0-9a-f]{2})#i', '_wp_iso_convert', $subject );
+}
+
+/**
+ * Helper function to convert hex encoded chars to ASCII
+ *
+ * Intended as a preg_replace_callback() callback: the first capture group is
+ * read as a hexadecimal number and the character with that byte value is returned.
+ *
+ * @since 3.1.0
+ * @access private
+ *
+ * @param array $match The preg_replace_callback matches array
+ * @return string Converted chars
+ */
+function _wp_iso_convert( $match ) {
+	$hex_digits = strtolower( $match[1] );
+	$byte_value = hexdec( $hex_digits );
+
+	return chr( $byte_value );
+}
+
+/**
+ * Returns a date in the GMT equivalent.
+ *
+ * Requires and returns a date in the Y-m-d H:i:s format. If there is a
+ * timezone_string available, the date is assumed to be in that timezone,
+ * otherwise it simply subtracts the value of the 'gmt_offset' option. Return
+ * format can be overridden using the $format parameter.
+ *
+ * A date that cannot be parsed yields the Unix epoch rendered in $format.
+ *
+ * @since 1.2.0
+ *
+ * @param string $string The date to be converted.
+ * @param string $format The format string for the returned date (default is Y-m-d H:i:s)
+ * @return string GMT version of the date provided.
+ */
+function get_gmt_from_date( $string, $format = 'Y-m-d H:i:s' ) {
+	$timezone_name = get_option( 'timezone_string' );
+
+	// A named timezone is configured: let DateTime do the conversion.
+	if ( $timezone_name ) {
+		$local = date_create( $string, new DateTimeZone( $timezone_name ) );
+		if ( ! $local ) {
+			return gmdate( $format, 0 );
+		}
+
+		$local->setTimezone( new DateTimeZone( 'UTC' ) );
+
+		return $local->format( $format );
+	}
+
+	// No named timezone: read the date fields directly and shift by the numeric offset.
+	$has_fields = preg_match( '#([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2}) ([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})#', $string, $fields );
+
+	if ( $has_fields ) {
+		$local_time = gmmktime( $fields[4], $fields[5], $fields[6], $fields[2], $fields[3], $fields[1] );
+
+		return gmdate( $format, $local_time - get_option( 'gmt_offset' ) * HOUR_IN_SECONDS );
+	}
+
+	// Not in the expected shape: fall back to strtotime(); no offset is applied on this path.
+	$parsed = strtotime( $string );
+	if ( false === $parsed ) {
+		return gmdate( $format, 0 );
+	}
+
+	return gmdate( $format, $parsed );
+}
+
+/**
+ * Converts a GMT date into the correct format for the blog.
+ *
+ * Requires and returns a date in the Y-m-d H:i:s format. If there is a
+ * timezone_string available, the returned date is in that timezone, otherwise
+ * it simply adds the value of gmt_offset. Return format can be overridden
+ * using the $format parameter.
+ *
+ * A date that cannot be parsed yields the Unix epoch rendered in $format,
+ * expressed in GMT so that the result does not depend on the server's
+ * default timezone (matching get_gmt_from_date()).
+ *
+ * @since 1.2.0
+ *
+ * @param string $string The date to be converted.
+ * @param string $format The format string for the returned date (default is Y-m-d H:i:s)
+ * @return string Formatted date relative to the timezone / GMT offset.
+ */
+function get_date_from_gmt( $string, $format = 'Y-m-d H:i:s' ) {
+	$tz = get_option( 'timezone_string' );
+
+	if ( $tz ) {
+		// A named timezone is configured: parse as UTC, then move into that zone.
+		$datetime = date_create( $string, new DateTimeZone( 'UTC' ) );
+		if ( ! $datetime ) {
+			return gmdate( $format, 0 );
+		}
+		$datetime->setTimezone( new DateTimeZone( $tz ) );
+		$string_localtime = $datetime->format( $format );
+	} else {
+		// No named timezone: read the date fields directly and shift by the numeric offset.
+		if ( ! preg_match( '#([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2}) ([0-9]{1,2}):([0-9]{1,2}):([0-9]{1,2})#', $string, $matches ) ) {
+			return gmdate( $format, 0 );
+		}
+		$string_time      = gmmktime( $matches[4], $matches[5], $matches[6], $matches[2], $matches[3], $matches[1] );
+		$string_localtime = gmdate( $format, $string_time + get_option( 'gmt_offset' ) * HOUR_IN_SECONDS );
+	}
+
+	return $string_localtime;
+}
+
+/**
+ * Computes an offset in seconds from an iso8601 timezone.
+ *
+ * Accepts the basic form '±hhmm', the extended form '±hh:mm' and the
+ * hours-only form '±hh'. Any leading character other than '+' is treated
+ * as a minus sign.
+ *
+ * @since 1.5.0
+ *
+ * @param string $timezone Either 'Z' for 0 offset or '±hhmm' / '±hh:mm'.
+ * @return int|float The offset in seconds.
+ */
+function iso8601_timezone_to_offset( $timezone ) {
+	// 'Z' designates UTC.
+	if ( 'Z' === $timezone ) {
+		return 0;
+	}
+
+	$sign  = ( '+' === substr( $timezone, 0, 1 ) ) ? 1 : -1;
+	$hours = intval( substr( $timezone, 1, 2 ) );
+
+	// The minutes are the two digits after the hours; in the extended form a
+	// colon sits between them and has to be skipped, otherwise the minutes
+	// would be read as ":mm" and silently evaluate to 0.
+	$minutes_start = ( ':' === substr( $timezone, 3, 1 ) ) ? 4 : 3;
+	$minutes       = intval( substr( $timezone, $minutes_start, 2 ) ) / 60;
+
+	return $sign * HOUR_IN_SECONDS * ( $hours + $minutes );
+}
+
+/**
+ * Converts an iso8601 date to MySQL DateTime format used by post_date[_gmt].
+ *
+ * In 'user' mode the date is only reformatted: the fields are kept as written
+ * and any timezone designator is dropped. In 'gmt' mode the date is shifted to
+ * GMT, using the timezone designator when there is one and the 'gmt_offset'
+ * option otherwise.
+ *
+ * @since 1.5.0
+ *
+ * @param string $date_string Date and time in ISO 8601 format {@link https://en.wikipedia.org/wiki/ISO_8601}.
+ * @param string $timezone    Optional. If set to GMT returns the time minus gmt_offset. Default is 'user'.
+ * @return string|false The date and time in MySQL DateTime format - Y-m-d H:i:s. False when
+ *                      $timezone is neither 'gmt' nor 'user', or when $date_string cannot be
+ *                      parsed in 'gmt' mode.
+ */
+function iso8601_to_datetime( $date_string, $timezone = 'user' ) {
+	$timezone = strtolower( $timezone );
+
+	// Groups 1-6: year, month, day, hour, minute, second. Group 7: optional timezone designator.
+	$iso8601_pattern = '#([0-9]{4})([0-9]{2})([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(Z|[\+|\-][0-9]{2,4}){0,1}#';
+
+	if ( 'gmt' === $timezone ) {
+		// Without a match there are no date fields to work with; bail out instead
+		// of reading undefined indexes and returning a made-up date.
+		if ( ! preg_match( $iso8601_pattern, $date_string, $date_bits ) ) {
+			return false;
+		}
+
+		if ( ! empty( $date_bits[7] ) ) { // we have a timezone, so let's compute an offset
+			$offset = iso8601_timezone_to_offset( $date_bits[7] );
+		} else { // we don't have a timezone, so we assume user local timezone (not server's!)
+			$offset = HOUR_IN_SECONDS * get_option( 'gmt_offset' );
+		}
+
+		$timestamp  = gmmktime( $date_bits[4], $date_bits[5], $date_bits[6], $date_bits[2], $date_bits[3], $date_bits[1] );
+		$timestamp -= $offset;
+
+		return gmdate( 'Y-m-d H:i:s', $timestamp );
+	}
+
+	if ( 'user' === $timezone ) {
+		return preg_replace( $iso8601_pattern, '$1-$2-$3 $4:$5:$6', $date_string );
+	}
+
+	// Unknown mode: say so explicitly rather than falling off the end of the function.
+	return false;
+}
+
+/**
+ * Strips out all characters that are not allowable in an email.
+ *
+ * The local part and the domain are cleaned separately. Whenever a step leaves
+ * nothing usable behind, an empty string is passed through the
+ * {@see 'sanitize_email'} filter together with the name of that step.
+ *
+ * @since 1.5.0
+ *
+ * @param string $email Email address to filter.
+ * @return string Filtered email address.
+ */
+function sanitize_email( $email ) {
+	// Shorter than the minimum length an email can be.
+	if ( 3 > strlen( $email ) ) {
+		/**
+		 * Filters a sanitized email address.
+		 *
+		 * This filter is evaluated under several contexts, including 'email_too_short',
+		 * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
+		 * 'domain_no_periods', 'domain_no_valid_subs', or no context.
+		 *
+		 * @since 2.8.0
+		 *
+		 * @param string      $sanitized_email The sanitized email address; empty when sanitization failed.
+		 * @param string      $email           The email address, as provided to sanitize_email().
+		 * @param string|null $context         Name of the failed step, or null on success.
+		 */
+		return apply_filters( 'sanitize_email', '', $email, 'email_too_short' );
+	}
+
+	// There has to be an @ character after the first position.
+	if ( false === strpos( $email, '@', 1 ) ) {
+		/** This filter is documented in wp-includes/formatting.php */
+		return apply_filters( 'sanitize_email', '', $email, 'email_no_at' );
+	}
+
+	// Everything before the first @ is the local part, the rest is the domain.
+	$halves = explode( '@', $email, 2 );
+
+	// LOCAL PART: drop every character that is not allowed.
+	$local = preg_replace( '/[^a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]/', '', $halves[0] );
+	if ( '' === $local ) {
+		/** This filter is documented in wp-includes/formatting.php */
+		return apply_filters( 'sanitize_email', '', $email, 'local_invalid_chars' );
+	}
+
+	// DOMAIN PART: remove sequences of periods.
+	$domain = preg_replace( '/\.{2,}/', '', $halves[1] );
+	if ( '' === $domain ) {
+		/** This filter is documented in wp-includes/formatting.php */
+		return apply_filters( 'sanitize_email', '', $email, 'domain_period_sequence' );
+	}
+
+	// Remove leading and trailing periods and whitespace.
+	$domain = trim( $domain, " \t\n\r\0\x0B." );
+	if ( '' === $domain ) {
+		/** This filter is documented in wp-includes/formatting.php */
+		return apply_filters( 'sanitize_email', '', $email, 'domain_period_limits' );
+	}
+
+	// The domain is assumed to consist of at least two subs.
+	$subs = explode( '.', $domain );
+	if ( count( $subs ) < 2 ) {
+		/** This filter is documented in wp-includes/formatting.php */
+		return apply_filters( 'sanitize_email', '', $email, 'domain_no_periods' );
+	}
+
+	// Clean each sub and keep the ones that still have content.
+	$new_subs = array();
+	foreach ( $subs as $sub ) {
+		// First strip leading and trailing hyphens and whitespace, then any invalid character.
+		$cleaned = preg_replace( '/[^a-z0-9-]+/i', '', trim( $sub, " \t\n\r\0\x0B-" ) );
+
+		if ( '' !== $cleaned ) {
+			$new_subs[] = $cleaned;
+		}
+	}
+
+	// Fewer than two valid subs do not make a domain.
+	if ( count( $new_subs ) < 2 ) {
+		/** This filter is documented in wp-includes/formatting.php */
+		return apply_filters( 'sanitize_email', '', $email, 'domain_no_valid_subs' );
+	}
+
+	// Put the email back together from the cleaned local part and the valid subs.
+	$sanitized = $local . '@' . implode( '.', $new_subs );
+
+	// Congratulations your email made it!
+	// Note that the sanitized address is passed for both the first and the second filter argument here.
+	/** This filter is documented in wp-includes/formatting.php */
+	return apply_filters( 'sanitize_email', $sanitized, $sanitized, null );
+}
+
+/**
+ * Determines the difference between two timestamps.
+ *
+ * The difference is returned in a human readable format such as "1 hour",
+ * "5 mins", "2 days". The largest unit that fits into the difference is used,
+ * the amount is rounded, and it is never reported as less than 1.
+ *
+ * @since 1.5.0
+ *
+ * @param int $from Unix timestamp from which the difference begins.
+ * @param int $to   Optional. Unix timestamp to end the time difference. Default becomes time() if not set.
+ * @return string Human readable time difference.
+ */
+function human_time_diff( $from, $to = '' ) {
+	if ( empty( $to ) ) {
+		$to = time();
+	}
+
+	// Only the size of the gap matters, not its direction.
+	$diff = (int) abs( $to - $from );
+
+	// Walk the units from largest to smallest; the first one that fits wins.
+	if ( $diff >= YEAR_IN_SECONDS ) {
+		$years = round( $diff / YEAR_IN_SECONDS );
+		$years = ( $years > 1 ) ? $years : 1;
+		$since = sprintf( _n( '%s year', '%s years', $years ), $years );
+	} elseif ( $diff >= MONTH_IN_SECONDS ) {
+		$months = round( $diff / MONTH_IN_SECONDS );
+		$months = ( $months > 1 ) ? $months : 1;
+		$since  = sprintf( _n( '%s month', '%s months', $months ), $months );
+	} elseif ( $diff >= WEEK_IN_SECONDS ) {
+		$weeks = round( $diff / WEEK_IN_SECONDS );
+		$weeks = ( $weeks > 1 ) ? $weeks : 1;
+		$since = sprintf( _n( '%s week', '%s weeks', $weeks ), $weeks );
+	} elseif ( $diff >= DAY_IN_SECONDS ) {
+		$days  = round( $diff / DAY_IN_SECONDS );
+		$days  = ( $days > 1 ) ? $days : 1;
+		$since = sprintf( _n( '%s day', '%s days', $days ), $days );
+	} elseif ( $diff >= HOUR_IN_SECONDS ) {
+		$hours = round( $diff / HOUR_IN_SECONDS );
+		$hours = ( $hours > 1 ) ? $hours : 1;
+		$since = sprintf( _n( '%s hour', '%s hours', $hours ), $hours );
+	} else {
+		$mins = round( $diff / MINUTE_IN_SECONDS );
+		$mins = ( $mins > 1 ) ? $mins : 1;
+		/* translators: min=minute */
+		$since = sprintf( _n( '%s min', '%s mins', $mins ), $mins );
+	}
+
+	/**
+	 * Filters the human readable difference between two timestamps.
+	 *
+	 * @since 4.0.0
+	 *
+	 * @param string $since The difference in human readable text.
+	 * @param int    $diff  The difference in seconds.
+	 * @param int    $from  Unix timestamp from which the difference begins.
+	 * @param int    $to    Unix timestamp to end the time difference.
+	 */
+	return apply_filters( 'human_time_diff', $since, $diff, $from, $to );
+}
+
+/**
+ * Generates an excerpt from the content, if needed.
+ *
+ * The excerpt word amount will be 55 words and if the amount is greater than
+ * that, then the string ' […]' will be appended to the excerpt. If the string
+ * is less than 55 words, then the content will be returned as is.
+ *
+ * The 55 word limit can be modified by plugins/themes using the {@see 'excerpt_length'} filter
+ * The ' […]' string can be modified by plugins/themes using the {@see 'excerpt_more'} filter
+ *
+ * A non-empty $text is not trimmed; it is only passed through the
+ * {@see 'wp_trim_excerpt'} filter.
+ *
+ * @since 1.5.0
+ *
+ * @param string $text Optional. The excerpt. If set to empty, an excerpt is generated.
+ * @return string The excerpt.
+ */
+function wp_trim_excerpt( $text = '' ) {
+	$raw_excerpt = $text;
+
+	if ( '' == $text ) {
+		$text = get_the_content( '' );
+
+		$text = strip_shortcodes( $text );
+
+		/** This filter is documented in wp-includes/post-template.php */
+		$text = apply_filters( 'the_content', $text );
+
+		// Escape the CDATA section terminator. The replacement has to differ from
+		// the search string, otherwise this call does nothing.
+		$text = str_replace( ']]>', ']]&gt;', $text );
+
+		/**
+		 * Filters the number of words in an excerpt.
+		 *
+		 * @since 2.7.0
+		 *
+		 * @param int $number The number of words. Default 55.
+		 */
+		$excerpt_length = apply_filters( 'excerpt_length', 55 );
+
+		/**
+		 * Filters the string in the "more" link displayed after a trimmed excerpt.
+		 *
+		 * @since 2.9.0
+		 *
+		 * @param string $more_string The string shown within the more link.
+		 */
+		$excerpt_more = apply_filters( 'excerpt_more', ' ' . '[…]' );
+
+		$text = wp_trim_words( $text, $excerpt_length, $excerpt_more );
+	}
+
+	/**
+	 * Filters the trimmed excerpt string.
+	 *
+	 * @since 2.8.0
+	 *
+	 * @param string $text        The trimmed text.
+	 * @param string $raw_excerpt The text prior to trimming.
+	 */
+	return apply_filters( 'wp_trim_excerpt', $text, $raw_excerpt );
+}
+
+/**
+ * Trims text to a certain number of words.
+ *
+ * All tags are stripped from the text first. This function is localized: for
+ * languages that count 'words' by the individual character (such as East Asian
+ * languages), the $num_words argument will apply to the number of individual
+ * characters.
+ *
+ * @since 3.3.0
+ *
+ * @param string $text      Text to trim.
+ * @param int    $num_words Number of words. Default 55.
+ * @param string $more      Optional. What to append if $text needs to be trimmed. Default '…'.
+ * @return string Trimmed text.
+ */
+function wp_trim_words( $text, $num_words = 55, $more = null ) {
+	if ( null === $more ) {
+		$more = __( '…' );
+	}
+
+	$original_text = $text;
+	$text          = wp_strip_all_tags( $text );
+
+	/*
+	 * translators: If your word count is based on single characters (e.g. East Asian characters),
+	 * enter 'characters_excluding_spaces' or 'characters_including_spaces'. Otherwise, enter 'words'.
+	 * Do not translate into your own language.
+	 */
+	$count_type = _x( 'words', 'Word count type. Do not translate!' );
+
+	// Per-character counting is only used when the blog charset is UTF-8.
+	$count_characters = ( 0 === strpos( $count_type, 'characters' ) ) && preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) );
+
+	// In both modes one unit more than requested is collected, so that an overflow can be detected.
+	if ( $count_characters ) {
+		$text = trim( preg_replace( "/[\n\r\t ]+/", ' ', $text ), ' ' );
+		preg_match_all( '/./u', $text, $characters );
+		$words_array = array_slice( $characters[0], 0, $num_words + 1 );
+		$sep         = '';
+	} else {
+		$words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY );
+		$sep         = ' ';
+	}
+
+	$needs_trimming = count( $words_array ) > $num_words;
+
+	if ( $needs_trimming ) {
+		// Drop the surplus unit collected above.
+		array_pop( $words_array );
+	}
+
+	$text = implode( $sep, $words_array );
+
+	if ( $needs_trimming ) {
+		$text .= $more;
+	}
+
+	/**
+	 * Filters the text content after words have been trimmed.
+	 *
+	 * @since 3.3.0
+	 *
+	 * @param string $text          The trimmed text.
+	 * @param int    $num_words     The number of words to trim the text to. Default 55.
+	 * @param string $more          An optional string to append to the end of the trimmed text, e.g. ….
+	 * @param string $original_text The text before it was trimmed.
+	 */
+	return apply_filters( 'wp_trim_words', $text, $num_words, $more, $original_text );
+}
+
+/**
+ * Converts named entities into numbered entities.
+ *
+ * @since 1.5.1
+ *
+ * @param string $text The text within which entities will be converted.
+ * @return string Text with converted entities.
+ */
+function ent2ncr( $text ) {
+
+ /**
+ * Filters text before named entities are converted into numbered entities.
+ *
+ * A non-null string must be returned for the filter to be evaluated.
+ *
+ * @since 3.3.0
+ *
+ * @param null $converted_text The text to be converted. Default null.
+ * @param string $text The text prior to entity conversion.
+ */
+ $filtered = apply_filters( 'pre_ent2ncr', null, $text );
+ if ( null !== $filtered )
+ return $filtered;
+
+ $to_ncr = array(
+ '"' => '"',
+ '&' => '&',
+ '<' => '<',
+ '>' => '>',
+ '|' => '|',
+ ' ' => ' ',
+ '¡' => '¡',