WordPress 4.2.2

[autoinstalls/wordpress.git] / wp-includes / compat.php
diff --git a/wp-includes/compat.php b/wp-includes/compat.php

index ab4045250a6e434a62f19c4dc20d196a9efad1a9..4317eb689b1416f187f66870a9a5b1a068666af2 100644 (file)
--- a/wp-includes/compat.php
+++ b/wp-includes/compat.php
@@ -13,23 +13,141 @@ if ( !function_exists('_') ) {
         }
  }
  
-if ( !function_exists('mb_substr') ):
-       function mb_substr( $str, $start, $length=null, $encoding=null ) {
-               return _mb_substr($str, $start, $length, $encoding);
+/**
+ * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use.
+ *
+ * The answer is kept in a static variable, so the preg_match() probe runs at most
+ * once until the cached value is overridden or reset through $set.
+ *
+ * @ignore
+ * @since 4.2.2
+ * @access private
+ *
+ * @param bool|string|null $set - Used for testing only
+ *             null   : default - get PCRE/u capability
+ *             false  : Used for testing - return false for future calls to this function
+ *             'reset': Used for testing - restore default behavior of this function
+ * @return mixed The cached value: the preg_match() result of the probe (1 when the
+ *               /u pattern matched), or whatever non-null value was last passed as $set.
+ */
+function _wp_can_use_pcre_u( $set = null ) {
+       static $utf8_pcre = 'reset'; // 'reset' is the sentinel for "not probed yet".
+
+       if ( null !== $set ) {
+               $utf8_pcre = $set;
+       }
+
+       if ( 'reset' === $utf8_pcre ) {
+               $utf8_pcre = @preg_match( '/^./u', 'a' ); // @ silences any warning raised by the probe.
+       }
+
+       return $utf8_pcre;
+}
+
+if ( ! function_exists( 'mb_substr' ) ) : // Define the fallback only when no mb_substr() exists yet.
+       function mb_substr( $str, $start, $length = null, $encoding = null ) {
+               return _mb_substr( $str, $start, $length, $encoding ); // Delegate to the compat implementation.
         }
  endif;
  
-function _mb_substr( $str, $start, $length=null, $encoding=null ) {
-       // the solution below, works only for utf-8, so in case of a different
-       // charset, just use built-in substr
-       $charset = get_option( 'blog_charset' );
-       if ( !in_array( $charset, array('utf8', 'utf-8', 'UTF8', 'UTF-8') ) ) {
-               return is_null( $length )? substr( $str, $start ) : substr( $str, $start, $length);
-       }
-       // use the regex unicode support to separate the UTF-8 characters into an array
-       preg_match_all( '/./us', $str, $match );
-       $chars = is_null( $length )? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
-       return implode( '', $chars );
+/**
+ * Compat implementation behind the mb_substr() fallback: returns part of a string,
+ * counting in characters rather than bytes when the encoding is UTF-8.
+ *
+ * Only understands UTF-8 and 8bit.  All other character sets will be treated as 8bit.
+ * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
+ * The behavior of this function for invalid inputs is undefined.
+ *
+ * @param string      $str      The string to extract the substring from.
+ * @param int         $start    Start offset; handed unchanged to substr() (8bit) or
+ *                              array_slice() (UTF-8), so it follows their offset rules.
+ * @param int|null    $length   Maximum number of characters to return; null means
+ *                              "through the end of the string".
+ * @param string|null $encoding Character encoding; when null, the 'blog_charset'
+ *                              option is used instead.
+ * @return string The extracted substring.
+ */
+function _mb_substr( $str, $start, $length = null, $encoding = null ) {
+       if ( null === $encoding ) {
+               $encoding = get_option( 'blog_charset' );
+       }
+
+       // The solution below works only for UTF-8,
+       // so in case of a different charset just use built-in substr()
+       if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
+               return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
+       }
+
+       if ( _wp_can_use_pcre_u() ) {
+               // Use the regex unicode support to separate the UTF-8 characters into an array
+               preg_match_all( '/./us', $str, $match );
+               $chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
+               return implode( '', $chars );
+       }
+
+       $regex = '/(
+                 [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
+               | [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
+               | \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
+               | [\xE1-\xEC][\x80-\xBF]{2}
+               | \xED[\x80-\x9F][\x80-\xBF]
+               | [\xEE-\xEF][\x80-\xBF]{2}
+               | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
+               | [\xF1-\xF3][\x80-\xBF]{3}
+               | \xF4[\x80-\x8F][\x80-\xBF]{2}
+       )/x';
+
+       $chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop
+       do {
+               // We had some string left over from the last round, but we counted it in that last round.
+               array_pop( $chars );
+
+               // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
+               // The capture group plus PREG_SPLIT_DELIM_CAPTURE makes the matched characters themselves the returned pieces.
+               $pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
+
+               $chars = array_merge( $chars, $pieces );
+       } while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.
+
+       return join( '', array_slice( $chars, $start, $length ) ); // A null $length slices through the end of the array.
+}
+
+if ( ! function_exists( 'mb_strlen' ) ) : // Define the fallback only when no mb_strlen() exists yet.
+       function mb_strlen( $str, $encoding = null ) {
+               return _mb_strlen( $str, $encoding ); // Delegate to the compat implementation.
+       }
+endif;
+
+/**
+ * Compat implementation behind the mb_strlen() fallback: returns the length of a
+ * string, counting characters rather than bytes when the encoding is UTF-8.
+ *
+ * Only understands UTF-8 and 8bit.  All other character sets will be treated as 8bit.
+ * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
+ * The result for invalid inputs is undefined, but the function always terminates.
+ *
+ * @param string      $str      The string to measure.
+ * @param string|null $encoding Character encoding; when null, the 'blog_charset'
+ *                              option is used instead.
+ * @return int Number of characters (UTF-8) or bytes (any other encoding) in $str.
+ */
+function _mb_strlen( $str, $encoding = null ) {
+       if ( null === $encoding ) {
+               $encoding = get_option( 'blog_charset' );
+       }
+
+       // The solution below works only for UTF-8,
+       // so in case of a different charset just use built-in strlen()
+       if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
+               return strlen( $str );
+       }
+
+       if ( _wp_can_use_pcre_u() ) {
+               // Use the regex unicode support to separate the UTF-8 characters into an array
+               preg_match_all( '/./us', $str, $match );
+               return count( $match[0] );
+       }
+
+       $regex = '/(?:
+                 [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
+               | [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
+               | \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
+               | [\xE1-\xEC][\x80-\xBF]{2}
+               | \xED[\x80-\x9F][\x80-\xBF]
+               | [\xEE-\xEF][\x80-\xBF]{2}
+               | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
+               | [\xF1-\xF3][\x80-\xBF]{3}
+               | \xF4[\x80-\x8F][\x80-\xBF]{2}
+       )/x';
+
+       $count = 1; // Start at 1 instead of 0 since the first thing we do is decrement
+       do {
+               // We had some string left over from the last round, but we counted it in that last round.
+               $count--;
+
+               // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
+               $pieces = preg_split( $regex, $str, 1000 );
+
+               // Increment
+               $count += count( $pieces );
+
+               // The last piece is whatever has not been split yet. If it equals the input, this pass
+               // consumed nothing (bytes the pattern cannot match), so another pass would never end.
+               $rest    = (string) array_pop( $pieces );
+               $stalled = ( $rest === $str );
+               $str     = $rest;
+       } while ( '' !== $str && ! $stalled ); // Compare with '' explicitly: a leftover of "0" is falsy but still uncounted.
+
+       // Fencepost: preg_split() always returns one extra item in the array
+       return --$count;
  }
  
  if ( !function_exists('hash_hmac') ):
@@ -65,7 +183,7 @@ if ( !function_exists('json_encode') ) {
         function json_encode( $string ) {
                 global $wp_json;
  
-               if ( !is_a($wp_json, 'Services_JSON') ) {
+               if ( ! ( $wp_json instanceof Services_JSON ) ) {
                         require_once( ABSPATH . WPINC . '/class-json.php' );
                         $wp_json = new Services_JSON();
                 }
@@ -78,7 +196,7 @@ if ( !function_exists('json_decode') ) {
         function json_decode( $string, $assoc_array = false ) {
                 global $wp_json;
  
-               if ( !is_a($wp_json, 'Services_JSON') ) {
+               if ( ! ($wp_json instanceof Services_JSON ) ) {
                         require_once( ABSPATH . WPINC . '/class-json.php' );
                         $wp_json = new Services_JSON();
                 }