+/*
+ * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
+ * When $encoding names UTF-8 (spelled exactly 'utf8', 'utf-8', 'UTF8' or 'UTF-8'),
+ * the $str input is expected to be a valid UTF-8 byte sequence.
+ * The behavior of this function for invalid inputs is undefined.
+ */
+function _mb_substr( $str, $start, $length = null, $encoding = null ) {
+ if ( null === $encoding ) {
+ $encoding = get_option( 'blog_charset' );
+ }
+
+ // The solution below works only for UTF-8,
+ // so in case of a different charset just use built-in substr()
+ if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
+ return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
+ }
+
+ if ( _wp_can_use_pcre_u() ) {
+ // Use the regex unicode support to separate the UTF-8 characters into an array
+ preg_match_all( '/./us', $str, $match );
+ $chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
+ return implode( '', $chars );