+ /**
+  * Determines the character set to assume for the given table.
+  *
+  * The answer is derived from the collations of the table's columns and is
+  * cached in $this->table_charset under the lowercased table name. The column
+  * rows returned by SHOW FULL COLUMNS are stored in $this->col_meta as a
+  * side effect.
+  *
+  * @since 4.2.0
+  * @access protected
+  *
+  * @param string $table Table name.
+  * @return string|false|WP_Error Table character set, 'binary' if the table has a binary or
+  *                               blob column, false if no column has a collation, or a
+  *                               WP_Error object if the columns couldn't be read.
+  */
+ protected function get_table_charset( $table ) {
+ 	$cache_key = strtolower( $table );
+
+ 	/**
+ 	 * Filters the table charset value before the DB is checked.
+ 	 *
+ 	 * Returning anything other than null from the filter short-circuits
+ 	 * the DB lookup; that value is returned as the charset instead.
+ 	 *
+ 	 * @since 4.2.0
+ 	 *
+ 	 * @param string $charset The character set to use. Default null.
+ 	 * @param string $table   The name of the table being checked.
+ 	 */
+ 	$filtered = apply_filters( 'pre_get_table_charset', null, $table );
+ 	if ( null !== $filtered ) {
+ 		return $filtered;
+ 	}
+
+ 	// Serve a previously computed answer (which may legitimately be false).
+ 	if ( isset( $this->table_charset[ $cache_key ] ) ) {
+ 		return $this->table_charset[ $cache_key ];
+ 	}
+
+ 	// Backtick-quote each dot-separated part of the name (database.table).
+ 	$quoted_table = '`' . implode( '`.`', explode( '.', $table ) ) . '`';
+ 	$rows         = $this->get_results( "SHOW FULL COLUMNS FROM $quoted_table" );
+ 	if ( ! $rows ) {
+ 		return new WP_Error( 'wpdb_get_table_charset_failure' );
+ 	}
+
+ 	// Index the column rows by lowercased field name and keep them for later lookups.
+ 	$columns = array();
+ 	foreach ( $rows as $row ) {
+ 		$columns[ strtolower( $row->Field ) ] = $row;
+ 	}
+ 	$this->col_meta[ $cache_key ] = $columns;
+
+ 	$binary_types = array( 'BINARY', 'VARBINARY', 'TINYBLOB', 'MEDIUMBLOB', 'BLOB', 'LONGBLOB' );
+ 	$found        = array();
+
+ 	foreach ( $columns as $column ) {
+ 		if ( ! empty( $column->Collation ) ) {
+ 			// The charset is the part of the collation name before the first underscore.
+ 			$collation_parts = explode( '_', $column->Collation );
+ 			$name            = $collation_parts[0];
+
+ 			// Without utf8mb4 support on this connection, only 3-byte utf8 characters can be sent.
+ 			if ( 'utf8mb4' === $name && ! $this->has_cap( 'utf8mb4' ) ) {
+ 				$name = 'utf8';
+ 			}
+
+ 			$found[ strtolower( $name ) ] = true;
+ 		}
+
+ 		// Drop any "(length)" suffix to get the bare column type.
+ 		$type_parts = explode( '(', $column->Type );
+
+ 		// A single binary/blob column makes the whole table be treated as binary.
+ 		if ( in_array( strtoupper( $type_parts[0] ), $binary_types ) ) {
+ 			$this->table_charset[ $cache_key ] = 'binary';
+ 			return 'binary';
+ 		}
+ 	}
+
+ 	// utf8mb3 is an alias for utf8, so fold it into the utf8 entry.
+ 	if ( isset( $found['utf8mb3'] ) ) {
+ 		$found['utf8'] = true;
+ 		unset( $found['utf8mb3'] );
+ 	}
+
+ 	$total = count( $found );
+ 	if ( 0 === $total ) {
+ 		// No charsets at all: assume this table can store whatever.
+ 		$result = false;
+ 	} elseif ( 1 === $total ) {
+ 		$result = key( $found );
+ 	} else {
+ 		// Several charsets in play. Ignore latin1 and look at what is left.
+ 		unset( $found['latin1'] );
+ 		$remaining = count( $found );
+
+ 		if ( 1 === $remaining ) {
+ 			// Exactly one charset besides latin1.
+ 			$result = key( $found );
+ 		} elseif ( 2 === $remaining && isset( $found['utf8'], $found['utf8mb4'] ) ) {
+ 			// Only utf8 and utf8mb4 remain: use the narrower utf8.
+ 			$result = 'utf8';
+ 		} else {
+ 			// Any other mixture: fall back to ascii.
+ 			$result = 'ascii';
+ 		}
+ 	}
+
+ 	$this->table_charset[ $cache_key ] = $result;
+ 	return $result;
+ }
+
+ /**
+  * Retrieves the character set for the given column.
+  *
+  * Column information is primed through get_table_charset() when it has not
+  * been loaded yet. If no metadata is available for the column, the table's
+  * character set is returned instead.
+  *
+  * @since 4.2.0
+  * @access public
+  *
+  * @param string $table  Table name.
+  * @param string $column Column name.
+  * @return string|false|WP_Error Column character set as a string. False if the column has no
+  *                               character set. WP_Error object if there was an error.
+  */
+ public function get_col_charset( $table, $column ) {
+ 	$tablekey  = strtolower( $table );
+ 	$columnkey = strtolower( $column );
+
+ 	/**
+ 	 * Filters the column charset value before the DB is checked.
+ 	 *
+ 	 * Passing a non-null value to the filter will short-circuit
+ 	 * checking the DB for the charset, returning that value instead.
+ 	 *
+ 	 * @since 4.2.0
+ 	 *
+ 	 * @param string|null $charset The character set to use. Default null.
+ 	 * @param string      $table   The name of the table being checked.
+ 	 * @param string      $column  The name of the column being checked.
+ 	 */
+ 	$charset = apply_filters( 'pre_get_col_charset', null, $table, $column );
+ 	if ( null !== $charset ) {
+ 		return $charset;
+ 	}
+
+ 	// Skip this entirely if this isn't a MySQL database.
+ 	if ( empty( $this->is_mysql ) ) {
+ 		return false;
+ 	}
+
+ 	if ( ! empty( $this->table_charset[ $tablekey ] ) ) {
+ 		$table_charset = $this->table_charset[ $tablekey ];
+ 	} else {
+ 		// This primes column information for us.
+ 		$table_charset = $this->get_table_charset( $table );
+ 		if ( is_wp_error( $table_charset ) ) {
+ 			return $table_charset;
+ 		}
+
+ 		/*
+ 		 * Prefer the cached value when there is one. get_table_charset() leaves
+ 		 * the cache untouched when the 'pre_get_table_charset' filter
+ 		 * short-circuits it, so in that case keep the value it returned rather
+ 		 * than reading an undefined cache entry.
+ 		 */
+ 		if ( isset( $this->table_charset[ $tablekey ] ) ) {
+ 			$table_charset = $this->table_charset[ $tablekey ];
+ 		}
+ 	}
+
+ 	// No column information for this table, or no such column: return the table charset.
+ 	if ( empty( $this->col_meta[ $tablekey ][ $columnkey ] ) ) {
+ 		return $table_charset;
+ 	}
+
+ 	$collation = $this->col_meta[ $tablekey ][ $columnkey ]->Collation;
+
+ 	// Return false when it's not a string column.
+ 	if ( empty( $collation ) ) {
+ 		return false;
+ 	}
+
+ 	// The charset is the part of the collation name before the first underscore.
+ 	list( $charset ) = explode( '_', $collation );
+ 	return $charset;
+ }
+
+ /**
+  * Looks up the maximum string length a given column can hold.
+  *
+  * Depending on the column type, the length is expressed either in bytes
+  * or in characters.
+  *
+  * @since 4.2.1
+  * @access public
+  *
+  * @param string $table  Table name.
+  * @param string $column Column name.
+  * @return array|false|WP_Error array( 'length' => (int), 'type' => 'byte' | 'char' )
+  *                              false if the column has no length (for example, numeric column)
+  *                              WP_Error object if there was an error.
+  */
+ public function get_col_length( $table, $column ) {
+ 	$tablekey  = strtolower( $table );
+ 	$columnkey = strtolower( $column );
+
+ 	// Nothing to report unless this is a MySQL database.
+ 	if ( empty( $this->is_mysql ) ) {
+ 		return false;
+ 	}
+
+ 	// get_table_charset() loads the column metadata as a side effect.
+ 	if ( empty( $this->col_meta[ $tablekey ] ) ) {
+ 		$primed = $this->get_table_charset( $table );
+ 		if ( is_wp_error( $primed ) ) {
+ 			return $primed;
+ 		}
+ 	}
+
+ 	if ( empty( $this->col_meta[ $tablekey ][ $columnkey ] ) ) {
+ 		return false;
+ 	}
+
+ 	// Split a type such as "varchar(255)" into its name and declared length.
+ 	$pieces   = explode( '(', $this->col_meta[ $tablekey ][ $columnkey ]->Type );
+ 	$type     = strtolower( $pieces[0] );
+ 	$declared = ! empty( $pieces[1] ) ? trim( $pieces[1], ')' ) : false;
+
+ 	// Types whose declared length counts characters.
+ 	if ( 'char' === $type || 'varchar' === $type ) {
+ 		return array(
+ 			'type'   => 'char',
+ 			'length' => (int) $declared,
+ 		);
+ 	}
+
+ 	// Types whose declared length counts bytes.
+ 	if ( 'binary' === $type || 'varbinary' === $type ) {
+ 		return array(
+ 			'type'   => 'byte',
+ 			'length' => (int) $declared,
+ 		);
+ 	}
+
+ 	// Types with a fixed maximum size in bytes.
+ 	$fixed_byte_lengths = array(
+ 		'tinyblob'   => 255,        // 2^8 - 1
+ 		'tinytext'   => 255,        // 2^8 - 1
+ 		'blob'       => 65535,      // 2^16 - 1
+ 		'text'       => 65535,      // 2^16 - 1
+ 		'mediumblob' => 16777215,   // 2^24 - 1
+ 		'mediumtext' => 16777215,   // 2^24 - 1
+ 		'longblob'   => 4294967295, // 2^32 - 1
+ 		'longtext'   => 4294967295, // 2^32 - 1
+ 	);
+
+ 	if ( isset( $fixed_byte_lengths[ $type ] ) ) {
+ 		return array(
+ 			'type'   => 'byte',
+ 			'length' => $fixed_byte_lengths[ $type ],
+ 		);
+ 	}
+
+ 	// Any other type has no string length.
+ 	return false;
+ }
+
+ /**
+  * Tells whether a string consists solely of ASCII characters.
+  *
+  * mb_check_encoding() is used when it is available. Otherwise a negated
+  * regex is used, which lets the search stop at the first non-ASCII
+  * character it meets.
+  *
+  * @since 4.2.0
+  * @access protected
+  *
+  * @param string $string String to check.
+  * @return bool True if ASCII, false if not.
+  */
+ protected function check_ascii( $string ) {
+ 	if ( ! function_exists( 'mb_check_encoding' ) ) {
+ 		// Fallback: the string is ASCII when no byte outside 0x00-0x7F is found.
+ 		return ! preg_match( '/[^\x00-\x7F]/', $string );
+ 	}
+
+ 	return (bool) mb_check_encoding( $string, 'ASCII' );
+ }
+
+ /**
+  * Check if the query is accessing a collation considered safe on the current version of MySQL.
+  *
+  * A query is treated as safe when it does not read data, when it is pure
+  * ASCII, when its table has no collation or only latin1, or when every
+  * collated column of its table uses one of a short list of utf8, utf8mb3
+  * or utf8mb4 collations.
+  *
+  * @since 4.2.0
+  * @access protected
+  *
+  * @param string $query The query to check.
+  * @return bool True if the collation is safe, false if it isn't.
+  */
+ protected function check_safe_collation( $query ) {
+ 	// Set while this method looks up the table charset, so that lookup is not itself checked.
+ 	if ( $this->checking_collation ) {
+ 		return true;
+ 	}
+
+ 	// We don't need to check the collation for queries that don't read data.
+ 	$query = ltrim( $query, "\r\n\t (" );
+ 	if ( preg_match( '/^(?:SHOW|DESCRIBE|DESC|EXPLAIN|CREATE)\s/i', $query ) ) {
+ 		return true;
+ 	}
+
+ 	// All-ASCII queries don't need extra checking.
+ 	if ( $this->check_ascii( $query ) ) {
+ 		return true;
+ 	}
+
+ 	$table = $this->get_table_from_query( $query );
+ 	if ( ! $table ) {
+ 		return false;
+ 	}
+
+ 	$this->checking_collation = true;
+ 	$collation                = $this->get_table_charset( $table );
+ 	$this->checking_collation = false;
+
+ 	// Tables with no collation, or latin1 only, don't need extra checking.
+ 	// Any other result, including a WP_Error, falls through to the column checks.
+ 	if ( false === $collation || 'latin1' === $collation ) {
+ 		return true;
+ 	}
+
+ 	$table = strtolower( $table );
+ 	if ( empty( $this->col_meta[ $table ] ) ) {
+ 		return false;
+ 	}
+
+ 	/*
+ 	 * Collations that need no further sanity checking. utf8mb3 is an alias
+ 	 * for utf8, and servers may report either name, so both spellings are listed.
+ 	 */
+ 	$safe_collations = array(
+ 		'utf8_bin',
+ 		'utf8_general_ci',
+ 		'utf8mb3_bin',
+ 		'utf8mb3_general_ci',
+ 		'utf8mb4_bin',
+ 		'utf8mb4_general_ci',
+ 	);
+
+ 	// If any of the columns don't have one of these collations, it needs more sanity checking.
+ 	foreach ( $this->col_meta[ $table ] as $col ) {
+ 		// Columns without a collation are not string columns; skip them.
+ 		if ( empty( $col->Collation ) ) {
+ 			continue;
+ 		}
+
+ 		if ( ! in_array( $col->Collation, $safe_collations, true ) ) {
+ 			return false;
+ 		}
+ 	}
+
+ 	return true;
+ }
+
+ /**
+  * Strips any invalid characters based on value/charset pairs.
+  *
+  * Values are also truncated to the column length when one is given. utf8,
+  * utf8mb3 and utf8mb4 values are cleaned locally with a regex when the
+  * mbstring functions are available; anything else is sent to the database
+  * in a single SELECT that converts each value to its target charset.
+  *
+  * @since 4.2.0
+  * @access protected
+  *
+  * @param array $data Array of value arrays. Each value array has the keys
+  *                    'value' and 'charset'. An optional 'length' key holds
+  *                    array( 'length' => (int), 'type' => 'byte' | 'char' ), or
+  *                    a non-array value when there is no length limit. An
+  *                    optional 'ascii' key can be set to false to avoid
+  *                    redundant ASCII checks.
+  * @return array|WP_Error The $data parameter, with invalid characters removed from
+  *                        each value. This works as a passthrough: any additional keys
+  *                        such as 'field' are retained in each value array. If we cannot
+  *                        remove invalid characters, a WP_Error object is returned.
+  */
+ protected function strip_invalid_text( $data ) {
+ 	$db_check_string = false;
+
+ 	foreach ( $data as &$value ) {
+ 		$charset = $value['charset'];
+
+ 		if ( isset( $value['length'] ) && is_array( $value['length'] ) ) {
+ 			$length                  = $value['length']['length'];
+ 			$truncate_by_byte_length = 'byte' === $value['length']['type'];
+ 		} else {
+ 			$length = false;
+ 			// Since we have no length, we'll never truncate.
+ 			// Initialize the variable to false. true would take us
+ 			// through an unnecessary (for this case) codepath below.
+ 			$truncate_by_byte_length = false;
+ 		}
+
+ 		// There's no charset to work with.
+ 		if ( false === $charset ) {
+ 			continue;
+ 		}
+
+ 		// Column isn't a string.
+ 		if ( ! is_string( $value['value'] ) ) {
+ 			continue;
+ 		}
+
+ 		$needs_validation = true;
+ 		if (
+ 			// latin1 can store any byte sequence
+ 			'latin1' === $charset
+ 		||
+ 			// ASCII is always OK.
+ 			( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
+ 		) {
+ 			$truncate_by_byte_length = true;
+ 			$needs_validation        = false;
+ 		}
+
+ 		if ( $truncate_by_byte_length ) {
+ 			// Make strlen()/substr() count bytes even if mbstring overloads them.
+ 			mbstring_binary_safe_encoding();
+ 			if ( false !== $length && strlen( $value['value'] ) > $length ) {
+ 				$value['value'] = substr( $value['value'], 0, $length );
+ 			}
+ 			reset_mbstring_encoding();
+
+ 			if ( ! $needs_validation ) {
+ 				continue;
+ 			}
+ 		}
+
+ 		// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
+ 		if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
+ 			$regex = '/
+ (
+ (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
+ | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
+ | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
+ | [\xE1-\xEC][\x80-\xBF]{2}
+ | \xED[\x80-\x9F][\x80-\xBF]
+ | [\xEE-\xEF][\x80-\xBF]{2}';
+
+ 			// Four-byte sequences are only valid for utf8mb4.
+ 			if ( 'utf8mb4' === $charset ) {
+ 				$regex .= '
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
+ | [\xF1-\xF3][\x80-\xBF]{3}
+ | \xF4[\x80-\x8F][\x80-\xBF]{2}
+ ';
+ 			}
+
+ 			$regex .= '){1,40} # ...one or more times
+ )
+ | . # anything else
+ /x';
+ 			// Valid runs are kept via $1; any other byte matches the last branch and is dropped.
+ 			$value['value'] = preg_replace( $regex, '$1', $value['value'] );
+
+ 			if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
+ 				$value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
+ 			}
+ 			continue;
+ 		}
+
+ 		// We couldn't use any local conversions, send it to the DB.
+ 		$value['db'] = $db_check_string = true;
+ 	}
+ 	unset( $value ); // Remove by reference.
+
+ 	if ( $db_check_string ) {
+ 		// The connection charset is the same for every value, so work it out once.
+ 		if ( $this->charset ) {
+ 			$connection_charset = $this->charset;
+ 		} elseif ( $this->use_mysqli ) {
+ 			$connection_charset = mysqli_character_set_name( $this->dbh );
+ 		} else {
+ 			$connection_charset = mysql_client_encoding();
+ 		}
+
+ 		$queries = array();
+ 		foreach ( $data as $col => $value ) {
+ 			if ( empty( $value['db'] ) ) {
+ 				continue;
+ 			}
+
+ 			// We're going to need to truncate by characters or bytes, depending on the length value we have.
+ 			// isset() keeps this safe when 'length' is false or missing.
+ 			if ( isset( $value['length']['type'] ) && 'byte' === $value['length']['type'] ) {
+ 				// Using binary causes LEFT() to truncate by bytes.
+ 				$charset = 'binary';
+ 			} else {
+ 				$charset = $value['charset'];
+ 			}
+
+ 			if ( isset( $value['length'] ) && is_array( $value['length'] ) ) {
+ 				$queries[ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING $charset ), %.0f ) USING $connection_charset )", $value['value'], $value['length']['length'] );
+ 			} elseif ( 'binary' !== $charset ) {
+ 				// If we don't have a length, there's no need to convert binary - it will always return the same result.
+ 				$queries[ $col ] = $this->prepare( "CONVERT( CONVERT( %s USING $charset ) USING $connection_charset )", $value['value'] );
+ 			}
+
+ 			unset( $data[ $col ]['db'] );
+ 		}
+
+ 		$sql = array();
+ 		foreach ( $queries as $column => $query ) {
+ 			if ( ! $query ) {
+ 				continue;
+ 			}
+
+ 			$sql[] = $query . " AS x_$column";
+ 		}
+
+ 		// Only hit the database when there is something to convert. An empty
+ 		// column list would be an invalid SELECT and a spurious failure.
+ 		if ( $sql ) {
+ 			// This query is built here from prepared pieces; skip checking it again.
+ 			$this->check_current_query = false;
+ 			$row                       = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A );
+ 			if ( ! $row ) {
+ 				return new WP_Error( 'wpdb_strip_invalid_text_failure' );
+ 			}
+
+ 			// Copy the converted values back; columns without a result keep their value.
+ 			foreach ( array_keys( $data ) as $column ) {
+ 				if ( isset( $row["x_$column"] ) ) {
+ 					$data[ $column ]['value'] = $row["x_$column"];
+ 				}
+ 			}
+ 		}
+ 	}
+
+ 	return $data;
+ }
+
+ /**
+  * Removes characters from a query that are invalid for its target charset.
+  *
+  * The charset is that of the first table found in the query, or the
+  * connection charset when no table can be found.
+  *
+  * @since 4.2.0
+  * @access protected
+  *
+  * @param string $query Query to convert.
+  * @return string|WP_Error The converted query, or a WP_Error object if the conversion fails.
+  */
+ protected function strip_invalid_text_from_query( $query ) {
+ 	// Queries that don't read data are passed through untouched.
+ 	$leading = ltrim( $query, "\r\n\t (" );
+ 	if ( preg_match( '/^(?:SHOW|DESCRIBE|DESC|EXPLAIN|CREATE)\s/i', $leading ) ) {
+ 		return $query;
+ 	}
+
+ 	$table = $this->get_table_from_query( $query );
+ 	if ( ! $table ) {
+ 		// No table to inspect: fall back to the connection charset.
+ 		$charset = $this->charset;
+ 	} else {
+ 		$charset = $this->get_table_charset( $table );
+ 		if ( is_wp_error( $charset ) ) {
+ 			return $charset;
+ 		}
+
+ 		// Text can't be reliably stripped for tables containing binary/blob columns.
+ 		if ( 'binary' === $charset ) {
+ 			return $query;
+ 		}
+ 	}
+
+ 	// Treat the whole query as a single value with no length limit.
+ 	$items = array(
+ 		array(
+ 			'value'   => $query,
+ 			'charset' => $charset,
+ 			'ascii'   => false,
+ 			'length'  => false,
+ 		),
+ 	);
+
+ 	$stripped = $this->strip_invalid_text( $items );
+ 	if ( is_wp_error( $stripped ) ) {
+ 		return $stripped;
+ 	}
+
+ 	return $stripped[0]['value'];
+ }
+
+ /**
+  * Removes characters from a value that are invalid for the given table column.
+  *
+  * Non-string values, and values for columns without a character set, are
+  * returned unchanged.
+  *
+  * @since 4.2.0
+  * @access public
+  *
+  * @param string $table  Table name.
+  * @param string $column Column name.
+  * @param string $value  The text to check.
+  * @return string|WP_Error The converted string, or a WP_Error object if the conversion fails.
+  */
+ public function strip_invalid_text_for_column( $table, $column, $value ) {
+ 	if ( ! is_string( $value ) ) {
+ 		return $value;
+ 	}
+
+ 	$col_charset = $this->get_col_charset( $table, $column );
+
+ 	// A falsy charset means this is not a string column.
+ 	if ( ! $col_charset ) {
+ 		return $value;
+ 	}
+
+ 	// Bail on real errors.
+ 	if ( is_wp_error( $col_charset ) ) {
+ 		return $col_charset;
+ 	}
+
+ 	$field = array(
+ 		'value'   => $value,
+ 		'charset' => $col_charset,
+ 		'length'  => $this->get_col_length( $table, $column ),
+ 	);
+
+ 	$stripped = $this->strip_invalid_text( array( $column => $field ) );
+ 	if ( is_wp_error( $stripped ) ) {
+ 		return $stripped;
+ 	}
+
+ 	return $stripped[ $column ]['value'];
+ }
+
+ /**
+  * Extracts the first table name referenced in a query.
+  *
+  * The query is normalized, then tried against a list of patterns in order.
+  * The first pattern that matches supplies the table name, with backticks
+  * removed.
+  *
+  * @since 4.2.0
+  * @access protected
+  *
+  * @param string $query The query to search.
+  * @return string|false $table The table name found, or false if a table couldn't be found.
+  */
+ protected function get_table_from_query( $query ) {
+ 	// Drop characters that can legally trail the table name.
+ 	$sql = rtrim( $query, ';/-#' );
+
+ 	// Allow (select...) union [...] style queries; the first query's table name is used.
+ 	$sql = ltrim( $sql, "\r\n\t (" );
+
+ 	// Empty out every parenthesised group except nested selects.
+ 	$sql = preg_replace( '/\((?!\s*select)[^(]*?\)/is', '()', $sql );
+
+ 	$patterns = array(
+ 		// The most common queries, tried first.
+ 		'/^\s*(?:'
+ 			. 'SELECT.*?\s+FROM'
+ 			. '|INSERT(?:\s+LOW_PRIORITY|\s+DELAYED|\s+HIGH_PRIORITY)?(?:\s+IGNORE)?(?:\s+INTO)?'
+ 			. '|REPLACE(?:\s+LOW_PRIORITY|\s+DELAYED)?(?:\s+INTO)?'
+ 			. '|UPDATE(?:\s+LOW_PRIORITY)?(?:\s+IGNORE)?'
+ 			. '|DELETE(?:\s+LOW_PRIORITY|\s+QUICK|\s+IGNORE)*(?:\s+FROM)?'
+ 			. ')\s+((?:[0-9a-zA-Z$_.`-]|[\xC2-\xDF][\x80-\xBF])+)/is',
+
+ 		// SHOW TABLE STATUS and SHOW TABLES.
+ 		'/^\s*(?:'
+ 			. 'SHOW\s+TABLE\s+STATUS.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)'
+ 			. '|SHOW\s+(?:FULL\s+)?TABLES.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)'
+ 			. ')\W((?:[0-9a-zA-Z$_.`-]|[\xC2-\xDF][\x80-\xBF])+)\W/is',
+
+ 		// Big pattern for the rest of the table-related queries.
+ 		'/^\s*(?:'
+ 			. '(?:EXPLAIN\s+(?:EXTENDED\s+)?)?SELECT.*?\s+FROM'
+ 			. '|DESCRIBE|DESC|EXPLAIN|HANDLER'
+ 			. '|(?:LOCK|UNLOCK)\s+TABLE(?:S)?'
+ 			. '|(?:RENAME|OPTIMIZE|BACKUP|RESTORE|CHECK|CHECKSUM|ANALYZE|REPAIR).*\s+TABLE'
+ 			. '|TRUNCATE(?:\s+TABLE)?'
+ 			. '|CREATE(?:\s+TEMPORARY)?\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?'
+ 			. '|ALTER(?:\s+IGNORE)?\s+TABLE'
+ 			. '|DROP\s+TABLE(?:\s+IF\s+EXISTS)?'
+ 			. '|CREATE(?:\s+\w+)?\s+INDEX.*\s+ON'
+ 			. '|DROP\s+INDEX.*\s+ON'
+ 			. '|LOAD\s+DATA.*INFILE.*INTO\s+TABLE'
+ 			. '|(?:GRANT|REVOKE).*ON\s+TABLE'
+ 			. '|SHOW\s+(?:.*FROM|.*TABLE)'
+ 			. ')\s+\(*\s*((?:[0-9a-zA-Z$_.`-]|[\xC2-\xDF][\x80-\xBF])+)\s*\)*/is',
+ 	);
+
+ 	foreach ( $patterns as $pattern ) {
+ 		if ( preg_match( $pattern, $sql, $found ) ) {
+ 			// The first capture group is the (possibly backtick-quoted) table name.
+ 			return str_replace( '`', '', $found[1] );
+ 		}
+ 	}
+
+ 	return false;
+ }
+