WordPress 4.1.2
[autoinstalls/wordpress.git] / wp-includes / wp-db.php
index 1656df7be76959d8b66f51438f0bac18587e107e..76c41d8bbca4018817ef4f1b26caf63dc37a40bb 100644 (file)
@@ -143,6 +143,43 @@ class wpdb {
         */
        protected $result;
 
+       /**
+        * Cached column info, for sanity checking data before inserting
+        *
+        * @since 4.2.0
+        * @access protected
+        * @var array
+        */
+       protected $col_meta = array();
+
+       /**
+        * Calculated character sets on tables
+        *
+        * @since 4.2.0
+        * @access protected
+        * @var array
+        */
+       protected $table_charset = array();
+
+       /**
+        * Whether text fields in the current query need to be sanity checked.
+        *
+        * @since 4.2.0
+        * @access protected
+        * @var bool
+        */
+       protected $check_current_query = true;
+
+       /**
+        * Flag to ensure we don't run into recursion problems when checking the collation.
+        *
+        * @since 4.2.0
+        * @access private
+        * @see wpdb::check_safe_collation()
+        * @var boolean
+        */
+       private $checking_collation = false;
+
        /**
         * Saved info on the table column
         *
@@ -647,6 +684,14 @@ class wpdb {
         * @param mixed  $value The value to set
         */
        public function __set( $name, $value ) {
+               $protected_members = array(
+                       'col_meta',
+                       'table_charset',
+                       'check_current_query',
+               );
+               if (  in_array( $name, $protected_members, true ) ) {
+                       return;
+               }
                $this->$name = $value;
        }
 
@@ -716,7 +761,7 @@ class wpdb {
                                        $query = $this->prepare( 'SET NAMES %s', $charset );
                                        if ( ! empty( $collate ) )
                                                $query .= $this->prepare( ' COLLATE %s', $collate );
-                                       mysqli_query( $query, $dbh );
+                                       mysqli_query( $dbh, $query );
                                }
                        } else {
                                if ( function_exists( 'mysql_set_charset' ) && $this->has_cap( 'set_charset' ) ) {
@@ -1541,6 +1586,7 @@ class wpdb {
         */
        public function query( $query ) {
                if ( ! $this->ready ) {
+                       $this->check_current_query = true;
                        return false;
                }
 
@@ -1561,6 +1607,19 @@ class wpdb {
                // Log how the function was called
                $this->func_call = "\$db->query(\"$query\")";
 
+               // If we're writing to the database, make sure the query will write safely.
+               if ( $this->check_current_query && ! $this->check_ascii( $query ) ) {
+                       $stripped_query = $this->strip_invalid_text_from_query( $query );
+                       // strip_invalid_text_from_query() can perform queries, so we need
+                       // to flush again, just to make sure everything is clear.
+                       $this->flush();
+                       if ( $stripped_query !== $query ) {
+                               $this->insert_id = 0;
+                               return false;
+                       }
+               }
+
+               $this->check_current_query = true;
                // Keep track of the last query for debug..
                $this->last_query = $query;
 
@@ -1730,23 +1789,29 @@ class wpdb {
         * @return int|false The number of rows affected, or false on error.
         */
        function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) {
-               if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) )
+               if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) {
+                       return false;
+               }
+
+               $data = $this->process_fields( $table, $data, $format );
+               if ( false === $data ) {
                        return false;
-               $this->insert_id = 0;
-               $formats = $format = (array) $format;
-               $fields = array_keys( $data );
-               $formatted_fields = array();
-               foreach ( $fields as $field ) {
-                       if ( !empty( $format ) )
-                               $form = ( $form = array_shift( $formats ) ) ? $form : $format[0];
-                       elseif ( isset( $this->field_types[$field] ) )
-                               $form = $this->field_types[$field];
-                       else
-                               $form = '%s';
-                       $formatted_fields[] = $form;
                }
-               $sql = "{$type} INTO `$table` (`" . implode( '`,`', $fields ) . "`) VALUES (" . implode( ",", $formatted_fields ) . ")";
-               return $this->query( $this->prepare( $sql, $data ) );
+
+               $formats = $values = array();
+               foreach ( $data as $value ) {
+                       $formats[] = $value['format'];
+                       $values[]  = $value['value'];
+               }
+
+               $fields  = '`' . implode( '`, `', array_keys( $data ) ) . '`';
+               $formats = implode( ', ', $formats );
+
+               $sql = "$type INTO `$table` ($fields) VALUES ($formats)";
+
+               $this->insert_id = 0;
+               $this->check_current_query = false;
+               return $this->query( $this->prepare( $sql, $values ) );
        }
 
        /**
@@ -1769,34 +1834,36 @@ class wpdb {
         * @return int|false The number of rows updated, or false on error.
         */
        public function update( $table, $data, $where, $format = null, $where_format = null ) {
-               if ( ! is_array( $data ) || ! is_array( $where ) )
+               if ( ! is_array( $data ) || ! is_array( $where ) ) {
                        return false;
+               }
 
-               $formats = $format = (array) $format;
-               $bits = $wheres = array();
-               foreach ( (array) array_keys( $data ) as $field ) {
-                       if ( !empty( $format ) )
-                               $form = ( $form = array_shift( $formats ) ) ? $form : $format[0];
-                       elseif ( isset($this->field_types[$field]) )
-                               $form = $this->field_types[$field];
-                       else
-                               $form = '%s';
-                       $bits[] = "`$field` = {$form}";
+               $data = $this->process_fields( $table, $data, $format );
+               if ( false === $data ) {
+                       return false;
+               }
+               $where = $this->process_fields( $table, $where, $where_format );
+               if ( false === $where ) {
+                       return false;
                }
 
-               $where_formats = $where_format = (array) $where_format;
-               foreach ( (array) array_keys( $where ) as $field ) {
-                       if ( !empty( $where_format ) )
-                               $form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0];
-                       elseif ( isset( $this->field_types[$field] ) )
-                               $form = $this->field_types[$field];
-                       else
-                               $form = '%s';
-                       $wheres[] = "`$field` = {$form}";
+               $fields = $conditions = $values = array();
+               foreach ( $data as $field => $value ) {
+                       $fields[] = "`$field` = " . $value['format'];
+                       $values[] = $value['value'];
                }
+               foreach ( $where as $field => $value ) {
+                       $conditions[] = "`$field` = " . $value['format'];
+                       $values[] = $value['value'];
+               }
+
+               $fields = implode( ', ', $fields );
+               $conditions = implode( ' AND ', $conditions );
 
-               $sql = "UPDATE `$table` SET " . implode( ', ', $bits ) . ' WHERE ' . implode( ' AND ', $wheres );
-               return $this->query( $this->prepare( $sql, array_merge( array_values( $data ), array_values( $where ) ) ) );
+               $sql = "UPDATE `$table` SET $fields WHERE $conditions";
+
+               $this->check_current_query = false;
+               return $this->query( $this->prepare( $sql, $values ) );
        }
 
        /**
@@ -1816,27 +1883,132 @@ class wpdb {
         * @return int|false The number of rows updated, or false on error.
         */
        public function delete( $table, $where, $where_format = null ) {
-               if ( ! is_array( $where ) )
+               if ( ! is_array( $where ) ) {
+                       return false;
+               }
+
+               $where = $this->process_fields( $table, $where, $where_format );
+               if ( false === $where ) {
+                       return false;
+               }
+
+               $conditions = $values = array();
+               foreach ( $where as $field => $value ) {
+                       $conditions[] = "`$field` = " . $value['format'];
+                       $values[] = $value['value'];
+               }
+
+               $conditions = implode( ' AND ', $conditions );
+
+               $sql = "DELETE FROM `$table` WHERE $conditions";
+
+               $this->check_current_query = false;
+               return $this->query( $this->prepare( $sql, $values ) );
+       }
+
+       /**
+        * Processes arrays of field/value pairs and field formats.
+        *
+        * This is a helper method for wpdb's CRUD methods, which take field/value
+        * pairs for inserts, updates, and where clauses. This method first pairs
+        * each value with a format. Then it determines the charset of that field,
+        * using that to determine if any invalid text would be stripped. If text is
+        * stripped, then field processing is rejected and the query fails.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param string $table  Table name.
+        * @param array  $data   Field/value pair.
+        * @param mixed  $format Format for each field.
+        * @return array|bool Returns an array of fields that contain paired values
+        *                    and formats. Returns false for invalid values.
+        */
+       protected function process_fields( $table, $data, $format ) {
+               $data = $this->process_field_formats( $data, $format );
+               $data = $this->process_field_charsets( $data, $table );
+               if ( false === $data ) {
                        return false;
+               }
 
-               $wheres = array();
+               $converted_data = $this->strip_invalid_text( $data );
 
-               $where_formats = $where_format = (array) $where_format;
+               if ( $data !== $converted_data ) {
+                       return false;
+               }
 
-               foreach ( array_keys( $where ) as $field ) {
-                       if ( !empty( $where_format ) ) {
-                               $form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0];
+               return $data;
+       }
+
+       /**
+        * Prepares arrays of value/format pairs as passed to wpdb CRUD methods.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param array $data   Array of fields to values.
+        * @param mixed $format Formats to be mapped to the values in $data.
+        * @return array Array, keyed by field names with values being an array
+        *               of 'value' and 'format' keys.
+        */
+       protected function process_field_formats( $data, $format ) {
+               $formats = $original_formats = (array) $format;
+
+               foreach ( $data as $field => $value ) {
+                       $value = array(
+                               'value'  => $value,
+                               'format' => '%s',
+                       );
+
+                       if ( ! empty( $format ) ) {
+                               $value['format'] = array_shift( $formats );
+                               if ( ! $value['format'] ) {
+                                       $value['format'] = reset( $original_formats );
+                               }
                        } elseif ( isset( $this->field_types[ $field ] ) ) {
-                               $form = $this->field_types[ $field ];
+                               $value['format'] = $this->field_types[ $field ];
+                       }
+
+                       $data[ $field ] = $value;
+               }
+
+               return $data;
+       }
+
+       /**
+        * Adds field charsets to field/value/format arrays generated by
+        * the {@see wpdb::process_field_formats()} method.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param array  $data  As it comes from the {@see wpdb::process_field_formats()} method.
+        * @param string $table Table name.
+        * @return The same array as $data with additional 'charset' keys.
+        */
+       protected function process_field_charsets( $data, $table ) {
+               foreach ( $data as $field => $value ) {
+                       if ( '%d' === $value['format'] || '%f' === $value['format'] ) {
+                               // We can skip this field if we know it isn't a string.
+                               // This checks %d/%f versus ! %s because it's sprintf() could take more.
+                               $value['charset'] = false;
+                       } elseif ( $this->check_ascii( $value['value'] ) ) {
+                               // If it's ASCII, then we don't need the charset. We can skip this field.
+                               $value['charset'] = false;
                        } else {
-                               $form = '%s';
+                               $value['charset'] = $this->get_col_charset( $table, $field );
+                               if ( is_wp_error( $value['charset'] ) ) {
+                                       return false;
+                               }
+
+                               // This isn't ASCII. Don't have strip_invalid_text() re-check.
+                               $value['ascii'] = false;
                        }
 
-                       $wheres[] = "$field = $form";
+                       $data[ $field ] = $value;
                }
 
-               $sql = "DELETE FROM $table WHERE " . implode( ' AND ', $wheres );
-               return $this->query( $this->prepare( $sql, $where ) );
+               return $data;
        }
 
        /**
@@ -1856,6 +2028,10 @@ class wpdb {
        public function get_var( $query = null, $x = 0, $y = 0 ) {
                $this->func_call = "\$db->get_var(\"$query\", $x, $y)";
 
+               if ( $this->check_safe_collation( $query ) ) {
+                       $this->check_current_query = false;
+               }
+
                if ( $query ) {
                        $this->query( $query );
                }
@@ -1884,6 +2060,11 @@ class wpdb {
         */
        public function get_row( $query = null, $output = OBJECT, $y = 0 ) {
                $this->func_call = "\$db->get_row(\"$query\",$output,$y)";
+
+               if ( $this->check_safe_collation( $query ) ) {
+                       $this->check_current_query = false;
+               }
+
                if ( $query ) {
                        $this->query( $query );
                } else {
@@ -1921,6 +2102,10 @@ class wpdb {
         * @return array Database query result. Array indexed from 0 by SQL result row number.
         */
        public function get_col( $query = null , $x = 0 ) {
+               if ( $this->check_safe_collation( $query ) ) {
+                       $this->check_current_query = false;
+               }
+
                if ( $query ) {
                        $this->query( $query );
                }
@@ -1949,6 +2134,10 @@ class wpdb {
        public function get_results( $query = null, $output = OBJECT ) {
                $this->func_call = "\$db->get_results(\"$query\", $output)";
 
+               if ( $this->check_safe_collation( $query ) ) {
+                       $this->check_current_query = false;
+               }
+
                if ( $query ) {
                        $this->query( $query );
                } else {
@@ -1990,6 +2179,526 @@ class wpdb {
                return null;
        }
 
+       /**
+        * Retrieves the character set for the given table.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param string $table Table name.
+        * @return string|WP_Error Table character set, {@see WP_Error} object if it couldn't be found.
+        */
+       protected function get_table_charset( $table ) {
+               $tablekey = strtolower( $table );
+
+               /**
+                * Filter the table charset value before the DB is checked.
+                *
+                * Passing a non-null value to the filter will effectively short-circuit
+                * checking the DB for the charset, returning that value instead.
+                *
+                * @since 4.2.0
+                *
+                * @param string $charset The character set to use. Default null.
+                * @param string $table   The name of the table being checked.
+                */
+               $charset = apply_filters( 'pre_get_table_charset', null, $table );
+               if ( null !== $charset ) {
+                       return $charset;
+               }
+
+               if ( isset( $this->table_charset[ $tablekey ] ) ) {
+                       return $this->table_charset[ $tablekey ];
+               }
+
+               $charsets = $columns = array();
+               $results = $this->get_results( "SHOW FULL COLUMNS FROM `$table`" );
+               if ( ! $results ) {
+                       return new WP_Error( 'wpdb_get_table_charset_failure' );
+               }
+
+               foreach ( $results as $column ) {
+                       $columns[ strtolower( $column->Field ) ] = $column;
+               }
+
+               $this->col_meta[ $tablekey ] = $columns;
+
+               foreach ( $columns as $column ) {
+                       if ( ! empty( $column->Collation ) ) {
+                               list( $charset ) = explode( '_', $column->Collation );
+                               $charsets[ strtolower( $charset ) ] = true;
+                       }
+
+                       list( $type ) = explode( '(', $column->Type );
+
+                       // A binary/blob means the whole query gets treated like this.
+                       if ( in_array( strtoupper( $type ), array( 'BINARY', 'VARBINARY', 'TINYBLOB', 'MEDIUMBLOB', 'BLOB', 'LONGBLOB' ) ) ) {
+                               $this->table_charset[ $tablekey ] = 'binary';
+                               return 'binary';
+                       }
+               }
+
+               // utf8mb3 is an alias for utf8.
+               if ( isset( $charsets['utf8mb3'] ) ) {
+                       $charsets['utf8'] = true;
+                       unset( $charsets['utf8mb3'] );
+               }
+
+               // Check if we have more than one charset in play.
+               $count = count( $charsets );
+               if ( 1 === $count ) {
+                       $charset = key( $charsets );
+               } elseif ( 0 === $count ) {
+                       // No charsets, assume this table can store whatever.
+                       $charset = false;
+               } else {
+                       // More than one charset. Remove latin1 if present and recalculate.
+                       unset( $charsets['latin1'] );
+                       $count = count( $charsets );
+                       if ( 1 === $count ) {
+                               // Only one charset (besides latin1).
+                               $charset = key( $charsets );
+                       } elseif ( 2 === $count && isset( $charsets['utf8'], $charsets['utf8mb4'] ) ) {
+                               // Two charsets, but they're utf8 and utf8mb4, use utf8.
+                               $charset = 'utf8';
+                       } else {
+                               // Two mixed character sets. ascii.
+                               $charset = 'ascii';
+                       }
+               }
+
+               $this->table_charset[ $tablekey ] = $charset;
+               return $charset;
+       }
+
+       /**
+        * Retrieves the character set for the given column.
+        *
+        * @since 4.2.0
+        * @access public
+        *
+        * @param string $table  Table name.
+        * @param string $column Column name.
+        * @return mixed Column character set as a string. False if the column has no
+        *               character set. {@see WP_Error} object if there was an error.
+        */
+       public function get_col_charset( $table, $column ) {
+               $tablekey = strtolower( $table );
+               $columnkey = strtolower( $column );
+
+               /**
+                * Filter the column charset value before the DB is checked.
+                *
+                * Passing a non-null value to the filter will short-circuit
+                * checking the DB for the charset, returning that value instead.
+                *
+                * @since 4.2.0
+                *
+                * @param string $charset The character set to use. Default null.
+                * @param string $table   The name of the table being checked.
+                * @param string $column  The name of the column being checked.
+                */
+               $charset = apply_filters( 'pre_get_col_charset', null, $table, $column );
+               if ( null !== $charset ) {
+                       return $charset;
+               }
+
+               // Skip this entirely if this isn't a MySQL database.
+               if ( false === $this->is_mysql ) {
+                       return false;
+               }
+
+               if ( empty( $this->table_charset[ $tablekey ] ) ) {
+                       // This primes column information for us.
+                       $table_charset = $this->get_table_charset( $table );
+                       if ( is_wp_error( $table_charset ) ) {
+                               return $table_charset;
+                       }
+               }
+
+               // If still no column information, return the table charset.
+               if ( empty( $this->col_meta[ $tablekey ] ) ) {
+                       return $this->table_charset[ $tablekey ];
+               }
+
+               // If this column doesn't exist, return the table charset.
+               if ( empty( $this->col_meta[ $tablekey ][ $columnkey ] ) ) {
+                       return $this->table_charset[ $tablekey ];
+               }
+
+               // Return false when it's not a string column.
+               if ( empty( $this->col_meta[ $tablekey ][ $columnkey ]->Collation ) ) {
+                       return false;
+               }
+
+               list( $charset ) = explode( '_', $this->col_meta[ $tablekey ][ $columnkey ]->Collation );
+               return $charset;
+       }
+
+       /**
+        * Check if a string is ASCII.
+        *
+        * The negative regex is faster for non-ASCII strings, as it allows
+        * the search to finish as soon as it encounters a non-ASCII character.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param string $string String to check.
+        * @return bool True if ASCII, false if not.
+        */
+       protected function check_ascii( $string ) {
+               if ( function_exists( 'mb_check_encoding' ) ) {
+                       if ( mb_check_encoding( $string, 'ASCII' ) ) {
+                               return true;
+                       }
+               } elseif ( ! preg_match( '/[^\x00-\x7F]/', $string ) ) {
+                       return true;
+               }
+
+               return false;
+       }
+
+       /**
+        * Check if the query is accessing a collation considered safe on the current version of MySQL.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param string $query The query to check.
+        * @return bool True if the collation is safe, false if it isn't.
+        */
+       protected function check_safe_collation( $query ) {
+               if ( $this->checking_collation ) {
+                       return true;
+               }
+
+               // We don't need to check the collation for queries that don't read data.
+               $query = ltrim( $query, "\r\n\t (" );
+               if ( preg_match( '/^(?:SHOW|DESCRIBE|DESC|EXPLAIN)\s/i', $query ) ) {
+                       return true;
+               }
+
+               // All-ASCII queries don't need extra checking.
+               if ( $this->check_ascii( $query ) ) {
+                       return true;
+               }
+
+               $table = $this->get_table_from_query( $query );
+               if ( ! $table ) {
+                       return false;
+               }
+
+               $this->checking_collation = true;
+               $collation = $this->get_table_charset( $table );
+               $this->checking_collation = false;
+
+               // Tables with no collation, or latin1 only, don't need extra checking.
+               if ( false === $collation || 'latin1' === $collation ) {
+                       return true;
+               }
+
+               $table = strtolower( $table );
+               if ( empty( $this->col_meta[ $table ] ) ) {
+                       return false;
+               }
+
+               // If any of the columns don't have one of these collations, it needs more sanity checking.
+               foreach( $this->col_meta[ $table ] as $col ) {
+                       if ( empty( $col->Collation ) ) {
+                               continue;
+                       }
+
+                       if ( ! in_array( $col->Collation, array( 'utf8_general_ci', 'utf8_bin', 'utf8mb4_general_ci', 'utf8mb4_bin' ), true ) ) {
+                               return false;
+                       }
+               }
+
+               return true;
+       }
+
+       /**
+        * Strips any invalid characters based on value/charset pairs.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param array $data Array of value arrays. Each value array has the keys
+        *                    'value' and 'charset'. An optional 'ascii' key can be
+        *                    set to false to avoid redundant ASCII checks.
+        * @return array|WP_Error The $data parameter, with invalid characters removed from
+        *                        each value. This works as a passthrough: any additional keys
+        *                        such as 'field' are retained in each value array. If we cannot
+        *                        remove invalid characters, a {@see WP_Error} object is returned.
+        */
+               // If any of the columns don't have one of these collations, it needs more sanity checking.
+       protected function strip_invalid_text( $data ) {
+               // Some multibyte character sets that we can check in PHP.
+               $mb_charsets = array(
+                       'ascii'   => 'ASCII',
+                       'big5'    => 'BIG-5',
+                       'eucjpms' => 'eucJP-win',
+                       'gb2312'  => 'EUC-CN',
+                       'ujis'    => 'EUC-JP',
+                       'utf32'   => 'UTF-32',
+               );
+
+               $supported_charsets = array();
+               if ( function_exists( 'mb_list_encodings' ) ) {
+                       $supported_charsets = mb_list_encodings();
+               }
+
+               $db_check_string = false;
+
+               foreach ( $data as &$value ) {
+                       $charset = $value['charset'];
+
+                       // Column isn't a string, or is latin1, which will will happily store anything.
+                       if ( false === $charset || 'latin1' === $charset ) {
+                               continue;
+                       }
+
+                       if ( ! is_string( $value['value'] ) ) {
+                               continue;
+                       }
+
+                       // ASCII is always OK.
+                       if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) {
+                               continue;
+                       }
+
+                       // Convert the text locally.
+                       if ( $supported_charsets ) {
+                               if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) {
+                                       $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] );
+                                       continue;
+                               }
+                       }
+
+                       // utf8 can be handled by regex, which is a bunch faster than a DB lookup.
+                       if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
+                               $regex = '/
+                                       (
+                                               (?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
+                                               |   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
+                                               |   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
+                                               |   [\xE1-\xEC][\x80-\xBF]{2}
+                                               |   \xED[\x80-\x9F][\x80-\xBF]
+                                               |   [\xEE-\xEF][\x80-\xBF]{2}';
+
+                               if ( 'utf8mb4' === $charset) {
+                                       $regex .= '
+                                               |    \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
+                                               |    [\xF1-\xF3][\x80-\xBF]{3}
+                                               |    \xF4[\x80-\x8F][\x80-\xBF]{2}
+                                       ';
+                               }
+
+                               $regex .= '){1,50}                          # ...one or more times
+                                       )
+                                       | .                                  # anything else
+                                       /x';
+                               $value['value'] = preg_replace( $regex, '$1', $value['value'] );
+                               continue;
+                       }
+
+                       // We couldn't use any local conversions, send it to the DB.
+                       $value['db'] = $db_check_string = true;
+               }
+               unset( $value ); // Remove by reference.
+
+               if ( $db_check_string ) {
+                       $queries = array();
+                       foreach ( $data as $col => $value ) {
+                               if ( ! empty( $value['db'] ) ) {
+                                       if ( ! isset( $queries[ $value['charset'] ] ) ) {
+                                               $queries[ $value['charset'] ] = array();
+                                       }
+
+                                       // Split the CONVERT() calls by charset, so we can make sure the connection is right
+                                       $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] );
+                               }
+                       }
+
+                       $connection_charset = $this->charset;
+                       foreach ( $queries as $charset => $query ) {
+                               if ( ! $query ) {
+                                       continue;
+                               }
+
+                               // Change the charset to match the string(s) we're converting
+                               if ( $charset !== $connection_charset ) {
+                                       $connection_charset = $charset;
+                                       $this->set_charset( $this->dbh, $charset );
+                               }
+
+                               $this->check_current_query = false;
+
+                               $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N );
+                               if ( ! $row ) {
+                                       $this->set_charset( $this->dbh, $connection_charset );
+                                       return new WP_Error( 'wpdb_strip_invalid_text_failure' );
+                               }
+
+                               $cols = array_keys( $query );
+                               $col_count = count( $cols );
+                               for ( $ii = 0; $ii < $col_count; $ii++ ) {
+                                       $data[ $cols[ $ii ] ]['value'] = $row[ $ii ];
+                               }
+                       }
+
+                       // Don't forget to change the charset back!
+                       if ( $connection_charset !== $this->charset ) {
+                               $this->set_charset( $this->dbh );
+                       }
+               }
+
+               return $data;
+       }
+
+       /**
+        * Strips any invalid characters from the query.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param string $query Query to convert.
+        * @return string|WP_Error The converted query, or a {@see WP_Error} object if the conversion fails.
+        */
+       protected function strip_invalid_text_from_query( $query ) {
+               $table = $this->get_table_from_query( $query );
+               if ( $table ) {
+                       $charset = $this->get_table_charset( $table );
+                       if ( is_wp_error( $charset ) ) {
+                               return $charset;
+                       }
+
+                       // We can't reliably strip text from tables containing binary/blob columns
+                       if ( 'binary' === $charset ) {
+                               return $query;
+                       }
+               } else {
+                       $charset = $this->charset;
+               }
+
+               $data = array(
+                       'value'   => $query,
+                       'charset' => $charset,
+                       'ascii'   => false,
+               );
+
+               $data = $this->strip_invalid_text( array( $data ) );
+               if ( is_wp_error( $data ) ) {
+                       return $data;
+               }
+
+               return $data[0]['value'];
+       }
+
+       /**
+        * Strips any invalid characters from the string for a given table and column.
+        *
+        * @since 4.2.0
+        * @access public
+        *
+        * @param string $table  Table name.
+        * @param string $column Column name.
+        * @param string $value  The text to check.
+        * @return string|WP_Error The converted string, or a `WP_Error` object if the conversion fails.
+        */
+       public function strip_invalid_text_for_column( $table, $column, $value ) {
+               if ( ! is_string( $value ) || $this->check_ascii( $value ) ) {
+                       return $value;
+               }
+
+               $charset = $this->get_col_charset( $table, $column );
+               if ( ! $charset ) {
+                       // Not a string column.
+                       return $value;
+               } elseif ( is_wp_error( $charset ) ) {
+                       // Bail on real errors.
+                       return $charset;
+               }
+
+               $data = array(
+                       $column => array(
+                               'value'   => $value,
+                               'charset' => $charset,
+                               'ascii'   => false,
+                       )
+               );
+
+               $data = $this->strip_invalid_text( $data );
+               if ( is_wp_error( $data ) ) {
+                       return $data;
+               }
+
+               return $data[ $column ]['value'];
+       }
+
+       /**
+        * Find the first table name referenced in a query.
+        *
+        * @since 4.2.0
+        * @access protected
+        *
+        * @param string $query The query to search.
+        * @return string|false $table The table name found, or false if a table couldn't be found.
+        */
+       protected function get_table_from_query( $query ) {
+               // Remove characters that can legally trail the table name.
+               $query = rtrim( $query, ';/-#' );
+
+               // Allow (select...) union [...] style queries. Use the first query's table name.
+               $query = ltrim( $query, "\r\n\t (" );
+
+               /*
+                * Strip everything between parentheses except nested selects and use only 1,000
+                * chars of the query.
+                */
+               $query = preg_replace( '/\((?!\s*select)[^(]*?\)/is', '()', substr( $query, 0, 1000 ) );
+
+               // Quickly match most common queries.
+               if ( preg_match( '/^\s*(?:'
+                               . 'SELECT.*?\s+FROM'
+                               . '|INSERT(?:\s+LOW_PRIORITY|\s+DELAYED|\s+HIGH_PRIORITY)?(?:\s+IGNORE)?(?:\s+INTO)?'
+                               . '|REPLACE(?:\s+LOW_PRIORITY|\s+DELAYED)?(?:\s+INTO)?'
+                               . '|UPDATE(?:\s+LOW_PRIORITY)?(?:\s+IGNORE)?'
+                               . '|DELETE(?:\s+LOW_PRIORITY|\s+QUICK|\s+IGNORE)*(?:\s+FROM)?'
+                               . ')\s+`?([\w-]+)`?/is', $query, $maybe ) ) {
+                       return $maybe[1];
+               }
+
+               // SHOW TABLE STATUS and SHOW TABLES
+               if ( preg_match( '/^\s*(?:'
+                               . 'SHOW\s+TABLE\s+STATUS.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)'
+                               . '|SHOW\s+(?:FULL\s+)?TABLES.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)'
+                               . ')\W([\w-]+)\W/is', $query, $maybe ) ) {
+                       return $maybe[1];
+               }
+
+               // Big pattern for the rest of the table-related queries.
+               if ( preg_match( '/^\s*(?:'
+                               . '(?:EXPLAIN\s+(?:EXTENDED\s+)?)?SELECT.*?\s+FROM'
+                               . '|DESCRIBE|DESC|EXPLAIN|HANDLER'
+                               . '|(?:LOCK|UNLOCK)\s+TABLE(?:S)?'
+                               . '|(?:RENAME|OPTIMIZE|BACKUP|RESTORE|CHECK|CHECKSUM|ANALYZE|REPAIR).*\s+TABLE'
+                               . '|TRUNCATE(?:\s+TABLE)?'
+                               . '|CREATE(?:\s+TEMPORARY)?\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?'
+                               . '|ALTER(?:\s+IGNORE)?\s+TABLE'
+                               . '|DROP\s+TABLE(?:\s+IF\s+EXISTS)?'
+                               . '|CREATE(?:\s+\w+)?\s+INDEX.*\s+ON'
+                               . '|DROP\s+INDEX.*\s+ON'
+                               . '|LOAD\s+DATA.*INFILE.*INTO\s+TABLE'
+                               . '|(?:GRANT|REVOKE).*ON\s+TABLE'
+                               . '|SHOW\s+(?:.*FROM|.*TABLE)'
+                               . ')\s+\(*\s*`?([\w-]+)`?\s*\)*/is', $query, $maybe ) ) {
+                       return $maybe[1];
+               }
+
+               return false;
+       }
+
        /**
         * Load the column metadata from the last query.
         *