+/**
+ * Search only inside HTML elements for shortcodes and process them.
+ *
+ * Any [ or ] characters remaining inside elements will be HTML encoded
+ * (as &#91; and &#93;) to prevent interference with shortcodes that are
+ * outside the elements. Entities of exactly that form which are already
+ * present in $content are first rewritten to the zero-padded &#091; and
+ * &#093; so they cannot be mistaken for the placeholders added here.
+ * Assumes $content processed by KSES already. Users with unfiltered_html
+ * capability may get unexpected output if angle braces are nested in tags.
+ *
+ * @since 4.2.3
+ *
+ * @param string $content     Content to search for shortcodes
+ * @param bool   $ignore_html When true, all square braces inside elements will be encoded.
+ * @return string Content with shortcodes filtered out.
+ */
+function do_shortcodes_in_html_tags( $content, $ignore_html ) {
+    // Normalize entities in unfiltered HTML before adding placeholders, so that
+    // author-written &#91; / &#93; stay distinguishable from the ones added below.
+    $trans   = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
+    $content = strtr( $content, $trans );
+
+    // From here on $trans turns literal square braces into placeholder entities.
+    $trans = array( '[' => '&#91;', ']' => '&#93;' );
+
+    $pattern = get_shortcode_regex();
+
+    $comment_regex =
+          '!'           // Start of comment, after the <.
+        . '(?:'         // Unroll the loop: Consume everything until --> is found.
+        .     '-(?!->)' // Dash not followed by end of comment.
+        .     '[^\-]*+' // Consume non-dashes.
+        . ')*+'         // Loop possessively.
+        . '(?:-->)?';   // End of comment. If not found, match all input.
+
+    $regex =
+          '/('                   // Capture the entire match.
+        .     '<'                // Find start of element.
+        .     '(?(?=!--)'        // Is this a comment?
+        .         $comment_regex // Find end of comment.
+        .     '|'
+        .         '[^>]*>?'      // Find end of element. If not found, match all input.
+        .     ')'
+        . ')/s';
+
+    // Split into alternating text and element/comment chunks; the delimiters
+    // (the elements themselves) are kept and empty chunks are dropped.
+    $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
+
+    foreach ( $textarr as &$element ) {
+        // Plain text between elements is left untouched.
+        if ( '<' !== $element[0] ) {
+            continue;
+        }
+
+        $noopen  = false === strpos( $element, '[' );
+        $noclose = false === strpos( $element, ']' );
+        if ( $noopen || $noclose ) {
+            // This element does not contain shortcodes.
+            if ( $noopen xor $noclose ) {
+                // Need to encode stray [ or ] chars.
+                $element = strtr( $element, $trans );
+            }
+            continue;
+        }
+
+        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
+            // Encode all [ and ] chars.
+            $element = strtr( $element, $trans );
+            continue;
+        }
+
+        $attributes = wp_kses_attr_parse( $element );
+        if ( false === $attributes ) {
+            // Looks like we found some crazy unfiltered HTML. Skipping it for sanity.
+            $element = strtr( $element, $trans );
+            continue;
+        }
+
+        // Get element name. The first and last parsed pieces are the opening and
+        // closing fragments of the tag; what remains are the attributes.
+        $front   = array_shift( $attributes );
+        $back    = array_pop( $attributes );
+        $matches = array();
+        preg_match( '%[a-zA-Z0-9]+%', $front, $matches );
+        $elname = $matches[0];
+
+        // Look for shortcodes in each attribute separately.
+        foreach ( $attributes as &$attr ) {
+            $open  = strpos( $attr, '[' );
+            $close = strpos( $attr, ']' );
+            if ( false === $open || false === $close ) {
+                continue; // Go to next attribute. Square braces will be escaped at end of loop.
+            }
+            $double = strpos( $attr, '"' );
+            $single = strpos( $attr, "'" );
+            if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
+                // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
+                // In this specific situation we assume KSES did not run because the input
+                // was written by an administrator, so we should avoid changing the output
+                // and we do not need to run KSES here.
+                $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
+            } else {
+                // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
+                // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
+                $count    = 0;
+                $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
+                if ( $count > 0 ) {
+                    // Sanitize the shortcode output using KSES.
+                    $new_attr = wp_kses_one_attr( $new_attr, $elname );
+                    if ( '' !== $new_attr ) {
+                        // The shortcode is safe to use now.
+                        $attr = $new_attr;
+                    }
+                    // Otherwise the original attribute is kept and its braces are
+                    // encoded below, so the rejected shortcode cannot run later.
+                }
+            }
+        }
+        // Break the reference so later writes to $attr cannot alter $attributes.
+        unset( $attr );
+
+        $element = $front . implode( '', $attributes ) . $back;
+
+        // Now encode any remaining [ or ] chars.
+        $element = strtr( $element, $trans );
+    }
+    // Break the reference so later writes to $element cannot alter $textarr.
+    unset( $element );
+
+    $content = implode( '', $textarr );
+
+    return $content;
+}
+
+/**
+ * Remove placeholders added by do_shortcodes_in_html_tags().
+ *
+ * The placeholders are the numeric entities &#91; and &#93;, which are turned
+ * back into literal [ and ] characters. The zero-padded forms &#091; and
+ * &#093; are not placeholders and are left untouched.
+ *
+ * @since 4.2.3
+ *
+ * @param string $content Content to search for placeholders.
+ * @return string Content with placeholders removed.
+ */
+function unescape_invalid_shortcodes( $content ) {
+    // Clean up entire string, avoids re-parsing HTML.
+    $trans   = array( '&#91;' => '[', '&#93;' => ']' );
+    $content = strtr( $content, $trans );
+
+    return $content;
+}
+