+/**
+ * Split a string into plain-text segments and HTML elements/comments.
+ *
+ * The input is split on every match of the pattern from get_html_split_regex().
+ * Because that pattern captures the whole match, each matched element is kept
+ * in the result between the text segments that surround it.
+ *
+ * @since 4.2.4
+ *
+ * @param string $input The text to split.
+ * @return array The pieces of $input: text segments interleaved with the
+ *               matched HTML elements and comments, in their original order.
+ */
+function wp_html_split( $input ) {
+	$pattern = get_html_split_regex();
+
+	// No limit on the number of pieces (-1); keep the captured delimiters in the output.
+	return preg_split( $pattern, $input, -1, PREG_SPLIT_DELIM_CAPTURE );
+}
+
+/**
+ * Retrieve the regular expression that matches one HTML element.
+ *
+ * The pattern matches a "<" followed by either an escaped section (an HTML
+ * comment or a CDATA section) or an ordinary element. In every case a missing
+ * terminator is tolerated, so an unterminated element runs as far as its
+ * sub-pattern can consume. The whole match is captured as group 1.
+ *
+ * The pattern is assembled on the first call and cached in a static variable.
+ *
+ * @since 4.4.0
+ *
+ * @return string The regular expression, including its "/" delimiters.
+ */
+function get_html_split_regex() {
+	static $regex;
+
+	// Already assembled by an earlier call: reuse it.
+	if ( isset( $regex ) ) {
+		return $regex;
+	}
+
+	// HTML comment, matched from the character after the "<".
+	$comment_body = implode(
+		'',
+		array(
+			'!',        // Start of comment, after the <.
+			'(?:',      // Unrolled loop: consume everything until --> is found.
+			'-(?!->)',  // A dash that does not begin the end of the comment.
+			'[^\-]*+',  // Any run of non-dashes.
+			')*+',      // Repeat possessively.
+			'(?:-->)?', // End of comment. If absent, the loop has consumed all input.
+		)
+	);
+
+	// CDATA section, matched from the character after the "<".
+	$cdata_body = implode(
+		'',
+		array(
+			'!\[CDATA\[', // Start of CDATA section, after the <.
+			'[^\]]*+',    // Any run of non-].
+			'(?:',        // Unrolled loop: consume everything until ]]> is found.
+			'](?!]>)',    // A single ] that does not begin the end of the section.
+			'[^\]]*+',    // Any run of non-].
+			')*+',        // Repeat possessively.
+			'(?:]]>)?',   // End of section. If absent, the loop has consumed all input.
+		)
+	);
+
+	// Lookahead decides whether the element is escaped at all; the nested
+	// conditional then picks the comment or the CDATA sub-pattern.
+	$escaped_body = '(?=!--|!\[CDATA\[)'
+		. '(?(?=!-)' . $comment_body . '|' . $cdata_body . ')';
+
+	// "<", then a conditional: escaped element, else a normal element up to an
+	// optional ">". The outer group captures the entire match.
+	$regex = '/(<(?' . $escaped_body . '|[^>]*>?))/';
+
+	return $regex;
+}
+
+/**
+ * Retrieve the combined regular expression for HTML elements and shortcodes.
+ *
+ * The HTML part matches a "<" followed by either a comment or an ordinary
+ * element; it is assembled on the first call and cached in a static variable.
+ * When a shortcode pattern is supplied it is added as a second alternative.
+ * The whole alternation is captured as group 1.
+ *
+ * @access private
+ * @ignore
+ * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
+ * @since 4.4.0
+ *
+ * @param string $shortcode_regex Optional. The result from _get_wptexturize_shortcode_regex().
+ *                                Any value that empty() treats as empty is ignored.
+ * @return string The regular expression, including its "/" delimiters.
+ */
+function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
+	static $html_regex;
+
+	if ( ! isset( $html_regex ) ) {
+		// HTML comment, matched from the character after the "<".
+		$comment_regex = implode(
+			'',
+			array(
+				'!',        // Start of comment, after the <.
+				'(?:',      // Unrolled loop: consume everything until --> is found.
+				'-(?!->)',  // A dash that does not begin the end of the comment.
+				'[^\-]*+',  // Any run of non-dashes.
+				')*+',      // Repeat possessively.
+				'(?:-->)?', // End of comment. If absent, the loop has consumed all input.
+			)
+		);
+
+		// Needs to be replaced with wp_html_split() per Shortcode API Roadmap.
+		// "<", then a conditional: a comment if "!--" follows, otherwise an
+		// element up to an optional ">".
+		$html_regex = '<(?(?=!--)' . $comment_regex . '|[^>]*>?)';
+	}
+
+	// HTML is always matched; shortcodes only when a pattern was passed in.
+	$alternatives = $html_regex;
+	if ( ! empty( $shortcode_regex ) ) {
+		$alternatives .= '|' . $shortcode_regex;
+	}
+
+	return '/(' . $alternatives . ')/';
+}
+
+/**
+ * Retrieve the regular expression for shortcodes.
+ *
+ * The returned fragment has no delimiters of its own; it is written to be
+ * embedded in a "/"-delimited pattern, as _get_wptexturize_split_regex() does.
+ * Tag names are therefore quoted with "/" as the delimiter, so a name that
+ * contains a slash cannot terminate the enclosing pattern early.
+ *
+ * @access private
+ * @ignore
+ * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
+ * @since 4.4.0
+ *
+ * @param array $tagnames List of shortcodes to find. An empty list produces an
+ *                        empty alternation, so callers presumably pass at least
+ *                        one name.
+ * @return string The regular expression fragment, without delimiters.
+ */
+function _get_wptexturize_shortcode_regex( $tagnames ) {
+	// Escape regex metacharacters AND the "/" delimiter in every tag name.
+	$quoted_tags = array();
+	foreach ( $tagnames as $tagname ) {
+		$quoted_tags[] = preg_quote( $tagname, '/' );
+	}
+
+	$tagregexp = implode( '|', $quoted_tags );
+
+	// The tag must be followed by whitespace, "]" or "/". Excerpt of get_shortcode_regex().
+	$tagregexp = '(?:' . $tagregexp . ')(?=[\s\]\/])';
+
+	$regex =
+		'\['                // Find start of shortcode.
+		. '[\/\[]?'         // Shortcodes may begin with [/ or [[
+		. $tagregexp        // Only match registered shortcodes, because performance.
+		. '(?:'
+		. '[^\[\]<>]+'      // Shortcodes do not contain other shortcodes. Quantifier critical.
+		. '|'
+		. '<[^\[\]>]*>'     // HTML elements permitted. Prevents matching ] before >.
+		. ')*+'             // Possessive critical.
+		. '\]'              // Find end of shortcode.
+		. '\]?';            // Shortcodes may end with ]]
+
+	return $regex;
+}
+