X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/wordpress.git/blobdiff_plain/96bc8e88cf39086a9e0a883b8e2c311fe82a5e97..9c2096d803812dacbdf6cf8efe90053e39f00b96:/wp-includes/formatting.php diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 3452ed2a..d43e848f 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -1350,9 +1350,17 @@ function antispambot($emailaddy, $mailto=0) { */ function _make_url_clickable_cb($matches) { $url = $matches[2]; - $suffix = ''; - /** Include parentheses in the URL only if paired **/ + if ( ')' == $matches[3] && strpos( $url, '(' ) ) { + // If the trailing character is a closing parethesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL. + // Then we can let the parenthesis balancer do its thing below. + $url .= $matches[3]; + $suffix = ''; + } else { + $suffix = $matches[3]; + } + + // Include parentheses in the URL only if paired while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) { $suffix = strrchr( $url, ')' ) . $suffix; $url = substr( $url, 0, strrpos( $url, ')' ) ); @@ -1418,23 +1426,117 @@ function _make_email_clickable_cb($matches) { * * @since 0.71 * - * @param string $ret Content to convert URIs. + * @param string $text Content to convert URIs. * @return string Content with converted URIs. */ -function make_clickable($ret) { - $ret = ' ' . $ret; - // in testing, using arrays here was found to be faster - $save = @ini_set('pcre.recursion_limit', 10000); - $retval = preg_replace_callback('#(?])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret); - if (null !== $retval ) - $ret = $retval; - @ini_set('pcre.recursion_limit', $save); - $ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret); - $ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret); - // this one is not in an array because we need it to run last, for cleanup of accidental links within links - $ret = preg_replace("#(]+?>|>))]+?>([^>]+?)#i", "$1$3", $ret); - $ret = trim($ret); - return $ret; +function make_clickable( $text ) { + $r = ''; + $textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // split out HTML tags + foreach ( $textarr as $piece ) { + if ( empty( $piece ) || ( $piece[0] == '<' && ! preg_match('|^<\s*[\w]{1,20}+://|', $piece) ) ) { + $r .= $piece; + continue; + } + + // Long strings might contain expensive edge cases ... + if ( 10000 < strlen( $piece ) ) { + // ... break it up + foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses + if ( 2101 < strlen( $chunk ) ) { + $r .= $chunk; // Too big, no whitespace: bail. + } else { + $r .= make_clickable( $chunk ); + } + } + } else { + $ret = " $piece "; // Pad with whitespace to simplify the regexes + + $url_clickable = '~ + ([\\s(<.,;:!?]) # 1: Leading whitespace, or punctuation + ( # 2: URL + [\\w]{1,20}+:// # Scheme and hier-part prefix + (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long + [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character + (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character + [\'.,;:!?)] # Punctuation URL character + [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character + )* + ) + (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing) + ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character. + // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times. + + $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret ); + + $ret = preg_replace_callback( '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret ); + $ret = preg_replace_callback( '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret ); + + $ret = substr( $ret, 1, -1 ); // Remove our whitespace padding. + $r .= $ret; + } + } + + // Cleanup of accidental links within links + $r = preg_replace( '#(]+?>|>))]+?>([^>]+?)#i', "$1$3", $r ); + return $r; +} + +/** + * Breaks a string into chunks by splitting at whitespace characters. + * The length of each returned chunk is as close to the specified length goal as possible, + * with the caveat that each chunk includes its trailing delimiter. + * Chunks longer than the goal are guaranteed to not have any inner whitespace. + * + * Joining the returned chunks with empty delimiters reconstructs the input string losslessly. + * + * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters) + * + * + * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234 890 123456789 1234567890a 45678 1 3 5 7 90 ", 10 ) == + * array ( + * 0 => '1234 67890 ', // 11 characters: Perfect split + * 1 => '1234 ', // 5 characters: '1234 67890a' was too long + * 2 => '67890a cd ', // 10 characters: '67890a cd 1234' was too long + * 3 => '1234 890 ', // 11 characters: Perfect split + * 4 => '123456789 ', // 10 characters: '123456789 1234567890a' was too long + * 5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split + * 6 => ' 45678 ', // 11 characters: Perfect split + * 7 => '1 3 5 7 9', // 9 characters: End of $string + * ); + * + * + * @since 3.4.0 + * @access private + * + * @param string $string The string to split + * @param int $goal The desired chunk length. + * @return array Numeric array of chunks. + */ +function _split_str_by_whitespace( $string, $goal ) { + $chunks = array(); + + $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" ); + + while ( $goal < strlen( $string_nullspace ) ) { + $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" ); + + if ( false === $pos ) { + $pos = strpos( $string_nullspace, "\000", $goal + 1 ); + if ( false === $pos ) { + break; + } + } + + $chunks[] = substr( $string, 0, $pos + 1 ); + $string = substr( $string, $pos + 1 ); + $string_nullspace = substr( $string_nullspace, $pos + 1 ); + } + + if ( $string ) { + $chunks[] = $string; + } + + return $chunks; } /**