]> scripts.mit.edu Git - autoinstalls/wordpress.git/blobdiff - wp-includes/canonical.php
Wordpress 2.8-scripts
[autoinstalls/wordpress.git] / wp-includes / canonical.php
index e4a5c5309f15ae5bf270aef0b1d9ba12ccffd988..c17e2f13258049fecfe19bc08c4c0ff98a54a618 100644 (file)
@@ -1,15 +1,50 @@
 <?php
-// Based on "Permalink Redirect" from Scott Yang and "Enforce www. Preference" by Mark Jaquith
+/**
+ * Canonical API to handle WordPress Redirecting
+ *
+ * Based on "Permalink Redirect" from Scott Yang and "Enforce www. Preference"
+ * by Mark Jaquith
+ *
+ * @author Scott Yang
+ * @author Mark Jaquith
+ * @package WordPress
+ * @since 2.3.0
+ */
 
-function redirect_canonical($requested_url=NULL, $do_redirect=true) {
-       global $wp_rewrite, $posts, $is_IIS;
+/**
+ * Redirects incoming links to the proper URL based on the site url.
+ *
+ * Search engines consider www.somedomain.com and somedomain.com to be two
+ * different URLs when they both go to the same location. This SEO enhancement
+ * prevents penality for duplicate content by redirecting all incoming links to
+ * one or the other.
+ *
+ * Prevents redirection for feeds, trackbacks, searches, comment popup, and
+ * admin URLs. Does not redirect on IIS, page/post previews, and on form data.
+ *
+ * Will also attempt to find the correct link when a user enters a URL that does
+ * not exist based on exact WordPress query. Will instead try to parse the URL
+ * or query in an attempt to figure the correct page to go to.
+ *
+ * @since 2.3.0
+ * @uses $wp_rewrite
+ * @uses $is_IIS
+ *
+ * @param string $requested_url Optional. The URL that was requested, used to
+ *             figure if redirect is needed.
+ * @param bool $do_redirect Optional. Redirect to the new URL.
+ * @return null|false|string Null, if redirect not needed. False, if redirect
+ *             not needed or the string of the URL
+ */
+function redirect_canonical($requested_url=null, $do_redirect=true) {
+       global $wp_rewrite, $is_IIS, $wp_query, $wpdb;
 
-       if ( is_feed() || is_trackback() || is_search() || is_comments_popup() || is_admin() || $is_IIS || ( isset($_POST) && count($_POST) ) || is_preview() )
+       if ( is_trackback() || is_search() || is_comments_popup() || is_admin() || $is_IIS || ( isset($_POST) && count($_POST) ) || is_preview() || is_robots() )
                return;
 
        if ( !$requested_url ) {
                // build the URL in the address bar
-               $requested_url  = ( isset($_SERVER['HTTPS'] ) && strtolower($_SERVER['HTTPS']) == 'on' ) ? 'https://' : 'http://';
+               $requested_url  = ( !empty($_SERVER['HTTPS'] ) && strtolower($_SERVER['HTTPS']) == 'on' ) ? 'https://' : 'http://';
                $requested_url .= $_SERVER['HTTP_HOST'];
                $requested_url .= $_SERVER['REQUEST_URI'];
        }
@@ -19,23 +54,47 @@ function redirect_canonical($requested_url=NULL, $do_redirect=true) {
                return;
 
        // Some PHP setups turn requests for / into /index.php in REQUEST_URI
-       $original['path'] = preg_replace('|/index\.php$|', '/', $original['path']);
+       // See: http://trac.wordpress.org/ticket/5017
+       // See: http://trac.wordpress.org/ticket/7173
+       // Disabled, for now:
+       // $original['path'] = preg_replace('|/index\.php$|', '/', $original['path']);
 
        $redirect = $original;
        $redirect_url = false;
 
+       // Notice fixing
+       if ( !isset($redirect['path']) )  $redirect['path'] = '';
+       if ( !isset($redirect['query']) ) $redirect['query'] = '';
+
+       if ( is_singular() && 1 > $wp_query->post_count && ($id = get_query_var('p')) ) {
+
+               $vars = $wpdb->get_results( $wpdb->prepare("SELECT post_type, post_parent FROM $wpdb->posts WHERE ID = %d", $id) );
+
+               if ( isset($vars[0]) && $vars = $vars[0] ) {
+                       if ( 'revision' == $vars->post_type && $vars->post_parent > 0 )
+                               $id = $vars->post_parent;
+
+                       if ( $redirect_url = get_permalink($id) )
+                               $redirect['query'] = remove_query_arg(array('p', 'page_id', 'attachment_id'), $redirect['query']);
+               }
+       }
+
        // These tests give us a WP-generated permalink
        if ( is_404() ) {
                $redirect_url = redirect_guess_404_permalink();
        } elseif ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() ) {
                // rewriting of old ?p=X, ?m=2004, ?m=200401, ?m=20040101
-               if ( is_single() && isset($_GET['p']) ) {
+               if ( is_single() && !empty($_GET['p']) && ! $redirect_url ) {
                        if ( $redirect_url = get_permalink(get_query_var('p')) )
                                $redirect['query'] = remove_query_arg('p', $redirect['query']);
-               } elseif ( is_page() && isset($_GET['page_id']) ) {
+                       if ( get_query_var( 'page' ) ) {
+                               $redirect_url = trailingslashit( $redirect_url ) . user_trailingslashit( get_query_var( 'page' ), 'single_paged' );
+                               $redirect['query'] = remove_query_arg( 'page', $redirect['query'] );
+                       }
+               } elseif ( is_page() && !empty($_GET['page_id']) && ! $redirect_url ) {
                        if ( $redirect_url = get_permalink(get_query_var('page_id')) )
                                $redirect['query'] = remove_query_arg('page_id', $redirect['query']);
-               } elseif ( isset($_GET['m']) && ( is_year() || is_month() || is_day() ) ) {
+               } elseif ( !empty($_GET['m']) && ( is_year() || is_month() || is_day() ) ) {
                        $m = get_query_var('m');
                        switch ( strlen($m) ) {
                                case 4: // Yearly
@@ -51,49 +110,77 @@ function redirect_canonical($requested_url=NULL, $do_redirect=true) {
                        if ( $redirect_url )
                                $redirect['query'] = remove_query_arg('m', $redirect['query']);
                // now moving on to non ?m=X year/month/day links
-               } elseif ( is_day() && get_query_var('year') && get_query_var('monthnum') && isset($_GET['day']) ) {
+               } elseif ( is_day() && get_query_var('year') && get_query_var('monthnum') && !empty($_GET['day']) ) {
                        if ( $redirect_url = get_day_link(get_query_var('year'), get_query_var('monthnum'), get_query_var('day')) )
                                $redirect['query'] = remove_query_arg(array('year', 'monthnum', 'day'), $redirect['query']);
-               } elseif ( is_month() && get_query_var('year') && isset($_GET['monthnum']) ) {
+               } elseif ( is_month() && get_query_var('year') && !empty($_GET['monthnum']) ) {
                        if ( $redirect_url = get_month_link(get_query_var('year'), get_query_var('monthnum')) )
                                $redirect['query'] = remove_query_arg(array('year', 'monthnum'), $redirect['query']);
-               } elseif ( is_year() && isset($_GET['year']) ) {
+               } elseif ( is_year() && !empty($_GET['year']) ) {
                        if ( $redirect_url = get_year_link(get_query_var('year')) )
                                $redirect['query'] = remove_query_arg('year', $redirect['query']);
-               } elseif ( is_category() && isset($_GET['cat']) ) {
+               } elseif ( is_category() && !empty($_GET['cat']) && preg_match( '|^[0-9]+$|', $_GET['cat'] ) ) {
                        if ( $redirect_url = get_category_link(get_query_var('cat')) )
                                $redirect['query'] = remove_query_arg('cat', $redirect['query']);
-               } elseif ( is_author() && isset($_GET['author']) ) {
+               } elseif ( is_author() && !empty($_GET['author']) ) {
                        $author = get_userdata(get_query_var('author'));
-                       if ( false !== $author && $redirect_url = get_author_link(false, $author->ID, $author->user_nicename) )
+                       if ( false !== $author && $redirect_url = get_author_posts_url($author->ID, $author->user_nicename) )
                                $redirect['query'] = remove_query_arg('author', $redirect['author']);
                }
 
-       // paging
-               if ( $paged = get_query_var('paged') ) {
-                       if ( $paged > 0 ) {
-                               if ( !$redirect_url )
-                                       $redirect_url = $requested_url;
-                               $paged_redirect = @parse_url($redirect_url);
-                               $paged_redirect['path'] = preg_replace('|/page/[0-9]+?(/+)?$|', '/', $paged_redirect['path']); // strip off any existing paging
-                               $paged_redirect['path'] = preg_replace('|/index.php/?$|', '/', $paged_redirect['path']); // strip off trailing /index.php/
-                               if ( $paged > 1 && !is_single() ) {
-                                       $paged_redirect['path'] = trailingslashit($paged_redirect['path']);
-                                       if ( $wp_rewrite->using_index_permalinks() && strpos($paged_redirect['path'], '/index.php/') === false )
-                                               $paged_redirect['path'] .= 'index.php/';
-                                       $paged_redirect['path'] .= user_trailingslashit("page/$paged", 'paged');
-                               } elseif ( !is_home() && !is_single() ){
-                                       $paged_redirect['path'] = user_trailingslashit($paged_redirect['path'], 'paged');
+       // paging and feeds
+               if ( get_query_var('paged') || is_feed() || get_query_var('cpage') ) {
+                       if ( !$redirect_url )
+                               $redirect_url = $requested_url;
+                       $paged_redirect = @parse_url($redirect_url);
+                       while ( preg_match( '#/page/[0-9]+?(/+)?$#', $paged_redirect['path'] ) || preg_match( '#/(comments/?)?(feed|rss|rdf|atom|rss2)(/+)?$#', $paged_redirect['path'] ) || preg_match( '#/comment-page-[0-9]+(/+)?$#', $paged_redirect['path'] ) ) {
+                               // Strip off paging and feed
+                               $paged_redirect['path'] = preg_replace('#/page/[0-9]+?(/+)?$#', '/', $paged_redirect['path']); // strip off any existing paging
+                               $paged_redirect['path'] = preg_replace('#/(comments/?)?(feed|rss2?|rdf|atom)(/+|$)#', '/', $paged_redirect['path']); // strip off feed endings
+                               $paged_redirect['path'] = preg_replace('#/comment-page-[0-9]+?(/+)?$#', '/', $paged_redirect['path']); // strip off any existing comment paging
+                       }
+
+                       $addl_path = '';
+                       if ( is_feed() ) {
+                               $addl_path = !empty( $addl_path ) ? trailingslashit($addl_path) : '';
+                               if ( get_query_var( 'withcomments' ) )
+                                       $addl_path .= 'comments/';
+                               $addl_path .= user_trailingslashit( 'feed/' . ( ( 'rss2' ==  get_query_var('feed') || 'feed' == get_query_var('feed') ) ? '' : get_query_var('feed') ), 'feed' );
+                               $redirect['query'] = remove_query_arg( 'feed', $redirect['query'] );
+                       }
+
+                       if ( get_query_var('paged') > 0 ) {
+                               $paged = get_query_var('paged');
+                               $redirect['query'] = remove_query_arg( 'paged', $redirect['query'] );
+                               if ( !is_feed() ) {
+                                       if ( $paged > 1 && !is_single() ) {
+                                               $addl_path = ( !empty( $addl_path ) ? trailingslashit($addl_path) : '' ) . user_trailingslashit("page/$paged", 'paged');
+                                       } elseif ( !is_single() ) {
+                                               $addl_path = ( !empty( $addl_path ) ? trailingslashit($addl_path) : '' ) . user_trailingslashit($paged_redirect['path'], 'paged');
+                                       }
+                               } elseif ( $paged > 1 ) {
+                                       $redirect['query'] = add_query_arg( 'paged', $paged, $redirect['query'] );
                                }
-                               $redirect_url = $paged_redirect['scheme'] . '://' . $paged_redirect['host'] . $paged_redirect['path'];
-                               $redirect['path'] = $paged_redirect['path'];
                        }
-                       $redirect['query'] = remove_query_arg('paged', $redirect['query']);
+
+                       if ( get_option('page_comments') && ( ( 'newest' == get_option('default_comments_page') && get_query_var('cpage') > 0 ) || ( 'newest' != get_option('default_comments_page') && get_query_var('cpage') > 1 ) ) ) {
+                               $addl_path = ( !empty( $addl_path ) ? trailingslashit($addl_path) : '' ) . user_trailingslashit( 'comment-page-' . get_query_var('cpage'), 'commentpaged' );
+                               $redirect['query'] = remove_query_arg( 'cpage', $redirect['query'] );
+                       }
+
+                       $paged_redirect['path'] = user_trailingslashit( preg_replace('|/index.php/?$|', '/', $paged_redirect['path']) ); // strip off trailing /index.php/
+                       if ( !empty( $addl_path ) && $wp_rewrite->using_index_permalinks() && strpos($paged_redirect['path'], '/index.php/') === false )
+                               $paged_redirect['path'] = trailingslashit($paged_redirect['path']) . 'index.php/';
+                       if ( !empty( $addl_path ) )
+                               $paged_redirect['path'] = trailingslashit($paged_redirect['path']) . $addl_path;
+                       $redirect_url = $paged_redirect['scheme'] . '://' . $paged_redirect['host'] . $paged_redirect['path'];
+                       $redirect['path'] = $paged_redirect['path'];
                }
        }
 
        // tack on any additional query vars
-       if ( $redirect_url && $redirect['query'] ) {
+       $redirect['query'] = preg_replace( '#^\??&*?#', '', $redirect['query'] );
+       if ( $redirect_url && !empty($redirect['query']) ) {
                if ( strpos($redirect_url, '?') !== false )
                        $redirect_url .= '&';
                else
@@ -106,89 +193,140 @@ function redirect_canonical($requested_url=NULL, $do_redirect=true) {
 
        // www.example.com vs example.com
        $user_home = @parse_url(get_option('home'));
-       $redirect['host'] = $user_home['host'];
+       if ( !empty($user_home['host']) )
+               $redirect['host'] = $user_home['host'];
+       if ( empty($user_home['path']) )
+               $user_home['path'] = '/';
 
        // Handle ports
-       if ( isset($user_home['port']) )
+       if ( !empty($user_home['port']) )
                $redirect['port'] = $user_home['port'];
        else
                unset($redirect['port']);
 
-       // trailing /index.php/
-       $redirect['path'] = preg_replace('|/index.php/$|', '/', $redirect['path']);
+       // trailing /index.php
+       $redirect['path'] = preg_replace('|/index.php/*?$|', '/', $redirect['path']);
+
+       // Remove trailing spaces from the path
+       $redirect['path'] = preg_replace( '#(%20| )+$#', '', $redirect['path'] );
+
+       if ( !empty( $redirect['query'] ) ) {
+               // Remove trailing spaces from certain terminating query string args
+               $redirect['query'] = preg_replace( '#((p|page_id|cat|tag)=[^&]*?)(%20| )+$#', '$1', $redirect['query'] );
+
+               // Clean up empty query strings
+               $redirect['query'] = trim(preg_replace( '#(^|&)(p|page_id|cat|tag)=?(&|$)#', '&', $redirect['query']), '&');
+
+               // Remove redundant leading ampersands
+               $redirect['query'] = preg_replace( '#^\??&*?#', '', $redirect['query'] );
+       }
 
        // strip /index.php/ when we're not using PATHINFO permalinks
        if ( !$wp_rewrite->using_index_permalinks() )
                $redirect['path'] = str_replace('/index.php/', '/', $redirect['path']);
 
        // trailing slashes
-       if ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() && !is_404() && (!is_home() || ( is_home() && (get_query_var('paged') > 1) ) ) ) {
+       if ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() && !is_404() && (!is_front_page() || ( is_front_page() && (get_query_var('paged') > 1) ) ) ) {
                $user_ts_type = '';
                if ( get_query_var('paged') > 0 ) {
                        $user_ts_type = 'paged';
                } else {
-                       foreach ( array('single', 'category', 'page', 'day', 'month', 'year') as $type ) {
+                       foreach ( array('single', 'category', 'page', 'day', 'month', 'year', 'home') as $type ) {
                                $func = 'is_' . $type;
-                               if ( call_user_func($func) )
+                               if ( call_user_func($func) ) {
                                        $user_ts_type = $type;
                                        break;
                                }
                        }
+               }
                $redirect['path'] = user_trailingslashit($redirect['path'], $user_ts_type);
-       } elseif ( is_home() ) {
+       } elseif ( is_front_page() ) {
                $redirect['path'] = trailingslashit($redirect['path']);
        }
 
-       // Always trailing slash the 'home' URL
-       if ( $redirect['path'] == $user_home['path'] )
+       // Always trailing slash the Front Page URL
+       if ( trailingslashit( $redirect['path'] ) == trailingslashit( $user_home['path'] ) )
                $redirect['path'] = trailingslashit($redirect['path']);
 
        // Ignore differences in host capitalization, as this can lead to infinite redirects
-       if ( strtolower($original['host']) == strtolower($redirect['host']) )
+       // Only redirect no-www <=> yes-www
+       if ( strtolower($original['host']) == strtolower($redirect['host']) ||
+               ( strtolower($original['host']) != 'www.' . strtolower($redirect['host']) && 'www.' . strtolower($original['host']) != strtolower($redirect['host']) ) )
                $redirect['host'] = $original['host'];
 
-       if ( array($original['host'], $original['port'], $original['path'], $original['query']) !== array($redirect['host'], $redirect['port'], $redirect['path'], $redirect['query']) ) {
+       $compare_original = array($original['host'], $original['path']);
+
+       if ( !empty( $original['port'] ) )
+               $compare_original[] = $original['port'];
+
+       if ( !empty( $original['query'] ) )
+               $compare_original[] = $original['query'];
+
+       $compare_redirect = array($redirect['host'], $redirect['path']);
+
+       if ( !empty( $redirect['port'] ) )
+               $compare_redirect[] = $redirect['port'];
+
+       if ( !empty( $redirect['query'] ) )
+               $compare_redirect[] = $redirect['query'];
+
+       if ( $compare_original !== $compare_redirect ) {
                $redirect_url = $redirect['scheme'] . '://' . $redirect['host'];
-               if ( isset($redirect['port']) )
-                       $redirect_url .= ':' . $redirect['port'];
+               if ( !empty($redirect['port']) )
+                       $redirect_url .= ':' . $redirect['port'];
                $redirect_url .= $redirect['path'];
-               if ( $redirect['query'] )
+               if ( !empty($redirect['query']) )
                        $redirect_url .= '?' . $redirect['query'];
        }
 
-       if ( $redirect_url && $redirect_url != $requested_url ) {
-               // var_dump($redirect_url); die();
-               $redirect_url = apply_filters('redirect_canonical', $redirect_url, $requested_url);
-               if ( $do_redirect) {
-                       // protect against chained redirects
-                       if ( !redirect_canonical($redirect_url, false) ) {
-                               wp_redirect($redirect_url, 301);
-                               exit();
-                       } else {
-                               return false;
-                       }
+       if ( $redirect_url == $requested_url )
+               return false;
+
+       // Note that you can use the "redirect_canonical" filter to cancel a canonical redirect for whatever reason by returning FALSE
+       $redirect_url = apply_filters('redirect_canonical', $redirect_url, $requested_url);
+
+       if ( !$redirect_url || $redirect_url == $requested_url ) // yes, again -- in case the filter aborted the request
+               return false;
+
+       if ( $do_redirect ) {
+               // protect against chained redirects
+               if ( !redirect_canonical($redirect_url, false) ) {
+                       wp_redirect($redirect_url, 301);
+                       exit();
                } else {
-                       return $redirect_url;
+                       // Debug
+                       // die("1: $redirect_url<br />2: " . redirect_canonical( $redirect_url, false ) );
+                       return false;
                }
        } else {
-               return false;
+               return $redirect_url;
        }
 }
 
+/**
+ * Attempts to guess correct post based on query vars.
+ *
+ * @since 2.3.0
+ * @uses $wpdb
+ *
+ * @return bool|string Returns False, if it can't find post, returns correct
+ *             location on success.
+ */
 function redirect_guess_404_permalink() {
-       global $wp_query, $wpdb;
+       global $wpdb;
+
        if ( !get_query_var('name') )
                return false;
 
-       $where = "post_name LIKE '" . $wpdb->escape(get_query_var('name')) . "%'";
+       $where = $wpdb->prepare("post_name LIKE %s", get_query_var('name') . '%');
 
        // if any of year, monthnum, or day are set, use them to refine the query
        if ( get_query_var('year') )
-               $where .= " AND YEAR(post_date) = '" . $wpdb->escape(get_query_var('year')) . "'";
+               $where .= $wpdb->prepare(" AND YEAR(post_date) = %d", get_query_var('year'));
        if ( get_query_var('monthnum') )
-               $where .= " AND MONTH(post_date) = '" . $wpdb->escape(get_query_var('monthnum')) . "'";
+               $where .= $wpdb->prepare(" AND MONTH(post_date) = %d", get_query_var('monthnum'));
        if ( get_query_var('day') )
-               $where .= " AND DAYOFMONTH(post_date) = '" . $wpdb->escape(get_query_var('day')) . "'";
+               $where .= $wpdb->prepare(" AND DAYOFMONTH(post_date) = %d", get_query_var('day'));
 
        $post_id = $wpdb->get_var("SELECT ID FROM $wpdb->posts WHERE $where AND post_status = 'publish'");
        if ( !$post_id )
@@ -198,4 +336,4 @@ function redirect_guess_404_permalink() {
 
 add_action('template_redirect', 'redirect_canonical');
 
-?>
\ No newline at end of file
+?>