12 * Will process the WordPress eXtended RSS files that you upload from the export
// Maps a post ID from the import file to the ID of the locally stored post (filled in by process_post()).
19 var $post_ids_processed = array ();
// Maps an imported post's original ID to its original parent ID when the parent is not known yet; resolved in backfill_parents().
20 var $orphans = array ();
// NOTE(review): not referenced anywhere in the visible source - confirm whether it is still needed.
23 var $mtnames = array ();
// NOTE(review): not referenced anywhere in the visible source - confirm whether it is still needed.
24 var $newauthornames = array ();
// Every dc:creator value appended by process_author(); may contain duplicates.
25 var $allauthornames = array ();
// Maps an author name from the import file to a local user ID (built in get_authors_from_post()).
27 var $author_ids = array ();
// Inner text of each <wp:category> element collected by get_entries().
29 var $categories = array ();
// Whether attachment files are downloaded during the import; set in import().
32 var $fetch_attachments = false;
// Maps original URLs to their new local URLs; consumed by backfill_attachment_urls().
33 var $url_remap = array ();
// Fragment of the page-header output (the enclosing function declaration is not visible here):
// opens the "wrap" container and prints the translated screen title.
36 echo '<div class="wrap">';
38 echo '<h2>'.__('Import WordPress').'</h2>';
/**
 * Convert named HTML entities in a string back to the characters they stand for.
 *
 * The reverse of PHP's HTML_ENTITIES translation table is applied with strtr(),
 * so only entities present in that table are translated.
 *
 * @param string $string Text that may contain HTML entities.
 * @return string The text with the known entities decoded.
 */
function unhtmlentities($string) { // From php.net for < 4.3 compat
    $entity_to_char = array_flip(get_html_translation_table(HTML_ENTITIES));
    return strtr($string, $entity_to_char);
}
// Fragment of the upload screen (the enclosing function declaration is not visible here):
// prints the introductory text and then the upload form, which posts to step 1 of this importer.
52 echo '<div class="narrow">';
53 echo '<p>'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this blog.').'</p>';
54 echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.').'</p>';
55 wp_import_upload_form("admin.php?import=wordpress&step=1");
/**
 * Extract the contents of the first occurrence of an XML element from a chunk of WXR text.
 *
 * A CDATA wrapper around the value is removed, the value is trimmed and the
 * result is passed through $wpdb->escape().
 *
 * @param string $string Text to search.
 * @param string $tag    Element name, e.g. 'wp:post_id'.
 * @return string Escaped element contents; the escaped empty string when the element is absent.
 */
function get_tag( $string, $tag ) {
    global $wpdb;

    // Quote the name and require whitespace or '>' directly after it, so that a
    // search for 'wp:post_date' can never latch onto '<wp:post_date_gmt>'.
    $name = preg_quote($tag, '|');
    if ( !preg_match("|<{$name}(?:\s[^>]*)?>(.*?)</{$name}>|is", $string, $match) ) {
        // Element not present: use an empty value instead of reading an undefined index.
        return $wpdb->escape('');
    }

    $value = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $match[1]);
    return $wpdb->escape( trim( $value ) );
}
// Fragment (declaration not visible): reports whether gzopen() is callable; the file
// wrappers in this class use that answer to choose between the gz* and plain functions.
68 return is_callable('gzopen');
/**
 * Open a file, using gzopen() when has_gzip() reports gzip support and
 * PHP's plain fopen() otherwise.
 *
 * @param string $filename Path of the file to open.
 * @param string $mode     Open mode; 'r' by default.
 * @return mixed Whatever the underlying open function returns.
 */
function fopen($filename, $mode='r') {
    return $this->has_gzip() ? gzopen($filename, $mode) : fopen($filename, $mode);
}
// Fragment of the end-of-file wrapper called from get_entries() (declaration and return
// statements not visible): branches on gzip support like the sibling fopen()/fgets() wrappers.
78 if ( $this->has_gzip() )
/**
 * Read one line from an open file handle, using gzgets() when has_gzip()
 * reports gzip support and PHP's plain fgets() otherwise.
 *
 * @param resource $fp  Handle obtained from this class's fopen().
 * @param int      $len Maximum number of bytes to read; 8192 by default.
 * @return mixed Whatever the underlying read function returns.
 */
function fgets($fp, $len=8192) {
    return $this->has_gzip() ? gzgets($fp, $len) : fgets($fp, $len);
}
// Close-file wrapper: branches on gzip support like fopen()/fgets().
// NOTE(review): the two return statements are not visible here - presumably gzclose()/fclose(); confirm.
89 function fclose($fp) {
90 if ( $this->has_gzip() )
/**
 * Read the import file ($this->file) line by line and collect its contents.
 *
 * While scanning it records the base site URL in $this->base_url, appends the
 * contents of <wp:category> and <wp:tag> elements to $this->categories and
 * $this->tags, and accumulates the lines of each <item> in $this->post.
 * When an item closes, the optional callback is invoked with that item's text.
 *
 * Each element is looked for within a single line only.
 *
 * @param callback $process_post_func Optional callback that receives the text of each item.
 * NOTE(review): the return statement is not visible here; select_authors() uses the
 * result as the "is a WXR file" flag - confirm.
 */
95 function get_entries($process_post_func=NULL) {
// NOTE(review): set_magic_quotes_runtime() was deprecated in PHP 5.3 and removed in PHP 7.0.
96 set_magic_quotes_runtime(0);
101 $fp = $this->fopen($this->file, 'r');
103 while ( !$this->feof($fp) ) {
104 $importline = rtrim($this->fgets($fp));
106 // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether
107 if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) )
110 if ( false !== strpos($importline, '<wp:base_site_url>') ) {
111 preg_match('|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url);
112 $this->base_url = $url[1];
115 if ( false !== strpos($importline, '<wp:category>') ) {
116 preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category);
117 $this->categories[] = $category[1];
120 if ( false !== strpos($importline, '<wp:tag>') ) {
121 preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag);
122 $this->tags[] = $tag[1];
125 if ( false !== strpos($importline, '<item>') ) {
// End of an item: stop accumulating and hand the collected text to the callback, if one was given.
130 if ( false !== strpos($importline, '</item>') ) {
131 $doing_entry = false;
132 if ($process_post_func)
133 call_user_func($process_post_func, $this->post);
// Inside an item: append the current line to the item's text.
136 if ( $doing_entry ) {
137 $this->post .= $importline . "\n";
/**
 * Return the distinct author names gathered from the import file, in the
 * order in which they were first seen.
 *
 * @return array Unique author names, indexed from 0; empty when no author was collected.
 */
function get_wp_authors() {
    // array_unique() keeps the first occurrence of every value and compares the
    // values as strings, so names such as "007" and "7" stay distinct;
    // array_values() makes the keys sequential again.
    return array_values(array_unique($this->allauthornames));
}
/**
 * Build $this->author_ids from the submitted author-mapping form.
 *
 * Reads $_POST['author_in'] (index => author name from the import file),
 * $_POST['user_select'] (existing user chosen in the drop-down list) and
 * $_POST['user_create'] (login name typed into the text field).
 */
162 function get_authors_from_post() {
163 global $current_user;
165 // this will populate $this->author_ids with a list of author_names => user_ids
167 foreach ( $_POST['author_in'] as $i => $in_author_name ) {
169 if ( !empty($_POST['user_select'][$i]) ) {
170 // an existing user was selected in the dropdown list
171 $user = get_userdata( intval($_POST['user_select'][$i]) );
172 if ( isset($user->ID) )
173 $this->author_ids[$in_author_name] = $user->ID;
// Creating users is only considered when the 'import_allow_create_users' filter permits it.
175 elseif ( $this->allow_create_users() ) {
176 // nothing was selected in the dropdown list, so we'll use the name in the text field
178 $new_author_name = trim($_POST['user_create'][$i]);
179 // if the user didn't enter a name, assume they want to use the same name as in the import file
180 if ( empty($new_author_name) )
181 $new_author_name = $in_author_name;
183 $user_id = username_exists($new_author_name);
// NOTE(review): the condition guarding the next line is not visible - presumably the
// user is created only when username_exists() found none; confirm.
185 $user_id = wp_create_user($new_author_name, wp_generate_password());
188 $this->author_ids[$in_author_name] = $user_id;
191 // failsafe: if the user_id was invalid, default to the current user
192 if ( empty($this->author_ids[$in_author_name]) ) {
193 $this->author_ids[$in_author_name] = intval($current_user->ID);
/**
 * Print the "Assign Authors" screen.
 *
 * Outputs one list entry per author returned by get_wp_authors(), each with the
 * controls from users_form(), and, when allow_fetch_attachments() permits it,
 * a checkbox for downloading attachments. The form posts to step 2.
 */
199 function wp_authors_form() {
201 <?php screen_icon(); ?>
202 <h2><?php _e('Assign Authors'); ?></h2>
203 <p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.'); ?></p>
205 if ( $this->allow_create_users() ) {
206 echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user\'s details if necessary.')."</p>\n";
210 $authors = $this->get_wp_authors();
// The nonce written here is the one dispatch() verifies with check_admin_referer('import-wordpress').
211 echo '<form action="?import=wordpress&step=2&id=' . $this->id . '" method="post">';
212 wp_nonce_field('import-wordpress');
213 echo '<ol id="authors">';
// NOTE(review): $author originates from the uploaded file and is printed without HTML escaping below.
215 foreach ($authors as $author) {
217 echo '<li>'.__('Import author:').' <strong>'.$author.'</strong><br />';
218 $this->users_form($j, $author);
222 if ( $this->allow_fetch_attachments() ) {
225 <?php screen_icon(); ?>
226 <h2><?php _e('Import Attachments'); ?></h2>
228 <input type="checkbox" value="1" name="attachments" id="import-attachments" />
229 <label for="import-attachments"><?php _e('Download and import file attachments') ?></label>
235 echo '<p class="submit">';
236 echo '<input type="submit" class="button" value="'.attribute_escape( __('Submit') ).'" />'.'<br />';
/**
 * Print the form controls used to map one imported author to a local user.
 *
 * When allow_create_users() permits it, a text field for a new login name is
 * shown; in every case a drop-down list of the existing users is printed.
 *
 * @param int    $n      Index of the author within the form; used as the array key of the field names.
 * @param string $author Author name as found in the import file.
 */
function users_form($n, $author) {
    $index = intval($n);

    // The author name comes from the uploaded file, so escape it once for
    // every HTML attribute it is written into below.
    $safe_author = htmlspecialchars($author, ENT_QUOTES);

    if ( $this->allow_create_users() ) {
        printf('<label>'.__('Create user %1$s or map to existing'), ' <input type="text" value="'.$safe_author.'" name="'.'user_create['.$index.']'.'" maxlength="30" /></label> <br />');
    }
    else {
        echo __('Map to existing').'<br />';
    }

    // keep track of $n => $author name
    echo '<input type="hidden" name="author_in['.$index.']" value="'.$safe_author.'" />';

    $users = get_users_of_blog();
    echo '<select name="user_select['.$index.']">'."\n";
    echo '<option value="0">'.__('- Select -').'</option>'."\n";
    foreach ($users as $user) {
        // IDs are forced to integers and logins are escaped before being printed.
        echo '<option value="'.intval($user->user_id).'">'.htmlspecialchars($user->user_login, ENT_QUOTES).'</option>';
    }
    echo '</select>'."\n";
}
/**
 * Scan the uploaded file for author names and show the author-mapping form.
 *
 * get_entries() is run with process_author() as the per-item callback; if its
 * result says the file is a WXR file the form is printed, otherwise the
 * "Invalid file" message below is shown.
 */
266 function select_authors() {
267 $is_wxr_file = $this->get_entries(array(&$this, 'process_author'));
268 if ( $is_wxr_file ) {
269 $this->wp_authors_form();
272 echo '<h2>'.__('Invalid file').'</h2>';
273 echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.').'</p>';
/**
 * Resolve an author name from the import file to a local user ID, respecting
 * the mapping preferences stored in $this->author_ids.
 *
 * @param string $author Author name as found in the import file.
 * @return mixed The mapped user ID, or the current user's ID when no non-empty mapping exists.
 */
function checkauthor($author) {
    global $current_user;

    $has_mapping = !empty($this->author_ids[$author]);

    // failsafe: map to the current user
    return $has_mapping ? $this->author_ids[$author] : $current_user->ID;
}
/**
 * Create the categories collected by get_entries() that do not exist locally yet.
 *
 * Entries are consumed from $this->categories one at a time; the fields of each
 * entry are read with get_tag() and passed to wp_insert_category().
 */
290 function process_categories() {
// Names of the categories that already exist, used for the existence check below.
293 $cat_names = (array) get_terms('category', 'fields=names');
295 while ( $c = array_shift($this->categories) ) {
296 $cat_name = trim($this->get_tag( $c, 'wp:cat_name' ));
298 // If the category exists we leave it alone
299 if ( in_array($cat_name, $cat_names) )
302 $category_nicename = $this->get_tag( $c, 'wp:category_nicename' );
303 $posts_private = (int) $this->get_tag( $c, 'wp:posts_private' );
304 $links_private = (int) $this->get_tag( $c, 'wp:links_private' );
306 $parent = $this->get_tag( $c, 'wp:category_parent' );
// No parent given: use '0'; otherwise the parent is looked up with category_exists().
308 if ( empty($parent) )
309 $category_parent = '0';
311 $category_parent = category_exists($parent);
// NOTE(review): 'posts_private' is listed twice in compact(); the duplicate has no effect.
313 $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name');
315 $cat_ID = wp_insert_category($catarr);
/**
 * Create the tags collected by get_entries() that do not exist locally yet.
 *
 * Entries are consumed from $this->tags one at a time; slug and description are
 * read with get_tag() and the tag is added with wp_insert_term().
 */
319 function process_tags() {
// Names of the tags that already exist, used for the existence check below.
322 $tag_names = (array) get_terms('post_tag', 'fields=names');
324 while ( $c = array_shift($this->tags) ) {
325 $tag_name = trim($this->get_tag( $c, 'wp:tag_name' ));
327 // If the tag exists we leave it alone
328 if ( in_array($tag_name, $tag_names) )
331 $slug = $this->get_tag( $c, 'wp:tag_slug' );
332 $description = $this->get_tag( $c, 'wp:tag_description' );
334 $tagarr = compact('slug', 'description');
336 $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr);
/**
 * Per-item callback used by select_authors(): reads the item's dc:creator
 * value and appends it to $this->allauthornames.
 * NOTE(review): a line between the two statements is not visible here - possibly a
 * guard against empty names; confirm.
 */
340 function process_author($post) {
341 $author = $this->get_tag( $post, 'dc:creator' );
343 $this->allauthornames[] = $author;
/**
 * Import every item of the file and finish the import.
 *
 * Runs get_entries() with process_post() as the per-item callback, then cleans
 * up the uploaded import file, fires the 'import_done' action and prints the
 * closing message with a link to the site's home URL.
 */
346 function process_posts() {
349 $this->get_entries(array(&$this, 'process_post'));
353 wp_import_cleanup($this->id);
354 do_action('import_done', 'wordpress');
356 echo '<h3>'.sprintf(__('All done.').' <a href="%s">'.__('Have fun!').'</a>', get_option('home')).'</h3>';
/**
 * Import a single <item> of the WXR file.
 *
 * The item's fields are read with get_tag(). A post that already exists (same
 * title and date) is reused; otherwise the post - or attachment - is inserted.
 * Afterwards the item's categories, tags, comments and post meta are processed
 * and the 'import_post_added' action is fired.
 *
 * @param string $post Text of one item as accumulated by get_entries().
 */
359 function process_post($post) {
362 $post_ID = (int) $this->get_tag( $post, 'wp:post_id' );
363 if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already
// Restart the execution time limit for every item.
366 set_time_limit( 60 );
368 // There are only ever one of these
369 $post_title = $this->get_tag( $post, 'title' );
370 $post_date = $this->get_tag( $post, 'wp:post_date' );
371 $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' );
372 $comment_status = $this->get_tag( $post, 'wp:comment_status' );
373 $ping_status = $this->get_tag( $post, 'wp:ping_status' );
374 $post_status = $this->get_tag( $post, 'wp:status' );
375 $post_name = $this->get_tag( $post, 'wp:post_name' );
376 $post_parent = $this->get_tag( $post, 'wp:post_parent' );
377 $menu_order = $this->get_tag( $post, 'wp:menu_order' );
378 $post_type = $this->get_tag( $post, 'wp:post_type' );
379 $post_password = $this->get_tag( $post, 'wp:post_password' );
380 $guid = $this->get_tag( $post, 'guid' );
381 $post_author = $this->get_tag( $post, 'dc:creator' );
// Normalise the excerpt: upper-case HTML tag names are lower-cased and <br>/<hr> are made self-closing.
// NOTE(review): create_function() was deprecated in PHP 7.2 and removed in PHP 8.0.
383 $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' );
384 $post_excerpt = preg_replace_callback('|<(/?[A-Z]+)|', create_function('$match', 'return "<" . strtolower($match[1]);'), $post_excerpt);
385 $post_excerpt = str_replace('<br>', '<br />', $post_excerpt);
386 $post_excerpt = str_replace('<hr>', '<hr />', $post_excerpt);
// The same normalisation is applied to the post content.
388 $post_content = $this->get_tag( $post, 'content:encoded' );
389 $post_content = preg_replace_callback('|<(/?[A-Z]+)|', create_function('$match', 'return "<" . strtolower($match[1]);'), $post_content);
390 $post_content = str_replace('<br>', '<br />', $post_content);
391 $post_content = str_replace('<hr>', '<hr />', $post_content);
// Tags of the item: CDATA markers are stripped, entities decoded and the value escaped.
393 preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags);
397 foreach ($tags as $tag) {
398 $tags[$tag_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $tag)));
// Categories of the item receive the same treatment.
402 preg_match_all('|<category>(.*?)</category>|is', $post, $categories);
403 $categories = $categories[1];
406 foreach ($categories as $category) {
407 $categories[$cat_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $category)));
// A post with the same title and date is treated as already imported.
411 $post_exists = post_exists($post_title, '', $post_date);
413 if ( $post_exists ) {
415 printf(__('Post <em>%s</em> already exists.'), stripslashes($post_title));
416 $comment_post_ID = $post_id = $post_exists;
419 // If it has parent, process parent first.
420 $post_parent = (int) $post_parent;
422 // if we already know the parent, map it to the local ID
423 if ( $parent = $this->post_ids_processed[$post_parent] ) {
424 $post_parent = $parent; // new ID of the parent
427 // record the parent for later
428 $this->orphans[intval($post_ID)] = $post_parent;
434 $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor
436 $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password');
437 $postdata['import_id'] = $post_ID;
// Attachments go through process_attachment(); everything else is inserted with wp_insert_post().
438 if ($post_type == 'attachment') {
439 $remote_url = $this->get_tag( $post, 'wp:attachment_url' );
443 $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url);
444 if ( !$post_id or is_wp_error($post_id) )
448 printf(__('Importing post <em>%s</em>...'), stripslashes($post_title));
449 $comment_post_ID = $post_id = wp_insert_post($postdata);
452 if ( is_wp_error( $post_id ) )
455 // Memorize old and new ID.
456 if ( $post_id && $post_ID ) {
457 $this->post_ids_processed[intval($post_ID)] = intval($post_id);
// Assign categories: each one is looked up by slug and created when the lookup finds nothing.
461 if (count($categories) > 0) {
462 $post_cats = array();
463 foreach ($categories as $category) {
464 if ( '' == $category )
466 $slug = sanitize_term_field('slug', $category, 0, 'category', 'db');
467 $cat = get_term_by('slug', $slug, 'category');
470 $cat_ID = $cat->term_id;
472 $category = $wpdb->escape($category);
473 $cat_ID = wp_insert_category(array('cat_name' => $category));
474 if ( is_wp_error($cat_ID) )
477 $post_cats[] = $cat_ID;
479 wp_set_post_categories($post_id, $post_cats);
// Assign tags in the same way: look up by slug, insert when missing.
483 if (count($tags) > 0) {
484 $post_tags = array();
485 foreach ($tags as $tag) {
488 $slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db');
489 $tag_obj = get_term_by('slug', $slug, 'post_tag');
491 if ( ! empty($tag_obj) )
492 $tag_id = $tag_obj->term_id;
493 if ( $tag_id == 0 ) {
494 $tag = $wpdb->escape($tag);
495 $tag_id = wp_insert_term($tag, 'post_tag');
496 if ( is_wp_error($tag_id) )
498 $tag_id = $tag_id['term_id'];
500 $post_tags[] = intval($tag_id);
502 wp_set_post_tags($post_id, $post_tags);
// Import the item's comments.
507 preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments);
508 $comments = $comments[1];
510 if ( $comments) { foreach ($comments as $comment) {
511 $comment_author = $this->get_tag( $comment, 'wp:comment_author');
512 $comment_author_email = $this->get_tag( $comment, 'wp:comment_author_email');
513 $comment_author_IP = $this->get_tag( $comment, 'wp:comment_author_IP');
514 $comment_author_url = $this->get_tag( $comment, 'wp:comment_author_url');
515 $comment_date = $this->get_tag( $comment, 'wp:comment_date');
516 $comment_date_gmt = $this->get_tag( $comment, 'wp:comment_date_gmt');
517 $comment_content = $this->get_tag( $comment, 'wp:comment_content');
518 $comment_approved = $this->get_tag( $comment, 'wp:comment_approved');
519 $comment_type = $this->get_tag( $comment, 'wp:comment_type');
520 $comment_parent = $this->get_tag( $comment, 'wp:comment_parent');
522 // if this is a new post we can skip the comment_exists() check
523 if ( !$post_exists || !comment_exists($comment_author, $comment_date) ) {
524 $commentdata = compact('comment_post_ID', 'comment_author', 'comment_author_url', 'comment_author_email', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_approved', 'comment_type', 'comment_parent');
525 wp_insert_comment($commentdata);
531 printf(' '.__ngettext('(%s comment)', '(%s comments)', $num_comments), $num_comments);
// Import the item's custom fields through process_post_meta().
534 preg_match_all('|<wp:postmeta>(.*?)</wp:postmeta>|is', $post, $postmeta);
535 $postmeta = $postmeta[1];
536 if ( $postmeta) { foreach ($postmeta as $p) {
537 $key = $this->get_tag( $p, 'wp:meta_key' );
538 $value = $this->get_tag( $p, 'wp:meta_value' );
539 $value = stripslashes($value); // add_post_meta() will escape.
541 $this->process_post_meta($post_id, $key, $value);
545 do_action('import_post_added', $post_id);
/**
 * Store one custom field for an imported post.
 *
 * The key is first passed through the 'import_post_meta_key' filter; the value
 * is then saved with add_post_meta() and the 'import_post_meta' action is fired.
 * NOTE(review): the check that skips a key for which the filter returned false
 * is not visible here - confirm.
 *
 * @param int    $post_id Local ID of the post.
 * @param string $key     Meta key from the import file.
 * @param string $value   Meta value from the import file.
 */
549 function process_post_meta($post_id, $key, $value) {
550 // the filter can return false to skip a particular metadata key
551 $_key = apply_filters('import_post_meta_key', $key);
553 add_post_meta( $post_id, $_key, $value );
554 do_action('import_post_meta', $post_id, $_key, $value);
/**
 * Import one attachment: download the remote file and create the attachment post.
 *
 * Runs only when $this->fetch_attachments is set and a remote URL is given;
 * otherwise the "Skipping attachment" message is printed.
 *
 * @param array  $postdata   Post fields prepared by process_post().
 * @param string $remote_url URL of the original file.
 * NOTE(review): the return statements are not visible here; process_post() treats
 * the result as the new post ID or an error - confirm.
 */
558 function process_attachment($postdata, $remote_url) {
559 if ($this->fetch_attachments and $remote_url) {
560 printf( __('Importing attachment <em>%s</em>... '), htmlspecialchars($remote_url) );
562 // If the URL is absolute, but does not contain http, upload it assuming the base_site_url variable
// The pattern matches any URL that starts with "/" and has at least one more character.
563 if ( preg_match('/^\/[\w\W]+$/', $remote_url) )
564 $remote_url = rtrim($this->base_url,'/').$remote_url;
566 $upload = $this->fetch_remote_file($postdata, $remote_url);
567 if ( is_wp_error($upload) ) {
568 printf( __('Remote file error: %s'), htmlspecialchars($upload->get_error_message()) );
572 print '('.size_format(filesize($upload['file'])).')';
// NOTE(review): wp_check_filetype() appears to always return an array, so this
// condition may never be false - confirm.
575 if ( $info = wp_check_filetype($upload['file']) ) {
576 $postdata['post_mime_type'] = $info['type'];
579 print __('Invalid file type');
// The attachment's guid becomes the URL of the local copy.
583 $postdata['guid'] = $upload['url'];
585 // as per wp-admin/includes/upload.php
586 $post_id = wp_insert_attachment($postdata, $upload['file']);
587 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) );
589 // remap the thumbnail url. this isn't perfect because we're just guessing the original url.
590 if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) {
591 $parts = pathinfo($remote_url);
592 $ext = $parts['extension'];
593 $name = basename($parts['basename'], ".{$ext}");
594 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url;
600 printf( __('Skipping attachment <em>%s</em>'), htmlspecialchars($remote_url) );
/**
 * Download a remote attachment into the uploads directory.
 *
 * A placeholder file is created with wp_upload_bits(), the remote content is
 * written into it, and the download is validated (HTTP status, content length,
 * optional size limit). On success the old and new URLs are recorded in
 * $this->url_remap for backfill_attachment_urls().
 *
 * @param array  $post Post data; only 'post_date' is read, to choose the upload directory.
 * @param string $url  URL of the remote file.
 * @return array|WP_Error The wp_upload_bits() result (its 'file' and 'url' entries are used by
 *                        process_attachment()) on success, or a WP_Error describing the failure.
 */
function fetch_remote_file($post, $url) {
    // extract the file name and extension from the url
    $file_name = basename($url);

    // get placeholder file in the upload dir with a unique sanitized filename
    $upload = wp_upload_bits( $file_name, 0, '', $post['post_date']);
    if ( $upload['error'] ) {
        echo $upload['error'];
        return new WP_Error( 'upload_dir_error', $upload['error'] );
    }

    // fetch the remote url and write it to the placeholder file
    $headers = wp_get_http($url, $upload['file']);

    // request failed: remove the placeholder again
    if ( !$headers ) {
        @unlink($upload['file']);
        return new WP_Error( 'import_file_error', __('Remote server did not respond') );
    }

    // make sure the fetch was successful
    if ( $headers['response'] != '200' ) {
        @unlink($upload['file']);
        return new WP_Error( 'import_file_error', sprintf(__('Remote file returned error response %1$d %2$s'), $headers['response'], get_status_header_desc($headers['response']) ) );
    }
    elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) {
        @unlink($upload['file']);
        return new WP_Error( 'import_file_error', __('Remote file is incorrect size') );
    }

    $max_size = $this->max_attachment_size();
    if ( !empty($max_size) and filesize($upload['file']) > $max_size ) {
        @unlink($upload['file']);
        // The size limit is an argument of sprintf(), not of __(), so that it fills the %s placeholder.
        return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s'), size_format($max_size)) );
    }

    // keep track of the old and new urls so we can substitute them later
    $this->url_remap[$url] = $upload['url'];
    // if the remote url is redirected somewhere else, keep track of the destination too
    if ( isset($headers['x-final-location']) && $headers['x-final-location'] != $url )
        $this->url_remap[$headers['x-final-location']] = $upload['url'];

    return $upload;
}
/**
 * Comparison callback that orders strings by length, longest string first.
 *
 * @param string $a First string.
 * @param string $b Second string.
 * @return int Positive when $b is longer than $a, negative when it is shorter, 0 when equal.
 */
function cmpr_strlen($a, $b) {
    $length_a = strlen($a);
    $length_b = strlen($b);
    return $length_b - $length_a;
}
// update url references in post bodies to point to the new local files
/**
 * For every entry of $this->url_remap, replace the old URL with the new one in
 * the post_content of the posts table and in the 'enclosure' rows of the
 * postmeta table.
 */
658 function backfill_attachment_urls() {
660 // make sure we do the longest urls first, in case one is a substring of another
661 uksort($this->url_remap, array(&$this, 'cmpr_strlen'));
664 foreach ($this->url_remap as $from_url => $to_url) {
665 // remap urls in post_content
666 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) );
667 // remap enclosure urls
// NOTE(review): $result is not used in the visible lines of this function.
668 $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) );
// update the post_parent of orphans now that we know the local id's of all parents
/**
 * Walks $this->orphans (original child ID => original parent ID), translates both
 * IDs through $this->post_ids_processed and, when both are known, writes the
 * local parent ID into the child's post_parent column.
 */
673 function backfill_parents() {
676 foreach ($this->orphans as $child_id => $parent_id) {
677 $local_child_id = $this->post_ids_processed[$child_id];
678 $local_parent_id = $this->post_ids_processed[$parent_id];
679 if ($local_child_id and $local_parent_id) {
680 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_parent = %d WHERE ID = %d", $local_parent_id, $local_child_id));
/**
 * Callback registered on the 'import_post_meta_key' filter in import().
 * NOTE(review): the return statements are not visible here - presumably false for
 * '_wp_attached_file' and the unchanged key otherwise; confirm.
 */
685 function is_valid_meta_key($key) {
686 // skip _wp_attached_file metadata since we'll regenerate it from scratch
687 if ( $key == '_wp_attached_file' )
/**
 * Whether the user is offered the option of creating new users for the authors
 * found in the import file.
 *
 * @return mixed Result of the 'import_allow_create_users' filter, which starts from true.
 */
function allow_create_users() {
    $allowed = apply_filters('import_allow_create_users', true);
    return $allowed;
}
/**
 * Whether the user is offered the option of downloading and importing
 * attached files.
 *
 * @return mixed Result of the 'import_allow_fetch_attachments' filter, which starts from true.
 */
function allow_fetch_attachments() {
    $allowed = apply_filters('import_allow_fetch_attachments', true);
    return $allowed;
}
/**
 * Maximum size allowed for a downloaded attachment.
 *
 * @return mixed Result of the 'import_attachment_size_limit' filter, which starts from 0.
 */
function max_attachment_size() {
    // can be overridden with a filter - 0 means no limit
    $limit = apply_filters('import_attachment_size_limit', 0);
    return $limit;
}
/**
 * Prepare for an import: switch on deferred term and comment counting, then
 * fire the 'import_start' action. import_end() switches the deferral off again.
 */
707 function import_start() {
708 wp_defer_term_counting(true);
709 wp_defer_comment_counting(true);
710 do_action('import_start');
/**
 * Finish an import: fire the 'import_end' action, clean the post cache of every
 * locally stored post and switch off the deferred term and comment counting
 * that import_start() switched on.
 */
713 function import_end() {
714 do_action('import_end');
716 // clear the caches after backfilling
717 foreach ($this->post_ids_processed as $post_id)
718 clean_post_cache($post_id);
720 wp_defer_term_counting(false);
721 wp_defer_comment_counting(false);
/**
 * Run the import for a previously uploaded WXR file.
 *
 * @param int  $id                Attachment ID of the uploaded import file.
 * @param bool $fetch_attachments Whether attachments should be downloaded; honoured only when
 *                                allow_fetch_attachments() permits it.
 * @return mixed Whatever import_file() returns, so that the caller can test it with is_wp_error().
 */
function import($id, $fetch_attachments = false) {
    $this->id = (int) $id;
    $this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments);

    add_filter('import_post_meta_key', array($this, 'is_valid_meta_key'));
    $file = get_attached_file($this->id);

    // Hand the outcome back: dispatch() checks the result of import() for a WP_Error.
    return $this->import_file($file);
}
/**
 * Import the given file: read the author mapping from the submitted form,
 * scan the file, create categories and tags, import the posts and finally
 * backfill parent IDs and attachment URLs.
 *
 * Cache invalidation is suspended while categories, tags and posts are created
 * and resumed before the backfill steps.
 *
 * @param string $file Path of the import file.
 */
733 function import_file($file) {
736 $this->import_start();
737 $this->get_authors_from_post();
738 wp_suspend_cache_invalidation(true);
739 $this->get_entries();
740 $this->process_categories();
741 $this->process_tags();
742 $result = $this->process_posts();
743 wp_suspend_cache_invalidation(false);
744 $this->backfill_parents();
745 $this->backfill_attachment_urls();
// NOTE(review): the statement guarded by this check is not visible - presumably the error is returned; confirm.
748 if ( is_wp_error( $result ) )
/**
 * Handle the uploaded import file.
 *
 * On an upload error the error text is printed; otherwise the file path and
 * attachment ID are stored in $this->file and $this->id.
 * NOTE(review): the return statements are not visible here; dispatch() treats the
 * result as a success flag - confirm.
 */
752 function handle_upload() {
753 $file = wp_import_handle_upload();
754 if ( isset($file['error']) ) {
755 echo '<p>'.__('Sorry, there has been an error.').'</p>';
// NOTE(review): the error text is printed without HTML escaping.
756 echo '<p><strong>' . $file['error'] . '</strong></p>';
759 $this->file = $file['file'];
760 $this->id = (int) $file['id'];
/**
 * Entry point registered with register_importer(): runs the step given in
 * $_GET['step'].
 *
 * Visible steps: after the 'import-upload' nonce check the upload is handled and
 * the author form is shown; after the 'import-wordpress' nonce check the import
 * itself runs and an error message is printed when it yields a WP_Error.
 * NOTE(review): the branching construct between the steps is not visible here.
 */
764 function dispatch() {
765 if (empty ($_GET['step']))
768 $step = (int) $_GET['step'];
776 check_admin_referer('import-upload');
777 if ( $this->handle_upload() )
778 $this->select_authors();
781 check_admin_referer('import-wordpress');
// NOTE(review): $_POST['attachments'] comes from a checkbox, so the key is absent when the box is not ticked.
782 $result = $this->import( $_GET['id'], $_POST['attachments'] );
783 if ( is_wp_error( $result ) )
784 echo $result->get_error_message();
// PHP 4-style constructor (a method named after the class); its body is not visible here.
// NOTE(review): PHP 8 no longer treats same-named methods as constructors.
790 function WP_Import() {
796 * Register WordPress Importer
// Create the importer instance and register its dispatch() method as the handler of the 'wordpress' importer.
802 $wp_import = new WP_Import();
804 register_importer('wordpress', 'WordPress', __('Import <strong>posts, pages, comments, custom fields, categories, and tags</strong> from a WordPress export file.'), array ($wp_import, 'dispatch'));