5 var $post_ids_processed = array ();
6 var $orphans = array ();
9 var $mtnames = array ();
10 var $newauthornames = array ();
11 var $allauthornames = array ();
13 var $author_ids = array ();
15 var $categories = array ();
18 var $fetch_attachments = false;
19 var $url_remap = array ();
22 echo '<div class="wrap">';
23 echo '<h2>'.__('Import WordPress').'</h2>';
/**
 * Convert named HTML entities in a string back to their literal characters.
 *
 * Uses the inverse of PHP's HTML_ENTITIES translation table with strtr(),
 * so each entity is replaced once and the result is not re-scanned.
 *
 * @param string $string Text that may contain HTML entities.
 * @return string The text with every entity known to the table decoded.
 */
function unhtmlentities($string) { // From php.net for < 4.3 compat
	// get_html_translation_table() maps character => entity; flip it to decode.
	$entity_to_char = array_flip(get_html_translation_table(HTML_ENTITIES));
	return strtr($string, $entity_to_char);
}
37 echo '<div class="narrow">';
38 echo '<p>'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, comments, custom fields, and categories into this blog.').'</p>';
39 echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.').'</p>';
40 wp_import_upload_form("admin.php?import=wordpress&step=1");
/**
 * Extract the contents of the first <$tag ...>...</$tag> element in a string.
 *
 * A CDATA wrapper around the whole value is removed, the value is trimmed,
 * and the result is passed through $wpdb->escape() before being returned.
 *
 * @param string $string Fragment of the export file to search.
 * @param string $tag    Element name, e.g. 'wp:post_id' or 'dc:creator'.
 * @return string The escaped element contents; the escaped empty string when
 *                the element is not present.
 */
function get_tag( $string, $tag ) {
	global $wpdb;

	// Quote the tag name so it is always matched literally inside the pattern.
	$quoted_tag = preg_quote($tag, '|');
	preg_match("|<$quoted_tag.*?>(.*?)</$quoted_tag>|is", $string, $matches);

	// When the element is absent there is no capture group; fall back to an
	// empty value instead of reading an undefined index.
	$value = isset($matches[1]) ? $matches[1] : '';

	$value = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $value);
	return $wpdb->escape( trim( $value ) );
}
53 return is_callable('gzopen');
/**
 * Open the import file, through zlib when it is available.
 *
 * @param string $filename Path of the file to open.
 * @param string $mode     Open mode; defaults to 'r'.
 * @return mixed Whatever gzopen() or the built-in fopen() returns.
 */
function fopen($filename, $mode='r') {
	// The unqualified fopen() call here is PHP's built-in function, not this method.
	return $this->has_gzip() ? gzopen($filename, $mode) : fopen($filename, $mode);
}
63 if ( $this->has_gzip() )
/**
 * Read one line from a handle opened with this class's fopen().
 *
 * @param resource $fp  Handle to read from.
 * @param int      $len Maximum number of bytes to read; defaults to 8192.
 * @return mixed Whatever gzgets() or the built-in fgets() returns.
 */
function fgets($fp, $len=8192) {
	if ( !$this->has_gzip() ) {
		// Without zlib the handle came from the built-in fopen().
		return fgets($fp, $len);
	}

	return gzgets($fp, $len);
}
74 function fclose($fp) {
75 if ( $this->has_gzip() )
80 function get_entries($process_post_func=NULL) {
81 set_magic_quotes_runtime(0);
86 $fp = $this->fopen($this->file, 'r');
88 while ( !$this->feof($fp) ) {
89 $importline = rtrim($this->fgets($fp));
91 // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether
92 if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) )
95 if ( false !== strpos($importline, '<wp:category>') ) {
96 preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category);
97 $this->categories[] = $category[1];
100 if ( false !== strpos($importline, '<wp:tag>') ) {
101 preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag);
102 $this->tags[] = $tag[1];
105 if ( false !== strpos($importline, '<item>') ) {
110 if ( false !== strpos($importline, '</item>') ) {
111 $doing_entry = false;
112 if ($process_post_func)
113 call_user_func($process_post_func, $this->post);
116 if ( $doing_entry ) {
117 $this->post .= $importline . "\n";
/**
 * Return the distinct author names collected in $this->allauthornames,
 * in order of first appearance.
 *
 * Names are compared as exact strings, so numeric-looking names such as
 * "7" and "007" stay separate authors. An empty collection yields an empty
 * array rather than a list holding a single empty entry.
 *
 * @return array List of unique author names (strings).
 */
function get_wp_authors() {
	$authors = array();

	foreach ( (array) $this->allauthornames as $name ) {
		$name = (string) $name;

		// Strict comparison: loose in_array() treats numeric strings as equal numbers.
		if ( !in_array($name, $authors, true) )
			$authors[] = $name;
	}

	return $authors;
}
142 function get_authors_from_post() {
143 global $current_user;
145 // this will populate $this->author_ids with a list of author_names => user_ids
147 foreach ( $_POST['author_in'] as $i => $in_author_name ) {
149 if ( !empty($_POST['user_select'][$i]) ) {
150 // an existing user was selected in the dropdown list
151 $user = get_userdata( intval($_POST['user_select'][$i]) );
152 if ( isset($user->ID) )
153 $this->author_ids[$in_author_name] = $user->ID;
155 elseif ( $this->allow_create_users() ) {
156 // nothing was selected in the dropdown list, so we'll use the name in the text field
158 $new_author_name = trim($_POST['user_create'][$i]);
159 // if the user didn't enter a name, assume they want to use the same name as in the import file
160 if ( empty($new_author_name) )
161 $new_author_name = $in_author_name;
163 $user_id = username_exists($new_author_name);
165 $user_id = wp_create_user($new_author_name, wp_generate_password());
168 $this->author_ids[$in_author_name] = $user_id;
171 // failsafe: if the user_id was invalid, default to the current user
172 if ( empty($this->author_ids[$in_author_name]) ) {
173 $this->author_ids[$in_author_name] = intval($current_user->ID);
179 function wp_authors_form() {
181 <h2><?php _e('Assign Authors'); ?></h2>
182 <p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.'); ?></p>
184 if ( $this->allow_create_users() ) {
185 echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user\'s details if necessary.')."</p>\n";
189 $authors = $this->get_wp_authors();
190 echo '<ol id="authors">';
191 echo '<form action="?import=wordpress&step=2&id=' . $this->id . '" method="post">';
192 wp_nonce_field('import-wordpress');
194 foreach ($authors as $author) {
196 echo '<li>'.__('Import author:').' <strong>'.$author.'</strong><br />';
197 $this->users_form($j, $author);
201 if ( $this->allow_fetch_attachments() ) {
204 <h2><?php _e('Import Attachments'); ?></h2>
206 <input type="checkbox" value="1" name="attachments" id="import-attachments" />
207 <label for="import-attachments"><?php _e('Download and import file attachments') ?></label>
213 echo '<input type="submit" value="'.attribute_escape( __('Submit') ).'">'.'<br />';
218 function users_form($n, $author) {
220 if ( $this->allow_create_users() ) {
221 printf('<label>'.__('Create user %1$s or map to existing'), ' <input type="text" value="'.$author.'" name="'.'user_create['.intval($n).']'.'" maxlength="30"></label> <br />');
224 echo __('Map to existing').'<br />';
227 // keep track of $n => $author name
228 echo '<input type="hidden" name="author_in['.intval($n).']" value="'.htmlspecialchars($author).'" />';
230 $users = get_users_of_blog();
231 ?><select name="user_select[<?php echo $n; ?>]">
232 <option value="0"><?php _e('- Select -'); ?></option>
234 foreach ($users as $user) {
235 echo '<option value="'.$user->user_id.'">'.$user->user_login.'</option>';
242 function select_authors() {
243 $is_wxr_file = $this->get_entries(array(&$this, 'process_author'));
244 if ( $is_wxr_file ) {
245 $this->wp_authors_form();
248 echo '<h2>'.__('Invalid file').'</h2>';
249 echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.').'</p>';
/**
 * Look up the local user ID for an author name from the import file.
 *
 * @param string $author Author name as it appears in the import file.
 * @return mixed The ID stored in $this->author_ids for that name, or the
 *               current user's ID when no non-empty mapping exists.
 */
function checkauthor($author) {
	global $current_user;

	$has_mapping = !empty($this->author_ids[$author]);
	if ( !$has_mapping ) {
		// failsafe: map to the current user
		return $current_user->ID;
	}

	return $this->author_ids[$author];
}
266 function process_categories() {
269 $cat_names = (array) get_terms('category', 'fields=names');
271 while ( $c = array_shift($this->categories) ) {
272 $cat_name = trim($this->get_tag( $c, 'wp:cat_name' ));
274 // If the category exists we leave it alone
275 if ( in_array($cat_name, $cat_names) )
278 $category_nicename = $this->get_tag( $c, 'wp:category_nicename' );
279 $posts_private = (int) $this->get_tag( $c, 'wp:posts_private' );
280 $links_private = (int) $this->get_tag( $c, 'wp:links_private' );
282 $parent = $this->get_tag( $c, 'wp:category_parent' );
284 if ( empty($parent) )
285 $category_parent = '0';
287 $category_parent = category_exists($parent);
289 $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name');
291 $cat_ID = wp_insert_category($catarr);
295 function process_tags() {
298 $tag_names = (array) get_terms('post_tag', 'fields=names');
300 while ( $c = array_shift($this->tags) ) {
301 $tag_name = trim($this->get_tag( $c, 'wp:tag_name' ));
303 // If the category exists we leave it alone
304 if ( in_array($tag_name, $tag_names) )
307 $slug = $this->get_tag( $c, 'wp:tag_slug' );
308 $description = $this->get_tag( $c, 'wp:tag_description' );
310 $tagarr = compact('slug', 'description');
312 $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr);
316 function process_author($post) {
317 $author = $this->get_tag( $post, 'dc:creator' );
319 $this->allauthornames[] = $author;
322 function process_posts() {
326 $this->get_entries(array(&$this, 'process_post'));
330 wp_import_cleanup($this->id);
331 do_action('import_done', 'wordpress');
333 echo '<h3>'.sprintf(__('All done.').' <a href="%s">'.__('Have fun!').'</a>', get_option('home')).'</h3>';
336 function process_post($post) {
339 $post_ID = (int) $this->get_tag( $post, 'wp:post_id' );
340 if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already
343 set_time_limit( 60 );
345 // There are only ever one of these
346 $post_title = $this->get_tag( $post, 'title' );
347 $post_date = $this->get_tag( $post, 'wp:post_date' );
348 $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' );
349 $comment_status = $this->get_tag( $post, 'wp:comment_status' );
350 $ping_status = $this->get_tag( $post, 'wp:ping_status' );
351 $post_status = $this->get_tag( $post, 'wp:status' );
352 $post_name = $this->get_tag( $post, 'wp:post_name' );
353 $post_parent = $this->get_tag( $post, 'wp:post_parent' );
354 $menu_order = $this->get_tag( $post, 'wp:menu_order' );
355 $post_type = $this->get_tag( $post, 'wp:post_type' );
356 $post_password = $this->get_tag( $post, 'wp:post_password' );
357 $guid = $this->get_tag( $post, 'guid' );
358 $post_author = $this->get_tag( $post, 'dc:creator' );
360 $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' );
361 $post_excerpt = preg_replace('|<(/?[A-Z]+)|e', "'<' . strtolower('$1')", $post_excerpt);
362 $post_excerpt = str_replace('<br>', '<br />', $post_excerpt);
363 $post_excerpt = str_replace('<hr>', '<hr />', $post_excerpt);
365 $post_content = $this->get_tag( $post, 'content:encoded' );
366 $post_content = preg_replace('|<(/?[A-Z]+)|e', "'<' . strtolower('$1')", $post_content);
367 $post_content = str_replace('<br>', '<br />', $post_content);
368 $post_content = str_replace('<hr>', '<hr />', $post_content);
370 preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags);
374 foreach ($tags as $tag) {
375 $tags[$tag_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $tag)));
379 preg_match_all('|<category>(.*?)</category>|is', $post, $categories);
380 $categories = $categories[1];
383 foreach ($categories as $category) {
384 $categories[$cat_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $category)));
388 $post_exists = post_exists($post_title, '', $post_date);
390 if ( $post_exists ) {
392 printf(__('Post <em>%s</em> already exists.'), stripslashes($post_title));
395 // If it has parent, process parent first.
396 $post_parent = (int) $post_parent;
398 // if we already know the parent, map it to the local ID
399 if ( $parent = $this->post_ids_processed[$post_parent] ) {
400 $post_parent = $parent; // new ID of the parent
403 // record the parent for later
404 $this->orphans[intval($post_ID)] = $post_parent;
410 $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor
412 $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password');
413 if ($post_type == 'attachment') {
414 $remote_url = $this->get_tag( $post, 'wp:attachment_url' );
418 $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url);
419 if ( !$post_id or is_wp_error($post_id) )
423 printf(__('Importing post <em>%s</em>...'), stripslashes($post_title));
424 $comment_post_ID = $post_id = wp_insert_post($postdata);
427 if ( is_wp_error( $post_id ) )
430 // Memorize old and new ID.
431 if ( $post_id && $post_ID ) {
432 $this->post_ids_processed[intval($post_ID)] = intval($post_id);
436 if (count($categories) > 0) {
437 $post_cats = array();
438 foreach ($categories as $category) {
439 $slug = sanitize_term_field('slug', $category, 0, 'category', 'db');
440 $cat = get_term_by('slug', $slug, 'category');
443 $cat_ID = $cat->term_id;
445 $category = $wpdb->escape($category);
446 $cat_ID = wp_insert_category(array('cat_name' => $category));
448 $post_cats[] = $cat_ID;
450 wp_set_post_categories($post_id, $post_cats);
454 if (count($tags) > 0) {
455 $post_tags = array();
456 foreach ($tags as $tag) {
457 $slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db');
458 $tag_obj = get_term_by('slug', $slug, 'post_tag');
460 if ( ! empty($tag_obj) )
461 $tag_id = $tag_obj->term_id;
462 if ( $tag_id == 0 ) {
463 $tag = $wpdb->escape($tag);
464 $tag_id = wp_insert_term($tag, 'post_tag');
465 $tag_id = $tag_id['term_id'];
467 $post_tags[] = intval($tag_id);
469 wp_set_post_tags($post_id, $post_tags);
474 preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments);
475 $comments = $comments[1];
477 if ( $comments) { foreach ($comments as $comment) {
478 $comment_author = $this->get_tag( $comment, 'wp:comment_author');
479 $comment_author_email = $this->get_tag( $comment, 'wp:comment_author_email');
480 $comment_author_IP = $this->get_tag( $comment, 'wp:comment_author_IP');
481 $comment_author_url = $this->get_tag( $comment, 'wp:comment_author_url');
482 $comment_date = $this->get_tag( $comment, 'wp:comment_date');
483 $comment_date_gmt = $this->get_tag( $comment, 'wp:comment_date_gmt');
484 $comment_content = $this->get_tag( $comment, 'wp:comment_content');
485 $comment_approved = $this->get_tag( $comment, 'wp:comment_approved');
486 $comment_type = $this->get_tag( $comment, 'wp:comment_type');
487 $comment_parent = $this->get_tag( $comment, 'wp:comment_parent');
489 // if this is a new post we can skip the comment_exists() check
490 if ( !$post_exists || !comment_exists($comment_author, $comment_date) ) {
491 $commentdata = compact('comment_post_ID', 'comment_author', 'comment_author_url', 'comment_author_email', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_approved', 'comment_type', 'comment_parent');
492 wp_insert_comment($commentdata);
498 printf(' '.__ngettext('(%s comment)', '(%s comments)', $num_comments), $num_comments);
501 preg_match_all('|<wp:postmeta>(.*?)</wp:postmeta>|is', $post, $postmeta);
502 $postmeta = $postmeta[1];
503 if ( $postmeta) { foreach ($postmeta as $p) {
504 $key = $this->get_tag( $p, 'wp:meta_key' );
505 $value = $this->get_tag( $p, 'wp:meta_value' );
506 $value = stripslashes($value); // add_post_meta() will escape.
508 $this->process_post_meta($post_id, $key, $value);
512 do_action('import_post_added', $post_id);
/**
 * Store one imported custom field on a post.
 *
 * The key is first passed through the 'import_post_meta_key' filter. A filter
 * can return false to skip that key, in which case nothing is stored and the
 * 'import_post_meta' action is not fired.
 *
 * @param int    $post_id Local ID of the post receiving the metadata.
 * @param string $key     Meta key as read from the import file.
 * @param string $value   Meta value as read from the import file.
 */
function process_post_meta($post_id, $key, $value) {
	// the filter can return false to skip a particular metadata key
	$_key = apply_filters('import_post_meta_key', $key);
	if ( !$_key )
		return;

	add_post_meta( $post_id, $_key, $value );
	do_action('import_post_meta', $post_id, $_key, $value);
}
525 function process_attachment($postdata, $remote_url) {
526 if ($this->fetch_attachments and $remote_url) {
527 printf( __('Importing attachment <em>%s</em>... '), htmlspecialchars($remote_url) );
528 $upload = $this->fetch_remote_file($postdata, $remote_url);
529 if ( is_wp_error($upload) ) {
530 printf( __('Remote file error: %s'), htmlspecialchars($upload->get_error_message()) );
534 print '('.size_format(filesize($upload['file'])).')';
537 if ( $info = wp_check_filetype($upload['file']) ) {
538 $postdata['post_mime_type'] = $info['type'];
541 print __('Invalid file type');
545 $postdata['guid'] = $upload['url'];
547 // as per wp-admin/includes/upload.php
548 $post_id = wp_insert_attachment($postdata, $upload['file']);
549 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) );
551 // remap the thumbnail url. this isn't perfect because we're just guessing the original url.
552 if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) {
553 $parts = pathinfo($remote_url);
554 $ext = $parts['extension'];
555 $name = basename($parts['basename'], ".{$ext}");
556 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url;
562 printf( __('Skipping attachment <em>%s</em>'), htmlspecialchars($remote_url) );
/**
 * Download a remote attachment into the uploads directory.
 *
 * A placeholder file is created with wp_upload_bits(), the remote URL is
 * fetched into it with wp_get_http(), and the download is checked against the
 * HTTP status, the advertised content length and the configured size limit.
 * On success the old URL (and any redirect target) is recorded in
 * $this->url_remap so references can be rewritten later.
 *
 * @param array  $post Post data; only $post['post_date'] is read here.
 * @param string $url  Remote URL of the attachment.
 * @return array|WP_Error The array from wp_upload_bits() (callers read its
 *                        'file' and 'url' entries), or a WP_Error on failure.
 */
function fetch_remote_file($post, $url) {
	// extract the file name and extension from the url
	$file_name = basename($url);

	// get placeholder file in the upload dir with a unique sanitized filename
	$upload = wp_upload_bits( $file_name, 0, '', $post['post_date']);
	if ( $upload['error'] ) {
		echo $upload['error'];
		return new WP_Error( 'upload_dir_error', $upload['error'] );
	}

	// fetch the remote url and write it to the placeholder file
	$headers = wp_get_http($url, $upload['file']);

	// A failed fetch may not yield a header array at all; treat that as status 0.
	$response = isset($headers['response']) ? $headers['response'] : 0;

	// make sure the fetch was successful
	if ( $response != '200' ) {
		@unlink($upload['file']);
		return new WP_Error( 'import_file_error', sprintf(__('Remote file returned error response %d'), intval($response)) );
	}
	elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) {
		@unlink($upload['file']);
		return new WP_Error( 'import_file_error', __('Remote file is incorrect size') );
	}

	$max_size = $this->max_attachment_size();
	if ( !empty($max_size) and filesize($upload['file']) > $max_size ) {
		@unlink($upload['file']);
		// The size is an argument of sprintf(), not of __().
		return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s'), size_format($max_size)) );
	}

	// keep track of the old and new urls so we can substitute them later
	$this->url_remap[$url] = $upload['url'];
	// if the remote url is redirected somewhere else, keep track of the destination too
	if ( !empty($headers['x-final-location']) && $headers['x-final-location'] != $url )
		$this->url_remap[$headers['x-final-location']] = $upload['url'];

	return $upload;
}
/**
 * Comparison callback that orders strings by length, longest first.
 *
 * @param string $a First string.
 * @param string $b Second string.
 * @return int Negative when $a is longer, positive when $b is longer,
 *             zero when both have the same length.
 */
function cmpr_strlen($a, $b) {
	$length_a = strlen($a);
	$length_b = strlen($b);

	return $length_b - $length_a;
}
/**
 * Update url references in post bodies and enclosure metadata to point to
 * the new local files recorded in $this->url_remap.
 */
function backfill_attachment_urls() {
	global $wpdb;

	// make sure we do the longest urls first, in case one is a substring of another
	uksort($this->url_remap, array($this, 'cmpr_strlen'));

	foreach ($this->url_remap as $from_url => $to_url) {
		// Replacing an empty string cannot change any row, so skip the queries.
		if ( '' === (string) $from_url )
			continue;

		// remap urls in post_content
		$wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) );
		// remap enclosure urls
		$wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) );
	}
}
/**
 * Update the post_parent of orphans now that we know the local id's of all parents.
 *
 * $this->orphans maps an imported child ID to its imported parent ID;
 * $this->post_ids_processed maps imported IDs to local IDs. Pairs where
 * either side has no local ID are left untouched.
 */
function backfill_parents() {
	global $wpdb;

	foreach ($this->orphans as $child_id => $parent_id) {
		// empty() avoids reading an undefined index for IDs that were never imported.
		if ( empty($this->post_ids_processed[$child_id]) || empty($this->post_ids_processed[$parent_id]) )
			continue;

		$local_child_id = $this->post_ids_processed[$child_id];
		$local_parent_id = $this->post_ids_processed[$parent_id];

		$wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_parent = %d WHERE ID = %d", $local_parent_id, $local_child_id));
	}
}
/**
 * Filter callback for 'import_post_meta_key'.
 *
 * @param string $key Meta key read from the import file.
 * @return string|false The key unchanged, or false to have the key skipped.
 */
function is_valid_meta_key($key) {
	// skip _wp_attached_file metadata since we'll regenerate it from scratch
	if ( $key == '_wp_attached_file' )
		return false;

	return $key;
}
/**
 * Whether the user is offered the option of creating new users to represent
 * authors in the import file.
 *
 * @return mixed Result of the 'import_allow_create_users' filter; true unless a filter changes it.
 */
function allow_create_users() {
	$allowed = apply_filters('import_allow_create_users', true);

	return $allowed;
}
/**
 * Whether the user is offered the option of downloading and importing
 * attached files.
 *
 * @return mixed Result of the 'import_allow_fetch_attachments' filter; true unless a filter changes it.
 */
function allow_fetch_attachments() {
	$allowed = apply_filters('import_allow_fetch_attachments', true);

	return $allowed;
}
/**
 * Maximum size allowed for a downloaded attachment.
 *
 * @return mixed Result of the 'import_attachment_size_limit' filter; 0 (no limit) unless a filter changes it.
 */
function max_attachment_size() {
	// can be overridden with a filter - 0 means no limit
	$limit = apply_filters('import_attachment_size_limit', 0);

	return $limit;
}
/**
 * Prepare for an import run: turn on deferred term and comment counting,
 * then fire the 'import_start' action.
 */
function import_start() {
	$defer = true;

	wp_defer_term_counting($defer);
	wp_defer_comment_counting($defer);

	do_action('import_start');
}
/**
 * Finish an import run: fire the 'import_end' action, clear the post cache
 * for every imported post, then turn deferred counting back off.
 */
function import_end() {
	do_action('import_end');

	// clear the caches after backfilling
	foreach ($this->post_ids_processed as $local_post_id) {
		clean_post_cache($local_post_id);
	}

	$defer = false;
	wp_defer_term_counting($defer);
	wp_defer_comment_counting($defer);
}
/**
 * Run the import for a previously uploaded export file.
 *
 * @param int  $id                ID of the uploaded file's attachment post.
 * @param bool $fetch_attachments Whether the user asked to download attachments;
 *                                only honoured when allow_fetch_attachments() permits it.
 * @return mixed Whatever import_file() returns, so the caller can test it
 *               with is_wp_error().
 */
function import($id, $fetch_attachments = false) {
	$this->id = (int) $id;
	$this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments);

	add_filter('import_post_meta_key', array($this, 'is_valid_meta_key'));
	$file = get_attached_file($this->id);

	// Pass the result through instead of discarding it; dispatch() inspects it.
	return $this->import_file($file);
}
689 function import_file($file) {
692 $this->import_start();
693 $this->get_authors_from_post();
694 $this->get_entries();
695 $this->process_categories();
696 $this->process_tags();
697 $result = $this->process_posts();
698 $this->backfill_parents();
699 $this->backfill_attachment_urls();
702 if ( is_wp_error( $result ) )
706 function handle_upload() {
707 $file = wp_import_handle_upload();
708 if ( isset($file['error']) ) {
709 echo '<p>'.__('Sorry, there has been an error.').'</p>';
710 echo '<p><strong>' . $file['error'] . '</strong></p>';
713 $this->file = $file['file'];
714 $this->id = (int) $file['id'];
718 function dispatch() {
719 if (empty ($_GET['step']))
722 $step = (int) $_GET['step'];
730 check_admin_referer('import-upload');
731 if ( $this->handle_upload() )
732 $this->select_authors();
735 check_admin_referer('import-wordpress');
736 $result = $this->import( $_GET['id'], $_POST['attachments'] );
737 if ( is_wp_error( $result ) )
738 echo $result->get_error_message();
744 function WP_Import() {
// Create the importer and register its dispatch() method as the handler for
// the 'wordpress' importer.
$wp_import = new WP_Import();

register_importer(
	'wordpress',
	'WordPress',
	__('Import <strong>posts, comments, custom fields, pages, and categories</strong> from a WordPress export file.'),
	array ($wp_import, 'dispatch')
);