// Maps post IDs from the import file to the local IDs assigned on insert.
5 var $post_ids_processed = array ();
// Import-file post ID => import-file parent ID, for posts whose parent was not yet known locally.
6 var $orphans = array ();
// NOTE(review): not referenced anywhere in the visible code.
9 var $mtnames = array ();
// NOTE(review): not referenced anywhere in the visible code.
10 var $newauthornames = array ();
// Every dc:creator value collected by process_author(), duplicates included.
11 var $allauthornames = array ();
// Author name from the import file => local user ID.
13 var $author_ids = array ();
// Raw contents of the <wp:category> elements collected by get_entries().
15 var $categories = array ();
// Whether attachment files should be downloaded during this import.
18 var $fetch_attachments = false;
// Remote URL => local URL for fetched attachments; used to rewrite references afterwards.
19 var $url_remap = array ();
// Opens the admin page wrapper and prints the importer heading.
// NOTE(review): the enclosing function header is not visible in this excerpt.
22 echo '<div class="wrap">';
23 echo '<h2>'.__('Import WordPress').'</h2>';
// Converts named HTML entities back into literal characters by inverting
// PHP's HTML_ENTITIES translation table (hand-rolled for PHP < 4.3 compat).
function unhtmlentities($string) {
	$entity_to_char = array_flip(get_html_translation_table(HTML_ENTITIES));
	return strtr($string, $entity_to_char);
}
// Prints the introductory text and the upload form, which submits to step 1 of this importer.
// NOTE(review): the enclosing function header is not visible in this excerpt.
37 echo '<div class="narrow">';
38 echo '<p>'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, comments, custom fields, and categories into this blog.').'</p>';
39 echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.').'</p>';
40 wp_import_upload_form("admin.php?import=wordpress&step=1");
// Extracts the contents of the first <$tag>...</$tag> element found in
// $string, unwraps a surrounding CDATA section, trims the value and escapes
// it for the database via $wpdb->escape().
//
// $string  XML fragment to search.
// $tag     Element name, e.g. 'wp:post_id'; it is interpolated into the regex as-is.
// Returns the escaped value, or an empty string when the element is absent.
function get_tag( $string, $tag ) {
	global $wpdb;

	// Bail out early when the element is missing; indexing the match array
	// unconditionally would read an undefined offset.
	if ( !preg_match("|<$tag.*?>(.*?)</$tag>|is", $string, $matches) )
		return '';

	$value = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $matches[1]);
	return $wpdb->escape( trim( $value ) );
}
// Reports whether gzopen() is callable, i.e. whether gzip-aware file access can be used.
// NOTE(review): presumably the body of has_gzip(); its header is not visible in this excerpt.
53 return is_callable('gzopen');
// Opens $filename through gzopen() when gzip support is available, otherwise
// through PHP's plain fopen(). Returns whatever the underlying call returns.
function fopen($filename, $mode='r') {
	return $this->has_gzip()
		? gzopen($filename, $mode)
		: fopen($filename, $mode);
}
// NOTE(review): isolated fragment; the function header and both branches are not visible in this excerpt (presumably the feof() wrapper called from get_entries()).
63 if ( $this->has_gzip() )
// Reads one line (up to $len bytes) from a handle opened by this class's
// fopen() wrapper, using gzgets() when gzip support is available.
function fgets($fp, $len=8192) {
	if ( !$this->has_gzip() ) {
		return fgets($fp, $len);
	}
	return gzgets($fp, $len);
}
// Closes a handle, branching on gzip support like the other file wrappers.
// NOTE(review): the statements for both branches are not visible in this excerpt.
74 function fclose($fp) {
75 if ( $this->has_gzip() )
// Scans $this->file line by line. Collects the contents of <wp:category> and
// <wp:tag> elements into $this->categories / $this->tags and accumulates the
// lines of the current <item> in $this->post. When $process_post_func is
// given, it is called with the accumulated text each time a closing </item>
// is seen.
// NOTE(review): select_authors() uses the return value as an "is a WXR file" flag; the return statement is not visible in this excerpt.
80 function get_entries($process_post_func=NULL) {
// NOTE(review): set_magic_quotes_runtime() was removed in PHP 7.0.
81 set_magic_quotes_runtime(0);
86 $fp = $this->fopen($this->file, 'r');
88 while ( !$this->feof($fp) ) {
89 $importline = rtrim($this->fgets($fp));
91 // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether
92 if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) )
// The element matching below is applied to a single line of input at a time.
95 if ( false !== strpos($importline, '<wp:category>') ) {
96 preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category);
97 $this->categories[] = $category[1];
100 if ( false !== strpos($importline, '<wp:tag>') ) {
101 preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag);
102 $this->tags[] = $tag[1];
105 if ( false !== strpos($importline, '<item>') ) {
110 if ( false !== strpos($importline, '</item>') ) {
111 $doing_entry = false;
112 if ($process_post_func)
113 call_user_func($process_post_func, $this->post);
116 if ( $doing_entry ) {
117 $this->post .= $importline . "\n";
// Returns the distinct author names collected from the import file, in order
// of first appearance. array_unique() keeps the first occurrence of each
// value and preserves order, so no manual de-duplication loop is needed.
//
// Names are compared as strings, so numeric-looking names that differ
// textually (e.g. "1e3" and "1000") stay distinct.
// Returns an empty array when no author names were collected, so the
// author form does not show a bogus blank entry.
function get_wp_authors() {
	$names = array_map('strval', $this->allauthornames);

	// array_values() re-indexes from 0 so callers get a plain list.
	return array_values(array_unique($names));
}
// Builds $this->author_ids from the submitted author-mapping form: for each
// imported author name, either the existing user chosen in the dropdown or
// (when user creation is allowed) a user looked up or created by name.
// Any author left without a valid ID is mapped to the current user.
142 function get_authors_from_post() {
143 global $current_user;
145 // this will populate $this->author_ids with a list of author_names => user_ids
// NOTE(review): $_POST['author_in'] is iterated without an isset/array check.
147 foreach ( $_POST['author_in'] as $i => $in_author_name ) {
149 if ( !empty($_POST['user_select'][$i]) ) {
150 // an existing user was selected in the dropdown list
151 $user = get_userdata( intval($_POST['user_select'][$i]) );
152 if ( isset($user->ID) )
153 $this->author_ids[$in_author_name] = $user->ID;
155 elseif ( $this->allow_create_users() ) {
156 // nothing was selected in the dropdown list, so we'll use the name in the text field
158 $new_author_name = trim($_POST['user_create'][$i]);
159 // if the user didn't enter a name, assume they want to use the same name as in the import file
160 if ( empty($new_author_name) )
161 $new_author_name = $in_author_name;
163 $user_id = username_exists($new_author_name);
// NOTE(review): the condition guarding user creation is not visible in this excerpt.
165 $user_id = wp_create_user($new_author_name, wp_generate_password());
168 $this->author_ids[$in_author_name] = $user_id;
171 // failsafe: if the user_id was invalid, default to the current user
172 if ( empty($this->author_ids[$in_author_name]) ) {
173 $this->author_ids[$in_author_name] = intval($current_user->ID);
// Renders the "Assign Authors" form: one entry per distinct imported author
// (via users_form()), an optional "Import Attachments" checkbox when
// attachment fetching is allowed, and the submit button. The form posts to
// step 2 and carries the 'import-wordpress' nonce.
// NOTE(review): the author name is concatenated into the list item markup without escaping.
// NOTE(review): the PHP/HTML mode switches between these lines are not visible in this excerpt.
179 function wp_authors_form() {
181 <h2><?php _e('Assign Authors'); ?></h2>
182 <p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.'); ?></p>
184 if ( $this->allow_create_users() ) {
185 echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user\'s details if necessary.')."</p>\n";
189 $authors = $this->get_wp_authors();
190 echo '<ol id="authors">';
191 echo '<form action="?import=wordpress&step=2&id=' . $this->id . '" method="post">';
192 wp_nonce_field('import-wordpress');
194 foreach ($authors as $author) {
196 echo '<li>'.__('Import author:').' <strong>'.$author.'</strong><br />';
197 $this->users_form($j, $author);
201 if ( $this->allow_fetch_attachments() ) {
204 <h2><?php _e('Import Attachments'); ?></h2>
206 <input type="checkbox" value="1" name="attachments" id="import-attachments" />
207 <label for="import-attachments"><?php _e('Download and import file attachments') ?></label>
213 echo '<input type="submit" value="'.attribute_escape( __('Submit') ).'">'.'<br />';
// Prints the controls that map one imported author to a local user:
// an optional "create user" text field, a hidden field carrying the original
// author name, and a dropdown of existing blog users.
//
// $n       Row index; keys the user_create[], author_in[] and user_select[] fields.
// $author  Author name exactly as found in the import file.
function users_form($n, $author) {
	$n = intval($n);

	// The author name comes from the uploaded file, so it is escaped before
	// being written into any attribute value.
	$safe_author = htmlspecialchars($author);

	if ( $this->allow_create_users() ) {
		printf(__('Create user %1$s or map to existing'), ' <input type="text" value="'.$safe_author.'" name="user_create['.$n.']" maxlength="30"> <br />');
	}
	else {
		echo __('Map to existing').'<br />';
	}

	// keep track of $n => $author name
	echo '<input type="hidden" name="author_in['.$n.']" value="'.$safe_author.'" />';

	$users = get_users_of_blog();
	echo '<select name="user_select['.$n.']">'."\n";
	echo '<option value="0">'.__('- Select -').'</option>'."\n";
	foreach ($users as $user) {
		echo '<option value="'.intval($user->user_id).'">'.htmlspecialchars($user->user_login).'</option>';
	}
	echo '</select>'."\n";
}
// Step 1 handler: scans the uploaded file, collecting author names through
// process_author(), then shows the author-mapping form. When get_entries()
// does not report a WXR file, an "Invalid file" message is printed instead.
242 function select_authors() {
243 $is_wxr_file = $this->get_entries(array(&$this, 'process_author'));
244 if ( $is_wxr_file ) {
245 $this->wp_authors_form();
248 echo '<h2>'.__('Invalid file').'</h2>';
249 echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.').'</p>';
// Resolves an author name from the import file to a local user ID using the
// mapping stored in $this->author_ids. When no usable mapping exists for
// $author, the current user's ID is returned instead.
function checkauthor($author) {
	global $current_user;

	if ( empty($this->author_ids[$author]) ) {
		// failsafe: map to the current user
		return $current_user->ID;
	}

	return $this->author_ids[$author];
}
// Creates the categories collected by get_entries() that do not already
// exist locally (matched by name). $this->categories is consumed in the process.
266 function process_categories() {
269 $cat_names = (array) get_terms('category', 'fields=names');
// The loop stops at the first falsy entry as well as at the end of the list.
271 while ( $c = array_shift($this->categories) ) {
272 $cat_name = trim($this->get_tag( $c, 'wp:cat_name' ));
274 // If the category exists we leave it alone
275 if ( in_array($cat_name, $cat_names) )
278 $category_nicename = $this->get_tag( $c, 'wp:category_nicename' );
279 $posts_private = (int) $this->get_tag( $c, 'wp:posts_private' );
280 $links_private = (int) $this->get_tag( $c, 'wp:links_private' );
282 $parent = $this->get_tag( $c, 'wp:category_parent' );
284 if ( empty($parent) )
285 $category_parent = '0';
287 $category_parent = category_exists($parent);
// 'posts_private' is listed twice below; the duplicate has no effect on the result.
289 $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name');
291 $cat_ID = wp_insert_category($catarr);
// Creates the tags collected by get_entries() that do not already exist
// locally (matched by name). $this->tags is consumed in the process.
295 function process_tags() {
298 $tag_names = (array) get_terms('post_tag', 'fields=names');
300 while ( $c = array_shift($this->tags) ) {
301 $tag_name = trim($this->get_tag( $c, 'wp:tag_name' ));
// NOTE(review): the comment on the next line says "category" but this check is about tags.
303 // If the category exists we leave it alone
304 if ( in_array($tag_name, $tag_names) )
307 $slug = $this->get_tag( $c, 'wp:tag_slug' );
308 $description = $this->get_tag( $c, 'wp:tag_description' );
310 $tagarr = compact('slug', 'description');
312 $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr);
// Callback for get_entries(): reads the dc:creator of one <item> and appends
// it to $this->allauthornames.
// NOTE(review): a line between the two statements below is missing from this excerpt, so any condition guarding the append cannot be confirmed.
316 function process_author($post) {
317 $author = $this->get_tag( $post, 'dc:creator' );
319 $this->allauthornames[] = $author;
// Imports every <item> by running process_post() over the file, then cleans
// up the uploaded import file, fires the 'import_done' action and prints the
// completion message.
322 function process_posts() {
326 $this->get_entries(array(&$this, 'process_post'));
330 wp_import_cleanup($this->id);
331 do_action('import_done', 'wordpress');
333 echo '<h3>'.sprintf(__('All done.').' <a href="%s">'.__('Have fun!').'</a>', get_option('home')).'</h3>';
// Imports a single <item>: parses its fields, skips items already processed
// or already present locally, inserts the post (or attachment), records the
// old => new ID mapping, then attaches categories, tags, comments and post
// meta. $post is the raw text of the item as accumulated by get_entries().
336 function process_post($post) {
339 $post_ID = (int) $this->get_tag( $post, 'wp:post_id' );
340 if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already
// Reset the execution time limit for each item.
343 set_time_limit( 60 );
345 // There are only ever one of these
346 $post_title = $this->get_tag( $post, 'title' );
347 $post_date = $this->get_tag( $post, 'wp:post_date' );
348 $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' );
349 $comment_status = $this->get_tag( $post, 'wp:comment_status' );
350 $ping_status = $this->get_tag( $post, 'wp:ping_status' );
351 $post_status = $this->get_tag( $post, 'wp:status' );
352 $post_name = $this->get_tag( $post, 'wp:post_name' );
353 $post_parent = $this->get_tag( $post, 'wp:post_parent' );
354 $menu_order = $this->get_tag( $post, 'wp:menu_order' );
355 $post_type = $this->get_tag( $post, 'wp:post_type' );
356 $post_password = $this->get_tag( $post, 'wp:post_password' );
357 $guid = $this->get_tag( $post, 'guid' );
358 $post_author = $this->get_tag( $post, 'dc:creator' );
360 $post_content = $this->get_tag( $post, 'content:encoded' );
// Lower-cases upper-case tag names and self-closes <br>/<hr>.
// NOTE(review): the /e (eval) regex modifier was removed in PHP 7.0; preg_replace_callback() is the replacement.
361 $post_content = preg_replace('|<(/?[A-Z]+)|e', "'<' . strtolower('$1')", $post_content);
362 $post_content = str_replace('<br>', '<br />', $post_content);
363 $post_content = str_replace('<hr>', '<hr />', $post_content);
365 preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags);
// Strip CDATA wrappers, decode entities and escape each tag name.
369 foreach ($tags as $tag) {
370 $tags[$tag_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $tag)));
374 preg_match_all('|<category>(.*?)</category>|is', $post, $categories);
375 $categories = $categories[1];
// Same clean-up for category names.
378 foreach ($categories as $category) {
379 $categories[$cat_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $category)));
// A post counts as a duplicate when post_exists() matches its title and date.
383 $post_exists = post_exists($post_title, '', $post_date);
385 if ( $post_exists ) {
387 printf(__('Post <em>%s</em> already exists.'), stripslashes($post_title));
390 // If it has parent, process parent first.
391 $post_parent = (int) $post_parent;
393 // if we already know the parent, map it to the local ID
394 if ( $parent = $this->post_ids_processed[$post_parent] ) {
395 $post_parent = $parent; // new ID of the parent
398 // record the parent for later
399 $this->orphans[intval($post_ID)] = $post_parent;
405 $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor
407 $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password');
// Attachments go through process_attachment(); everything else is inserted directly.
408 if ($post_type == 'attachment') {
409 $remote_url = $this->get_tag( $post, 'wp:attachment_url' );
413 $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url);
414 if ( !$post_id or is_wp_error($post_id) )
418 printf(__('Importing post <em>%s</em>...'), stripslashes($post_title));
419 $comment_post_ID = $post_id = wp_insert_post($postdata);
422 if ( is_wp_error( $post_id ) )
425 // Memorize old and new ID.
426 if ( $post_id && $post_ID ) {
427 $this->post_ids_processed[intval($post_ID)] = intval($post_id);
// Resolve each category by slug, creating it when the lookup fails.
431 if (count($categories) > 0) {
432 $post_cats = array();
433 foreach ($categories as $category) {
434 $slug = sanitize_term_field('slug', $category, 0, 'category', 'db');
435 $cat = get_term_by('slug', $slug, 'category');
438 $cat_ID = $cat->term_id;
440 $category = $wpdb->escape($category);
441 $cat_ID = wp_insert_category(array('cat_name' => $category));
443 $post_cats[] = $cat_ID;
445 wp_set_post_categories($post_id, $post_cats);
// Same approach for tags.
449 if (count($tags) > 0) {
450 $post_tags = array();
451 foreach ($tags as $tag) {
452 $slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db');
453 $tag_obj = get_term_by('slug', $slug, 'post_tag');
455 if ( ! empty($tag_obj) )
456 $tag_id = $tag_obj->term_id;
457 if ( $tag_id == 0 ) {
458 $tag = $wpdb->escape($tag);
459 $tag_id = wp_insert_term($tag, 'post_tag');
// NOTE(review): wp_insert_term() can return a WP_Error, which is not checked before indexing.
460 $tag_id = $tag_id['term_id'];
462 $post_tags[] = intval($tag_id);
464 wp_set_post_tags($post_id, $post_tags);
469 preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments);
470 $comments = $comments[1];
472 if ( $comments) { foreach ($comments as $comment) {
473 $comment_author = $this->get_tag( $comment, 'wp:comment_author');
474 $comment_author_email = $this->get_tag( $comment, 'wp:comment_author_email');
475 $comment_author_IP = $this->get_tag( $comment, 'wp:comment_author_IP');
476 $comment_author_url = $this->get_tag( $comment, 'wp:comment_author_url');
477 $comment_date = $this->get_tag( $comment, 'wp:comment_date');
478 $comment_date_gmt = $this->get_tag( $comment, 'wp:comment_date_gmt');
479 $comment_content = $this->get_tag( $comment, 'wp:comment_content');
480 $comment_approved = $this->get_tag( $comment, 'wp:comment_approved');
481 $comment_type = $this->get_tag( $comment, 'wp:comment_type');
482 $comment_parent = $this->get_tag( $comment, 'wp:comment_parent');
484 // if this is a new post we can skip the comment_exists() check
485 if ( !$post_exists || !comment_exists($comment_author, $comment_date) ) {
486 $commentdata = compact('comment_post_ID', 'comment_author', 'comment_author_url', 'comment_author_email', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_approved', 'comment_type', 'comment_parent');
487 wp_insert_comment($commentdata);
493 printf(' '.__ngettext('(%s comment)', '(%s comments)', $num_comments), $num_comments);
496 preg_match_all('|<wp:postmeta>(.*?)</wp:postmeta>|is', $post, $postmeta);
497 $postmeta = $postmeta[1];
498 if ( $postmeta) { foreach ($postmeta as $p) {
499 $key = $this->get_tag( $p, 'wp:meta_key' );
500 $value = $this->get_tag( $p, 'wp:meta_value' );
501 $value = stripslashes($value); // add_post_meta() will escape.
503 $this->process_post_meta($post_id, $key, $value);
507 do_action('import_post_added', $post_id);
// Stores one post meta entry after passing its key through the
// 'import_post_meta_key' filter, then fires the 'import_post_meta' action.
// NOTE(review): the check that skips a key when the filter returns false is not visible in this excerpt.
511 function process_post_meta($post_id, $key, $value) {
512 // the filter can return false to skip a particular metadata key
513 $_key = apply_filters('import_post_meta_key', $key);
515 add_post_meta( $post_id, $_key, $value );
516 do_action('import_post_meta', $post_id, $_key, $value);
// Imports one attachment when fetching is enabled and a remote URL is given:
// downloads the file, determines its mime type, inserts the attachment post
// and its metadata, and records a guessed thumbnail URL mapping for images.
// Otherwise a "Skipping attachment" message is printed.
520 function process_attachment($postdata, $remote_url) {
521 if ($this->fetch_attachments and $remote_url) {
522 printf( __('Importing attachment <em>%s</em>... '), htmlspecialchars($remote_url) );
523 $upload = $this->fetch_remote_file($postdata, $remote_url);
524 if ( is_wp_error($upload) ) {
525 printf( __('Remote file error: %s'), htmlspecialchars($upload->get_error_message()) );
529 print '('.size_format(filesize($upload['file'])).')';
532 if ( $info = wp_check_filetype($upload['file']) ) {
533 $postdata['post_mime_type'] = $info['type'];
536 print __('Invalid file type');
// The local URL of the downloaded file replaces the imported guid.
540 $postdata['guid'] = $upload['url'];
542 // as per wp-admin/includes/upload.php
543 $post_id = wp_insert_attachment($postdata, $upload['file']);
544 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) );
546 // remap the thumbnail url. this isn't perfect because we're just guessing the original url.
547 if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) {
548 $parts = pathinfo($remote_url);
549 $ext = $parts['extension'];
550 $name = basename($parts['basename'], ".{$ext}");
551 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url;
557 printf( __('Skipping attachment <em>%s</em>'), htmlspecialchars($remote_url) );
// Downloads the file at $url into a uniquely named placeholder in the upload
// directory for the post's date, validates the download, and records the
// remote => local URL mapping in $this->url_remap.
//
// $post  Post data array; only 'post_date' is read here.
// $url   Remote URL of the attachment.
// Returns the upload array from wp_upload_bits() (callers read its 'file'
// and 'url' entries) on success, or a WP_Error when the placeholder cannot
// be created, the response is not 200, the size does not match the
// content-length header, or the file exceeds max_attachment_size().
function fetch_remote_file($post, $url) {
	$upload = wp_upload_dir($post['post_date']);

	// extract the file name and extension from the url
	$file_name = basename($url);

	// get placeholder file in the upload dir with a unique sanitized filename
	$upload = wp_upload_bits( $file_name, 0, '', $post['post_date']);
	if ( $upload['error'] ) {
		echo $upload['error'];
		return new WP_Error( 'upload_dir_error', $upload['error'] );
	}

	// fetch the remote url and write it to the placeholder file
	$headers = wp_get_http($url, $upload['file']);

	// make sure the fetch was successful
	if ( $headers['response'] != '200' ) {
		@unlink($upload['file']);
		return new WP_Error( 'import_file_error', sprintf(__('Remote file returned error response %d'), intval($headers['response'])) );
	}
	elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) {
		@unlink($upload['file']);
		return new WP_Error( 'import_file_error', __('Remote file is incorrect size') );
	}

	$max_size = $this->max_attachment_size();
	if ( !empty($max_size) and filesize($upload['file']) > $max_size ) {
		@unlink($upload['file']);
		// The formatted limit must be an argument of sprintf(), not of __(),
		// otherwise the %s placeholder is never filled in.
		return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s'), size_format($max_size)) );
	}

	// keep track of the old and new urls so we can substitute them later
	$this->url_remap[$url] = $upload['url'];
	// if the remote url is redirected somewhere else, keep track of the destination too
	// (only when the header is present, so an empty key is never recorded)
	if ( !empty($headers['x-final-location']) && $headers['x-final-location'] != $url )
		$this->url_remap[$headers['x-final-location']] = $upload['url'];

	return $upload;
}
// Comparison callback ordering strings by byte length, longest first:
// negative when $a is longer than $b, positive when shorter, zero when equal.
function cmpr_strlen($a, $b) {
	$len_a = strlen($a);
	$len_b = strlen($b);

	return $len_b - $len_a;
}
608 // update url references in post bodies to point to the new local files
// Runs one UPDATE on the posts table and one on the postmeta table (meta_key
// 'enclosure' only) for every entry in $this->url_remap.
609 function backfill_attachment_urls() {
611 // make sure we do the longest urls first, in case one is a substring of another
612 uksort($this->url_remap, array(&$this, 'cmpr_strlen'));
615 foreach ($this->url_remap as $from_url => $to_url) {
616 // remap urls in post_content
617 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) );
618 // remap enclosure urls
619 $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) );
623 // update the post_parent of orphans now that we know the local id's of all parents
// Entries whose child or parent has no local ID in $this->post_ids_processed are left untouched.
624 function backfill_parents() {
627 foreach ($this->orphans as $child_id => $parent_id) {
628 $local_child_id = $this->post_ids_processed[$child_id];
629 $local_parent_id = $this->post_ids_processed[$parent_id];
630 if ($local_child_id and $local_parent_id) {
631 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_parent = %d WHERE ID = %d", $local_parent_id, $local_child_id));
// Filter callback hooked to 'import_post_meta_key' by import().
// NOTE(review): the return statements are not visible in this excerpt; presumably false for '_wp_attached_file' and the key itself otherwise.
636 function is_valid_meta_key($key) {
637 // skip _wp_attached_file metadata since we'll regenerate it from scratch
638 if ( $key == '_wp_attached_file' )
// Whether the author form may offer creating new users for authors found in
// the import file. Defaults to true; the 'import_allow_create_users' filter
// can override it.
function allow_create_users() {
	$allowed = apply_filters('import_allow_create_users', true);

	return $allowed;
}
// Whether the user may choose to download and import attached files.
// Defaults to true; the 'import_allow_fetch_attachments' filter can override it.
function allow_fetch_attachments() {
	$allowed = apply_filters('import_allow_fetch_attachments', true);

	return $allowed;
}
// Maximum size allowed for a downloaded attachment, as compared against
// filesize() by fetch_remote_file(). Defaults to 0, meaning no limit; the
// 'import_attachment_size_limit' filter can override it.
function max_attachment_size() {
	$limit = apply_filters('import_attachment_size_limit', 0);

	return $limit;
}
// Prepares for a bulk import: defers term and comment counting (re-enabled
// in import_end()) and then fires the 'import_start' action.
function import_start() {
	wp_defer_term_counting( true );
	wp_defer_comment_counting( true );

	do_action( 'import_start' );
}
// Finishes a bulk import: fires the 'import_end' action, clears the post
// cache for every locally inserted post, and turns deferred term and comment
// counting back off.
function import_end() {
	do_action( 'import_end' );

	// clear the caches after backfilling
	foreach ( $this->post_ids_processed as $local_post_id ) {
		clean_post_cache( $local_post_id );
	}

	wp_defer_term_counting( false );
	wp_defer_comment_counting( false );
}
// Runs the import for an uploaded file.
//
// $id                 Attachment ID of the uploaded import file.
// $fetch_attachments  Whether the user asked to download attachments; only
//                     honoured when allow_fetch_attachments() permits it.
// Returns whatever import_file() returns, so that the caller can detect a
// WP_Error and report it.
function import($id, $fetch_attachments = false) {
	$this->id = (int) $id;
	$this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments);

	// Lets is_valid_meta_key() veto individual meta keys during the import.
	add_filter('import_post_meta_key', array($this, 'is_valid_meta_key'));
	$file = get_attached_file($this->id);

	// Previously the result was discarded, so dispatch() could never see an error.
	return $this->import_file($file);
}
// Runs the full import pipeline in order: author mapping from the submitted
// form, a scan of the file, categories, tags, posts, then the parent and
// attachment-URL backfills.
// NOTE(review): the assignment of $this->file, the import_end() call and the return statements are not visible in this excerpt.
684 function import_file($file) {
687 $this->import_start();
688 $this->get_authors_from_post();
689 $this->get_entries();
690 $this->process_categories();
691 $this->process_tags();
692 $result = $this->process_posts();
693 $this->backfill_parents();
694 $this->backfill_attachment_urls();
697 if ( is_wp_error( $result ) )
// Handles the uploaded import file: prints the error when the upload failed,
// otherwise stores the file path and attachment ID on the importer.
// NOTE(review): dispatch() uses the return value as a success flag; the return statements are not visible in this excerpt.
// NOTE(review): the upload error text is printed without escaping.
701 function handle_upload() {
702 $file = wp_import_handle_upload();
703 if ( isset($file['error']) ) {
704 echo '<p>'.__('Sorry, there has been an error.').'</p>';
705 echo '<p><strong>' . $file['error'] . '</strong></p>';
708 $this->file = $file['file'];
709 $this->id = (int) $file['id'];
// Entry point registered with register_importer(): routes the request by the
// integer 'step' query parameter. The upload step verifies the
// 'import-upload' nonce before showing the author form; the import step
// verifies the 'import-wordpress' nonce before running the import.
// NOTE(review): the switch/case lines are not visible in this excerpt.
713 function dispatch() {
714 if (empty ($_GET['step']))
717 $step = (int) $_GET['step'];
725 check_admin_referer('import-upload');
726 if ( $this->handle_upload() )
727 $this->select_authors();
730 check_admin_referer('import-wordpress');
// NOTE(review): $_POST['attachments'] is read without an isset check; it is absent when the checkbox is unticked.
731 $result = $this->import( $_GET['id'], $_POST['attachments'] );
732 if ( is_wp_error( $result ) )
733 echo $result->get_error_message();
// PHP4-style constructor (a method named after the class).
// NOTE(review): the body is not visible in this excerpt.
739 function WP_Import() {
// Create the importer instance and register its dispatch() method under the 'wordpress' importer ID.
744 $wp_import = new WP_Import();
746 register_importer('wordpress', 'WordPress', __('Import <strong>posts, comments, custom fields, pages, and categories</strong> from a WordPress export file.'), array ($wp_import, 'dispatch'));