Typo 6.1 Exporter for WordPress

I wrote two rake scripts to dump posts from Typo as planned in my last post.

Here are the steps.

1. Dump Typo’s posts into blog.xml

# Copy both task files into the Typo application's lib/tasks directory.
$ cp words.rake export.rake /path/to/typo/lib/tasks/
$ cd /path/to/typo/
# The words task writes its output next to the task file itself,
# i.e. lib/tasks/tags.rb.
$ rake words[tags.rb]
$ vi lib/tasks/tags.rb # edit generated tags hash
# The export task writes blog.xml relative to the current directory.
$ rake export[blog.xml]

2. Go to the WordPress admin page and activate the WordPress Importer tool
3. Upload the blog.xml file and follow the import instructions

words.rake

# Collects every distinct run of ASCII letters found in article titles and
# dumps them as a Ruby hash literal assigned to $tags, so the list can be
# pruned by hand before running the export task.
#
# Argument:
#   file - name of the file to write; it is created in the same directory
#          as this rake file.
#
# Raises ArgumentError when no file name is given.
desc "dump a hash of words"
task :words, [:file] => [:environment] do |_t, args|
  # Without this guard a missing argument surfaces as an opaque TypeError
  # from File.join(dir, nil).
  raise ArgumentError, "usage: rake words[tags.rb]" if args.file.to_s.empty?

  words = {}
  Article.find(:all).each do |article|
    next unless article.title

    # Splitting on non-letters yields empty strings between adjacent
    # separators; those are skipped.
    article.title.split(/[^a-zA-Z]/).each do |word|
      next if word.empty?

      words[word.downcase] ||= word.capitalize
    end
  end

  # dump words for manual selection
  target = File.join(File.dirname(__FILE__), args.file)
  File.open(target, "w") do |f|
    f.write("$tags = {\n")
    words.keys.sort.each do |key|
      f.write("'#{key}'=>'#{words[key]}',\n")
    end
    f.write("}\n")
  end
end

tags.rb

# Example of the generated word list after manual pruning. Each key is the
# lowercase word the export task looks up; each value is the display name
# written out as the tag name.
$tags = {
  'acer'         => 'Acer',
  'adsense'      => 'Adsense',
  'apache'       => 'Apache',
  'architecture' => 'Architecture',
  'bears'        => 'Bears',
  'bees'         => 'Bees'
}

export.rake

# Writes the blog's users, categories, tags and articles to a WordPress
# WXR 1.2 (extended RSS) file that the WordPress Importer can read.
#
# Argument:
#   file - path of the XML file to write (relative paths resolve against
#          the current working directory).
#
# Requires a `tags.rb` file next to this rake file that defines the global
# $tags hash (lowercase word => display name). Every article whose body
# contains one of those words is tagged with it.
#
# Raises ArgumentError when no file name is given.
desc "export to wordpress xml file"
task :export, [:file] => [:environment] do |_t, args|
  raise ArgumentError, "usage: rake export[blog.xml]" if args.file.to_s.empty?

  # expand_path keeps the require working even when this file was loaded
  # through a relative path that is not on the load path.
  require File.expand_path('tags', File.dirname(__FILE__))

  # Escapes the characters that are special in XML text nodes and
  # double-quoted attribute values.
  esc = lambda do |value|
    value.to_s.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;').gsub('"', '&quot;')
  end

  # Wraps text in a CDATA section. A literal "]]>" inside the text would
  # close the section early, so it is split across two sections.
  cdata = lambda do |value|
    "<![CDATA[#{value.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>"
  end

  # RSS 2.0 dates follow RFC 822; values without strftime (e.g. nil) fall
  # back to their plain string form.
  rfc822 = lambda do |time|
    time.respond_to?(:strftime) ? time.strftime("%a, %d %b %Y %H:%M:%S %z") : time.to_s
  end

  # WXR post dates use the "YYYY-MM-DD HH:MM:SS" form.
  wp_date = lambda do |time|
    time.respond_to?(:strftime) ? time.strftime("%Y-%m-%d %H:%M:%S") : time.to_s
  end

  b = Blog.default
  s = b.settings
  # Loaded before the output file is opened so a failing query does not
  # leave a truncated export behind.
  articles = Article.find(:all)

  File.open(args.file, "w") do |f|
    f.write('<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
   xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
   xmlns:content="http://purl.org/rss/1.0/modules/content/"
   xmlns:wfw="http://wellformedweb.org/CommentAPI/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>')
    f.write("\n<title>#{esc.call(s["blog_name"])}</title>\n")
    f.write("<link>#{esc.call(b.base_url)}</link>\n")
    f.write("<description>#{esc.call(s["blog_subtitle"])}</description>\n")
    f.write("<pubDate>#{rfc822.call(Time.now)}</pubDate>\n")
    f.write("<language>en-US</language>\n")
    f.write("<wp:wxr_version>1.2</wp:wxr_version>\n")
    f.write("<wp:base_site_url>#{esc.call(b.base_url)}</wp:base_site_url>\n")
    f.write("<wp:base_blog_url>#{esc.call(b.base_url)}</wp:base_blog_url>\n")

    User.find(:all).each do |u|
      f.write("<wp:author>\n")
      f.write("<wp:author_id>#{u.id}</wp:author_id>\n")
      f.write("<wp:author_login>#{esc.call(u.login)}</wp:author_login>\n")
      f.write("<wp:author_email>#{esc.call(u.email)}</wp:author_email>\n")
      f.write("<wp:author_display_name>#{esc.call(u.name)}</wp:author_display_name>\n")
      f.write("</wp:author>\n")
    end

    # Categories and tags share one running term_id sequence.
    i = 1
    Category.find(:all).each do |c|
      f.write("<wp:category><wp:term_id>#{i}</wp:term_id><wp:category_nicename>#{esc.call(c.name.downcase)}</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name>#{cdata.call(c.name)}</wp:cat_name></wp:category>\n")
      i += 1
    end

    $tags.each do |k, v|
      f.write("<wp:tag><wp:term_id>#{i}</wp:term_id><wp:tag_slug>#{esc.call(k)}</wp:tag_slug><wp:tag_name>#{cdata.call(v)}</wp:tag_name></wp:tag>\n")
      i += 1
    end

    f.write("<generator>http://competo.com</generator>\n")

    articles.each do |a|
      # A nil body is exported as empty content instead of raising.
      body = a.body.to_s

      f.write("<item>\n")
      f.write("<title>#{esc.call(a.title)}</title>\n")
      #f.write("<link>#{a.permalink}</link>\n")
      f.write("<pubDate>#{rfc822.call(a.published_at)}</pubDate>\n")
      f.write("<dc:creator>#{esc.call(a.author)}</dc:creator>\n")
      # RSS 2.0 spells this attribute "isPermaLink".
      f.write("<guid isPermaLink=\"false\">#{esc.call(a.permalink)}</guid>\n")
      f.write("<description></description>\n")

      # change embedded images path
      content = body.gsub('src="/archives/', 'src="/wp-content/uploads/')

      f.write("<content:encoded>#{cdata.call(content)}</content:encoded>\n")
      f.write("<excerpt:encoded>#{cdata.call(a.excerpt)}</excerpt:encoded>\n")
      f.write("<wp:post_id>#{a.id}</wp:post_id>\n")
      f.write("<wp:post_date>#{wp_date.call(a.created_at)}</wp:post_date>\n")
      f.write("<wp:comment_status>closed</wp:comment_status>\n")
      f.write("<wp:ping_status>closed</wp:ping_status>\n")
      f.write("<wp:post_name>#{esc.call(a.permalink)}</wp:post_name>\n")
      f.write("<wp:status>publish</wp:status>\n")
      f.write("<wp:post_parent>0</wp:post_parent>\n")
      f.write("<wp:menu_order>0</wp:menu_order>\n")
      f.write("<wp:post_type>post</wp:post_type>\n")
      f.write("<wp:post_password></wp:post_password>\n")
      f.write("<wp:is_sticky>0</wp:is_sticky>\n")

      (a.categories || []).each do |c|
        f.write("<category domain=\"category\" nicename=\"#{esc.call(c.name.downcase)}\">#{cdata.call(c.name)}</category>\n")
      end

      # Tag the post with every $tags word that occurs in its body.
      body.split(/[^a-zA-Z]/).map { |x| x.downcase }.uniq.each do |w|
        t = $tags[w]
        next unless t

        f.write("<category domain=\"post_tag\" nicename=\"#{esc.call(w)}\">#{cdata.call(t)}</category>\n")
      end

      # article comments are intentionally not exported
      f.write("</item>\n")
    end
    f.write("</channel>\n")
    f.write("</rss>\n")
  end
end

About rp8

Specialized in building sophisticated systems for trading & risks in commodity, exotics, commodity index & structured products. Especially interested in using open source stacks and cloud computing to build a new generation of services and apps. Enjoy mountain biking & photography. github
This entry was posted in General and tagged , , , . Bookmark the permalink.