I wrote two rake scripts to dump posts from Typo as planned in my last post.
Here are the steps.
1. Dump Typo’s posts into blog.xml
$ cp words.rake export.rake /path/to/typo/lib/tasks/
$ cd /path/to/typo/
$ rake words[tags.rb]
$ vi lib/tasks/tags.rb # edit generated tags hash
$ rake export[blog.xml]
$ cd /path/to/typo/
$ rake words[tags.rb]
$ vi lib/tasks/tags.rb # edit generated tags hash
$ rake export[blog.xml]
2. Go to the WordPress admin page and activate the WordPress Importer tool
3. Upload the blog.xml file and follow the import instructions
words.rake
# Collects every distinct alphabetic word found in article titles and writes
# them out as a Ruby hash literal assigned to the global $tags, so the list
# can be pruned by hand.
#
# Usage: rake words[tags.rb]
#
# file - name of the output file. It is created in the directory of this
#        rake file (File.dirname(__FILE__)), not in the current directory.
#
# Each entry maps the lower-cased word to its capitalized form
# ('apache'=>'Apache'); entries are written sorted by key.
#
# Raises ArgumentError when no file name is given.
desc "dump a hash of words"
task :words, [:file] => [:environment] do |_t, args|
  # Without this guard a missing argument only surfaces as a TypeError
  # raised by File.join(..., nil), which gives no hint about the cause.
  raise ArgumentError, "usage: rake words[FILE]" if args.file.to_s.empty?

  words = {}
  Article.find(:all).each do |a|
    next unless a.title

    # Split on anything that is not an ASCII letter; consecutive separators
    # produce empty strings, which are skipped.
    a.title.split(/[^a-zA-Z]/).each do |w|
      next if w.empty?

      words[w.downcase] ||= w.capitalize
    end
  end

  # dump words for manual selection
  File.open(File.join(File.dirname(__FILE__), args.file), "w") do |f|
    f.write("$tags = {\n")
    words.keys.sort.each do |k|
      f.write("'#{k}'=>'#{words[k]}',\n")
    end
    f.write("}\n")
  end
end
# Collects every distinct alphabetic word found in article titles and writes
# them out as a Ruby hash literal assigned to the global $tags, so the list
# can be pruned by hand.
#
# Usage: rake words[tags.rb]
#
# file - name of the output file. It is created in the directory of this
#        rake file (File.dirname(__FILE__)), not in the current directory.
#
# Each entry maps the lower-cased word to its capitalized form
# ('apache'=>'Apache'); entries are written sorted by key.
#
# Raises ArgumentError when no file name is given.
task :words, [:file] => [:environment] do |_t, args|
  # Without this guard a missing argument only surfaces as a TypeError
  # raised by File.join(..., nil), which gives no hint about the cause.
  raise ArgumentError, "usage: rake words[FILE]" if args.file.to_s.empty?

  words = {}
  Article.find(:all).each do |a|
    next unless a.title

    # Split on anything that is not an ASCII letter; consecutive separators
    # produce empty strings, which are skipped.
    a.title.split(/[^a-zA-Z]/).each do |w|
      next if w.empty?

      words[w.downcase] ||= w.capitalize
    end
  end

  # dump words for manual selection
  File.open(File.join(File.dirname(__FILE__), args.file), "w") do |f|
    f.write("$tags = {\n")
    words.keys.sort.each do |k|
      f.write("'#{k}'=>'#{words[k]}',\n")
    end
    f.write("}\n")
  end
end
tags.rb
# Tag dictionary read by the export task: each key is a lower-cased word
# (used as the tag slug), each value is the tag's display name.
# This is the hand-edited version of the hash produced by `rake words[tags.rb]`.
$tags = {
  'acer'         => 'Acer',
  'adsense'      => 'Adsense',
  'apache'       => 'Apache',
  'architecture' => 'Architecture',
  'bears'        => 'Bears',
  'bees'         => 'Bees'
}
# Tag dictionary read by the export task: each key is a lower-cased word
# (used as the tag slug), each value is the tag's display name.
# The opening `$tags = {` line is required; without it the entries below
# are not valid Ruby.
$tags = {
'acer'=>'Acer',
'adsense'=>'Adsense',
'apache'=>'Apache',
'architecture'=>'Architecture',
'bears'=>'Bears',
'bees'=>'Bees'
}
export.rake
# Writes the blog as a WordPress export (WXR 1.2) XML file: channel header,
# authors, categories, the tags listed in $tags, and one <item> per article.
#
# Usage: rake export[blog.xml]
#
# file - path of the XML file to create (passed to File.open as given).
#
# Requires a `tags` file next to this rake file that defines the global
# $tags hash (lower-cased word => tag name). An article receives a tag when
# its body contains the tag's key as a word. Comments are not exported.
#
# Raises ArgumentError when no file name is given.
desc "export to wordpress xml file"
task :export, [:file] => [:environment] do |_t, args|
  require 'date'
  require 'erb'
  # expand_path keeps the require independent of the current directory.
  require File.expand_path('tags', File.dirname(__FILE__))

  raise ArgumentError, "usage: rake export[FILE]" if args.file.to_s.empty?

  # Escapes text placed in element content or attribute values so that
  # characters such as & and < cannot produce malformed XML; nil becomes "".
  esc = lambda { |v| ERB::Util.html_escape(v.to_s) }
  # Wraps text in a CDATA section. A literal "]]>" inside the text would end
  # the section early, so it is split across two adjacent sections.
  cdata = lambda { |v| "<![CDATA[#{v.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>" }

  b = Blog.default
  s = b.settings
  articles = Article.find(:all)

  File.open(args.file, "w") do |f|
    f.write([
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<rss version="2.0"',
      'xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"',
      'xmlns:content="http://purl.org/rss/1.0/modules/content/"',
      'xmlns:wfw="http://wellformedweb.org/CommentAPI/"',
      'xmlns:dc="http://purl.org/dc/elements/1.1/"',
      'xmlns:wp="http://wordpress.org/export/1.2/"',
      '>',
      '<channel>'
    ].join("\n"))
    f.write("\n<title>#{esc.call(s["blog_name"])}</title>\n")
    f.write("<link>#{esc.call(b.base_url)}</link>\n")
    f.write("<description>#{esc.call(s["blog_subtitle"])}</description>\n")
    f.write("<pubDate>#{DateTime.now}</pubDate>\n")
    f.write("<language>en-US</language>\n")
    f.write("<wp:wxr_version>1.2</wp:wxr_version>\n")
    f.write("<wp:base_site_url>#{esc.call(b.base_url)}</wp:base_site_url>\n")
    f.write("<wp:base_blog_url>#{esc.call(b.base_url)}</wp:base_blog_url>\n")

    User.find(:all).each do |u|
      f.write("<wp:author>\n")
      f.write("<wp:author_id>#{u.id}</wp:author_id>\n")
      f.write("<wp:author_login>#{esc.call(u.login)}</wp:author_login>\n")
      f.write("<wp:author_email>#{esc.call(u.email)}</wp:author_email>\n")
      f.write("<wp:author_display_name>#{esc.call(u.name)}</wp:author_display_name>\n")
      f.write("</wp:author>\n")
    end

    # Categories and tags share one running term id sequence.
    i = 1
    Category.find(:all).each do |c|
      f.write("<wp:category><wp:term_id>#{i}</wp:term_id><wp:category_nicename>#{esc.call(c.name.downcase)}</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name>#{cdata.call(c.name)}</wp:cat_name></wp:category>\n")
      i += 1
    end
    $tags.each do |k, v|
      f.write("<wp:tag><wp:term_id>#{i}</wp:term_id><wp:tag_slug>#{esc.call(k)}</wp:tag_slug><wp:tag_name>#{cdata.call(v)}</wp:tag_name></wp:tag>\n")
      i += 1
    end
    f.write("<generator>http://competo.com</generator>\n")

    articles.each do |a|
      # Treat a missing body as empty so the article is still exported.
      body = a.body.to_s

      f.write("<item>\n")
      f.write("<title>#{esc.call(a.title)}</title>\n")
      #f.write("<link>#{a.permalink}</link>\n");
      f.write("<pubDate>#{a.published_at}</pubDate>\n")
      f.write("<dc:creator>#{esc.call(a.author)}</dc:creator>\n")
      # RSS 2.0 spells this attribute isPermaLink (capital L).
      f.write("<guid isPermaLink=\"false\">#{esc.call(a.permalink)}</guid>\n")
      f.write("<description></description>\n")
      # change embedded images path
      content = body.gsub('src="/archives/', 'src="/wp-content/uploads/')
      f.write("<content:encoded>#{cdata.call(content)}</content:encoded>\n")
      f.write("<excerpt:encoded>#{cdata.call(a.excerpt)}</excerpt:encoded>\n")
      f.write("<wp:post_id>#{a.id}</wp:post_id>\n")
      f.write("<wp:post_date>#{a.created_at}</wp:post_date>\n")
      f.write("<wp:comment_status>closed</wp:comment_status>\n")
      f.write("<wp:ping_status>closed</wp:ping_status>\n")
      f.write("<wp:post_name>#{esc.call(a.permalink)}</wp:post_name>\n")
      f.write("<wp:status>publish</wp:status>\n")
      f.write("<wp:post_parent>0</wp:post_parent>\n")
      f.write("<wp:menu_order>0</wp:menu_order>\n")
      f.write("<wp:post_type>post</wp:post_type>\n")
      f.write("<wp:post_password></wp:post_password>\n")
      f.write("<wp:is_sticky>0</wp:is_sticky>\n")

      (a.categories || []).each do |c|
        f.write("<category domain=\"category\" nicename=\"#{esc.call(c.name.downcase)}\">#{cdata.call(c.name)}</category>\n")
      end

      # Tag the article with every $tags key that occurs as a word in its body.
      body.split(/[^a-zA-Z]/).map { |x| x.downcase }.uniq.each do |w|
        t = $tags[w]
        f.write("<category domain=\"post_tag\" nicename=\"#{w}\">#{cdata.call(t)}</category>\n") if t
      end

      # ignore comments
      f.write("</item>\n")
    end

    f.write("</channel>\n")
    f.write("</rss>\n")
  end
end
# Writes the blog as a WordPress export (WXR 1.2) XML file: channel header,
# authors, categories, the tags listed in $tags, and one <item> per article.
#
# Usage: rake export[blog.xml]
#
# file - path of the XML file to create (passed to File.open as given).
#
# Requires a `tags` file next to this rake file that defines the global
# $tags hash (lower-cased word => tag name). An article receives a tag when
# its body contains the tag's key as a word. Comments are not exported.
#
# Raises ArgumentError when no file name is given.
task :export, [:file] => [:environment] do |_t, args|
  require 'date'
  require 'erb'
  # expand_path keeps the require independent of the current directory.
  require File.expand_path('tags', File.dirname(__FILE__))

  raise ArgumentError, "usage: rake export[FILE]" if args.file.to_s.empty?

  # Escapes text placed in element content or attribute values so that
  # characters such as & and < cannot produce malformed XML; nil becomes "".
  esc = lambda { |v| ERB::Util.html_escape(v.to_s) }
  # Wraps text in a CDATA section. A literal "]]>" inside the text would end
  # the section early, so it is split across two adjacent sections.
  cdata = lambda { |v| "<![CDATA[#{v.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>" }

  b = Blog.default
  s = b.settings
  articles = Article.find(:all)

  File.open(args.file, "w") do |f|
    f.write([
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<rss version="2.0"',
      'xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"',
      'xmlns:content="http://purl.org/rss/1.0/modules/content/"',
      'xmlns:wfw="http://wellformedweb.org/CommentAPI/"',
      'xmlns:dc="http://purl.org/dc/elements/1.1/"',
      'xmlns:wp="http://wordpress.org/export/1.2/"',
      '>',
      '<channel>'
    ].join("\n"))
    f.write("\n<title>#{esc.call(s["blog_name"])}</title>\n")
    f.write("<link>#{esc.call(b.base_url)}</link>\n")
    f.write("<description>#{esc.call(s["blog_subtitle"])}</description>\n")
    f.write("<pubDate>#{DateTime.now}</pubDate>\n")
    f.write("<language>en-US</language>\n")
    f.write("<wp:wxr_version>1.2</wp:wxr_version>\n")
    f.write("<wp:base_site_url>#{esc.call(b.base_url)}</wp:base_site_url>\n")
    f.write("<wp:base_blog_url>#{esc.call(b.base_url)}</wp:base_blog_url>\n")

    User.find(:all).each do |u|
      f.write("<wp:author>\n")
      f.write("<wp:author_id>#{u.id}</wp:author_id>\n")
      f.write("<wp:author_login>#{esc.call(u.login)}</wp:author_login>\n")
      f.write("<wp:author_email>#{esc.call(u.email)}</wp:author_email>\n")
      f.write("<wp:author_display_name>#{esc.call(u.name)}</wp:author_display_name>\n")
      f.write("</wp:author>\n")
    end

    # Categories and tags share one running term id sequence.
    i = 1
    Category.find(:all).each do |c|
      f.write("<wp:category><wp:term_id>#{i}</wp:term_id><wp:category_nicename>#{esc.call(c.name.downcase)}</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name>#{cdata.call(c.name)}</wp:cat_name></wp:category>\n")
      i += 1
    end
    $tags.each do |k, v|
      f.write("<wp:tag><wp:term_id>#{i}</wp:term_id><wp:tag_slug>#{esc.call(k)}</wp:tag_slug><wp:tag_name>#{cdata.call(v)}</wp:tag_name></wp:tag>\n")
      i += 1
    end
    f.write("<generator>http://competo.com</generator>\n")

    articles.each do |a|
      # Treat a missing body as empty so the article is still exported.
      body = a.body.to_s

      f.write("<item>\n")
      f.write("<title>#{esc.call(a.title)}</title>\n")
      #f.write("<link>#{a.permalink}</link>\n");
      f.write("<pubDate>#{a.published_at}</pubDate>\n")
      f.write("<dc:creator>#{esc.call(a.author)}</dc:creator>\n")
      # RSS 2.0 spells this attribute isPermaLink (capital L).
      f.write("<guid isPermaLink=\"false\">#{esc.call(a.permalink)}</guid>\n")
      f.write("<description></description>\n")
      # change embedded images path
      content = body.gsub('src="/archives/', 'src="/wp-content/uploads/')
      f.write("<content:encoded>#{cdata.call(content)}</content:encoded>\n")
      f.write("<excerpt:encoded>#{cdata.call(a.excerpt)}</excerpt:encoded>\n")
      f.write("<wp:post_id>#{a.id}</wp:post_id>\n")
      f.write("<wp:post_date>#{a.created_at}</wp:post_date>\n")
      f.write("<wp:comment_status>closed</wp:comment_status>\n")
      f.write("<wp:ping_status>closed</wp:ping_status>\n")
      f.write("<wp:post_name>#{esc.call(a.permalink)}</wp:post_name>\n")
      f.write("<wp:status>publish</wp:status>\n")
      f.write("<wp:post_parent>0</wp:post_parent>\n")
      f.write("<wp:menu_order>0</wp:menu_order>\n")
      f.write("<wp:post_type>post</wp:post_type>\n")
      f.write("<wp:post_password></wp:post_password>\n")
      f.write("<wp:is_sticky>0</wp:is_sticky>\n")

      (a.categories || []).each do |c|
        f.write("<category domain=\"category\" nicename=\"#{esc.call(c.name.downcase)}\">#{cdata.call(c.name)}</category>\n")
      end

      # Tag the article with every $tags key that occurs as a word in its body.
      body.split(/[^a-zA-Z]/).map { |x| x.downcase }.uniq.each do |w|
        t = $tags[w]
        f.write("<category domain=\"post_tag\" nicename=\"#{w}\">#{cdata.call(t)}</category>\n") if t
      end

      # ignore comments
      f.write("</item>\n")
    end

    f.write("</channel>\n")
    f.write("</rss>\n")
  end
end