-
-
Save juniorz/1564581 to your computer and use it in GitHub Desktop.
| require 'rubygems' | |
| require 'nokogiri' | |
| require 'fileutils' | |
| require 'date' | |
| require 'uri' | |
| # usage: ruby import.rb my-blog.xml | |
| # my-blog.xml is a file from Settings -> Basic -> Export in blogger. | |
# Fail fast with a usage hint instead of letting File.read raise a
# TypeError on a nil path when the export file argument is missing.
abort 'usage: ruby import.rb my-blog.xml' if ARGV.empty?

# Raw XML text of the export file named on the command line.
data = File.read(ARGV[0])
doc = Nokogiri::XML(data)

# Entry id => Post, filled in by `add`. Published posts and drafts are kept
# apart because they are written to different directories.
@posts = {}
@drafts = {}
# Files one <entry> node of the export into the right bucket.
#
# The entry kind is the part after '#' in the `term` attribute of the
# entry's first <category> element:
#
# * 'post'    - stored in @posts or @drafts (keyed by entry id), depending
#               on `published?`.
# * 'comment' - attached to the post named by the `ref` attribute of its
#               `in-reply-to` child. The parent is looked up among both
#               published posts and drafts; a comment whose parent cannot
#               be found (or that has no `in-reply-to` child) is skipped
#               with a warning on stderr instead of crashing the import.
#               The parent must already have been added when the comment
#               is processed.
# * 'template', 'settings', 'page' - ignored.
#
# @param node [#search, #children] the <entry> node
# @raise [RuntimeError] for any other entry kind
def add(node)
  id = node.search('id').first.content
  type = node.search('category').first.attr('term').split('#').last

  case type
  when 'post'
    bucket = published?(node) ? @posts : @drafts
    bucket[id] = Post.new(node)
  when 'comment'
    reply_to = node.children.find { |child| child.name == 'in-reply-to' }
    post_id = reply_to && reply_to.attr('ref')
    parent = @posts[post_id] || @drafts[post_id]

    if parent
      parent.add_comment(Comment.new(node))
    else
      warn "skipping comment #{id}: parent post #{post_id.inspect} not found"
    end
  when 'template', 'settings', 'page'
    # Nothing to import for these entry kinds.
  else
    raise 'dunno ' + type
  end
end
# Tells whether an entry node is published.
#
# Looks for an `app:draft` element nested under `app:control` (with the
# `app` prefix bound to the namespace below); the entry counts as published
# exactly when no such element is found.
#
# @param node [#at_css] the <entry> node
# @return [Boolean]
def published?(node)
  app_ns = { 'app' => 'http://purl.org/atom/app#' }
  draft_marker = node.at_css('app|control app|draft', app_ns)
  draft_marker.nil?
end
# Renders one post to `<path>/<post.file_name>`.
#
# The file holds the post header, a blank line, the post body wrapped in
# <div class='post'>, and, when the post has comments, a "Comments" section
# with one <div class='comment'> per comment. Progress is reported on
# stdout.
#
# @param post [#title, #file_name, #header, #content, #comments]
# @param path [String] target directory (must already exist)
def write(post, path='_posts')
  puts "Post [#{post.title}] has #{post.comments.count} comments"
  puts "writing #{post.file_name}"

  destination = File.join(path, post.file_name)
  File.open(destination, 'w') do |out|
    out << post.header << "\n\n"
    #out << "<h1>{{ page.title }}</h1>\n"
    out << "<div class='post'>\n" << post.content << "</div>\n"

    # Posts without comments get no comments section at all.
    next if post.comments.empty?

    out << "<h2>Comments</h2>\n"
    out << "<div class='comments'>\n"
    post.comments.each do |remark|
      out << "<div class='comment'>\n"
      out << "<div class='author'>" << remark.author << "</div>\n"
      out << "<div class='content'>\n" << remark.content << "</div>\n"
      out << "</div>\n"
    end
    out.write "</div>\n"
  end
end
# A blog post backed by one <entry> node of the export.
#
# Exposes the pieces needed to write the post out as a file: a file name,
# a YAML front-matter header, the body, and the comments attached to it.
class Post
  # @return [Array] comments attached so far, most recently added first
  attr_reader :comments

  # @param node [#at_css, #search] the post's <entry> node
  def initialize(node)
    @node = node
    @comments = []
  end

  # Attaches a comment; it is put at the front of the list.
  def add_comment(comment)
    @comments.unshift comment
  end

  # @return [String] text of the entry's <title> element
  def title
    @title ||= @node.at_css('title').content
  end

  # @return [String] text of the entry's <content> element
  def content
    @content ||= @node.at_css('content').content
  end

  # @return [String] publication date formatted as YYYY-MM-DD
  def creation_date
    @creation_date ||= creation_datetime.strftime("%Y-%m-%d")
  end

  # @return [Date] date parsed from the entry's first <published> element
  def creation_datetime
    @creation_datetime ||= Date.parse(@node.search('published').first.content)
  end

  # @return [String, nil] href of the entry's rel=alternate link, or nil
  #   when the entry has no such link
  def permalink
    return @permalink unless @permalink.nil?
    link_node = @node.at_css('link[rel=alternate]')
    @permalink = link_node && link_node.attr('href')
  end

  # Slug used in the output file name.
  #
  # Prefers the base name (extension stripped) of the permalink path. When
  # there is no permalink, it cannot be parsed as a URI, or its path yields
  # an empty name, the slug is built from the ASCII letters and digits of
  # the title. 'untitled' is the last resort so the file name never ends up
  # with an empty or dash-only slug.
  #
  # @return [String]
  def param_name
    slug = permalink_slug
    slug = title_slug if slug.nil? || slug.empty?
    slug.empty? ? 'untitled' : slug
  end

  # @return [String] "<YYYY-MM-DD>-<slug>.html"
  def file_name
    %{#{creation_date}-#{param_name}.html}
  end

  # YAML front matter for the post. The title is emitted as a double-quoted
  # YAML scalar, so quotes and backslashes inside it are escaped.
  #
  # @return [String]
  def header
    [
      '---',
      %{layout: post},
      %{title: "#{yaml_escape(title)}"},
      %{date: #{creation_datetime}},
      %{comments: false},
      categories,
      '---'
    ].compact.join("\n")
  end

  # YAML `categories:` list built from the `term` attribute of the entry's
  # <category> elements carrying the Blogger label scheme.
  #
  # @return [String, nil] nil when the entry has no such elements
  def categories
    terms = @node.search('category[scheme="http://www.blogger.com/atom/ns#"]')
    unless Array(terms).empty?
      [
        'categories:',
        terms.map{ |t| t.attr('term') && " - #{t.attr('term')}" }.compact.join("\n"),
      ].join("\n")
    end
  end

  private

  # Base name of the permalink path without its extension; nil when there is
  # no permalink or it is not a parseable URI.
  def permalink_slug
    return nil if permalink.nil?
    File.basename(URI(permalink).path, '.*')
  rescue URI::InvalidURIError
    nil
  end

  # Lower-cased runs of ASCII letters/digits from the title joined by '-'.
  # `scan` (rather than `split`) avoids a leading '-' when the title starts
  # with a non-alphanumeric character.
  def title_slug
    title.scan(/[a-zA-Z0-9]+/).join('-').downcase
  end

  # Backslash-escapes `"` and `\` for use inside a double-quoted YAML scalar.
  # The block form of gsub sidesteps backslash handling in replacement strings.
  def yaml_escape(text)
    text.gsub(/["\\]/) { |char| "\\#{char}" }
  end
end
# A comment backed by one <entry> node of the export.
class Comment
  # @param node [#search] the comment's <entry> node
  def initialize(node)
    @node = node
  end

  # @return [String] text of the first element matching 'author name'
  def author
    first_text('author name')
  end

  # @return [String] text of the first <content> element
  def content
    first_text('content')
  end

  private

  # Text of the first node matching +selector+ under the comment's node.
  def first_text(selector)
    @node.search(selector).first.content
  end
end
# Sort every <entry> of the export into @posts / @drafts; comment entries
# are attached to their post by `add`.
doc.search('entry').each { |entry| add entry }

# Each output directory is wiped and recreated so files from an earlier run
# do not linger. mkdir_p is a no-op if the directory still exists.
puts "** Writing PUBLISHED posts"
FileUtils.rm_rf('_posts')
FileUtils.mkdir_p('_posts')
@posts.each_value { |post| write post }

puts "\n"
puts "** Writing DRAFT posts"
FileUtils.rm_rf('_drafts')
FileUtils.mkdir_p('_drafts')
@drafts.each_value { |post| write post, '_drafts' }
Thanks for this work! It worked well, except that it doesn't handle double quotes or backslashes in titles, such that a "rake generate" will fail with YAML errors. For example, " needs to become \" and \ needs to become \\.
Thanks a lot. It worked well.
I tried your script to migrate my Blogger blog. I installed ruby via pacman -S ruby, and Nokogiri via gem install nokogiri on Arch Linux.
But your script fails with the following:
[orschiro@thinkpad Blogger to Github]$ ruby import.rb blog-08-03-2013.xml
WARNING: Nokogiri was built against LibXML version 2.8.0, but has dynamically loaded 2.9.1
import.rb:27:in `add': dunno page (RuntimeError)
from import.rb:119:in `block in <main>'
from /home/orschiro/.gem/ruby/2.0.0/gems/nokogiri-1.6.0/lib/nokogiri/xml/node_set.rb:237:in `block in each'
from /home/orschiro/.gem/ruby/2.0.0/gems/nokogiri-1.6.0/lib/nokogiri/xml/node_set.rb:236:in `upto'
from /home/orschiro/.gem/ruby/2.0.0/gems/nokogiri-1.6.0/lib/nokogiri/xml/node_set.rb:236:in `each'
from import.rb:118:in `<main>'
Can you help me, please?
EDIT: Sorry, this did not happen with your script but with the original script you had forked from.
I'm seeing the same error as @IQAndreas - gisted here: https://gist.github.com/nbieber/a2c469538bdb79d2c520
Thanks for this. It converted almost all of my posts; the last one causes the script to crash Ruby entirely from line 42. I think it's because the post has some non-ascii in the name: "Some Quốc Ngữ History".
I have nokogiri installed, but I can't do it:
import.rb:30:in `add': undefined method `add_comment' for nil:NilClass (NoMethodError)
from import.rb:158:in `block in <main>'
from /var/lib/gems/2.1.0/gems/nokogiri-1.6.6.2/lib/nokogiri/xml/node_set.rb:187:in `block in each'
from /var/lib/gems/2.1.0/gems/nokogiri-1.6.6.2/lib/nokogiri/xml/node_set.rb:186:in `upto'
from /var/lib/gems/2.1.0/gems/nokogiri-1.6.6.2/lib/nokogiri/xml/node_set.rb:186:in `each'
from import.rb:157:in `<main>'
Thank you. Fixed. I'll try to import pages too.