# This script imports articles from a WordPress database into a Voog site.
#
# It loads the requested articles from the WordPress database (table "db_posts")
# and imports them into the Voog site.
#
# It also converts referenced image URLs to the Voog structure and fixes basic broken HTML tags.
#
# NB! Links to non-image files get the wrong URL prefix during the migration and must be
# changed manually afterwards from "/photos" to "/files".
#
# NB! All images must be uploaded to your Voog site BEFORE running this script.
#
# Use the "wordpress_assets_downloader.rb" script to download all requested assets.
# You can download it from: https://gist.github.com/tanelj/fadf23ea1f020dfbed5f
#
#   ruby wordpress_assets_downloader.rb download
#
# Use the "voog_assets_uploader.rb" script to upload the assets to your Voog site.
#
# NB! Modify the variables in the "Configuration" section before running this script.
#
# You can get the correct values for "@assets_folder_map" from the output of this command:
#
#   ruby wordpress_assets_downloader.rb show-used-folders
#
# Run this script:
#
#   ruby wordpress_to_voog_migrator.rb
#
# Required gems:
#   gem install sequel (tested with 4.31.0)
#   gem install mysql2
#   gem install nokogiri
#   gem install voog_api (min v0.0.11)
#
# More about the Voog API: http://www.voog.com/developers/api/
require 'rubygems'
require 'sequel'
require 'mysql2'
require 'voog_api'
require 'nokogiri'

# === Configuration

# NOTE(review): @host is not referenced anywhere else in this script; presumably it is the
# address of the source WordPress site - confirm before relying on it.
@host = 'http://www.mycompany.com'

# Your Voog site host
@voog_host = 'mycompany.voog.com'
# Your Voog API token
# Read more: http://www.voog.com/support/guides/developers/developer-account-basics#generate-api-token
@voog_token = 'xxxxxxxxxxxx'
# Image prefix for assets in the Voog site. Migrated asset folder URLs are rewritten to it.
@images_prefix = '/photos'
# If you know the image prefix of your Voog site, use it instead.
# You can find it by uploading an image to your Voog site and checking its URL.
# @images_prefix = '//media.voog.com/0000/0000/0001/photos'

# Migration config: one entry per blog to migrate.
#   source_term_path - value of the "slug" column in the "db_terms" table that selects the
#                      articles to migrate (the term is matched within the "language" taxonomy).
#   target_blog_path - path of the blog page in your Voog site (the page must already exist).
@migration_conf = [
  {
    target_blog_path: 'blog-test',
    source_term_path: 'en'
  }
]

# Database name for WordPress
@database_name = 'my_wp_database'

# Set up the connection to the WordPress database (MySQL on localhost, user "root", empty password).
DB = Sequel.mysql2(@database_name, host: 'localhost', user: 'root', password: '')

# Asset folders referenced by the WordPress content. A quoted reference that starts with one of
# these folders is rewritten to start with @images_prefix instead.
# Use "ruby wordpress_assets_downloader.rb show-used-folders" to generate this list.
@assets_folder_map = [
  "http://mycompany.com/wp-content/uploads/2016/01",
  "http://mycompany.com/wp-content/uploads/2015/12",
  "http://mycompany.com/wp-content/uploads/2015/11",
  "http://mycompany.com/wp-content/uploads/2015/10",
  "http://mycompany.com/wp-content/uploads/2015/09",
  "http://mycompany.com/wp-content/uploads/2015/08",
  "http://mycompany.com/wp-content/uploads/2015/06",
  "http://mycompany.com/wp-content/uploads/2015/05",
  "http://mycompany.com/wp-content/uploads/2015/04",
  "http://mycompany.com/wp-content/uploads/2015/03",
  "http://mycompany.com/wp-content/uploads/2015/02",
  "http://mycompany.com/wp-content/uploads/2015/01",
  "wp-content/uploads/2016/01",
  "wp-content/uploads/2015/12",
  "wp-content/uploads/2015/11",
  "wp-content/uploads/2015/10",
  "wp-content/uploads/2015/09",
  "wp-content/uploads/2015/08",
  "wp-content/uploads/2015/06",
  "wp-content/uploads/2015/05",
  "wp-content/uploads/2015/04",
  "wp-content/uploads/2015/03",
  "wp-content/uploads/2015/02",
  "wp-content/uploads/2015/01"
]

# Patterns for document-level markup that is stripped from article text.
#
# Tag attributes are matched with "[^>]*" rather than ".*", so a match stops at the end of the
# tag itself and never swallows content that follows it on the same line. "\b" after the tag
# name keeps "<head" from matching "<header".
@cleanup_strings = [
  /<!DOCTYPE[^>]*>/i,
  /<html\b[^>]*>/i,
  %r{</html>}i,
  %r{<head\b[^>]*>.*?</head>}mi,
  /<body\b[^>]*>/i,
  %r{</body>}i
]

# Cleanup text. Also removes tags given by @cleanup_strings.
#
# After stripping, line endings are normalized to "\n" (a "\r\n" pair counts as ONE line break),
# whitespace-only lines are emptied, runs of three or more newlines are collapsed to two, and
# surrounding whitespace is trimmed.
#
# @param source_text [String, nil] raw text; nil is treated as an empty string
# @return [String] a new, cleaned string (the argument is never modified)
def cleanup_text(source_text)
  stripped = @cleanup_strings.reduce(source_text.to_s) { |text, regex| text.gsub(regex, '') }

  stripped
    .gsub(/\r\n?/, "\n")
    .gsub(/^[[:space:]]*$/, "\n")
    .gsub(/\n{3,}/, "\n\n")
    .strip
end

# Cleanup text and fix broken or partial HTML tags.
#
# The text is parsed as an HTML fragment with Nokogiri and serialized back (this round trip is
# what repairs the markup). The result then gets its line endings normalized to "\n" (a "\r\n"
# pair counts as ONE line break), whitespace-only lines emptied, runs of three or more newlines
# collapsed to two, and surrounding whitespace trimmed.
#
# @param source_text [String, nil] raw HTML; nil is treated as an empty string
# @return [String] cleaned HTML
def cleanup_text_with_nokogiri(source_text)
  fragment = Nokogiri::HTML::DocumentFragment.parse(source_text.to_s)
  # Prefer the contents of a <body> element when the parsed fragment contains one.
  body = fragment.at('body')
  html = body ? body.inner_html : fragment.to_s

  html.to_s
      .gsub(/\r\n?/, "\n")
      .gsub(/^[[:space:]]*$/, "\n")
      .gsub(/\n{3,}/, "\n\n")
      .strip
end

# Convert local links in "href" and image "src" attributes to be suitable for target site.
#
# Every quoted value (double or single quotes) that starts with one of the folders listed in
# @assets_folder_map, followed by "/", gets that folder replaced with @images_prefix. All
# folders are handled in a single pass, so already rewritten text is never rewritten again.
#
# @param source_text [String, nil] HTML to convert; nil is treated as an empty string
# @return [String] a new string with the asset folders rewritten (the argument is not modified)
def convert_links(source_text)
  folders = Array(@assets_folder_map)
  # Regexp.union escapes the folder strings; an empty list yields a pattern that never matches.
  pattern = %r{(["'])#{Regexp.union(folders)}/}

  source_text.to_s.gsub(pattern) { "#{Regexp.last_match(1)}#{@images_prefix}/" }
end

# Return the Voog API client. It is built from @voog_host and @voog_token on first use and
# then reused for every later call.
#
# NOTE(review): "protocol: :http" means the API token travels unencrypted; consider :https if
# the site and the voog_api gem version support it - confirm before changing.
def client
  @client ||= Voog::Client.new(
    @voog_host,
    @voog_token,
    protocol: :http,
    auto_paginate: true,
    raise_on_error: true
  )
end

# Database base query for the WordPress database.
#
# Returns the (memoized) dataset of records from the content table ("db_posts", aliased "p")
# joined to their "language" taxonomy term ("db_terms", aliased "t"), filtered to published
# posts (post_status = "publish", post_type = "post").
#
# Aliases and qualified columns are built with Sequel.as / Sequel.qualify instead of the
# ":table___alias" / ":table__column" symbol shorthand, because Sequel 5 no longer splits
# symbols by default; the explicit form works on both Sequel 4 and 5.
def database_base_query
  @database_base_query ||= DB[Sequel.as(:db_posts, :p)]
    .join(
      Sequel.as(:db_term_relationships, :trs),
      { Sequel.qualify(:trs, :object_id) => Sequel.qualify(:p, :ID) }
    )
    .join(
      Sequel.as(:db_term_taxonomy, :tt),
      {
        Sequel.qualify(:tt, :term_taxonomy_id) => Sequel.qualify(:trs, :term_taxonomy_id),
        Sequel.qualify(:tt, :taxonomy) => 'language'
      }
    )
    .join(
      Sequel.as(:db_terms, :t),
      { Sequel.qualify(:t, :term_id) => Sequel.qualify(:tt, :term_id) }
    )
    .where(
      {
        Sequel.qualify(:p, :post_status) => 'publish',
        Sequel.qualify(:p, :post_type) => 'post'
      }
    )
end

# Database base query for WordPress tags (terms).
#
# Returns the (memoized) dataset of "db_terms" rows (aliased "t") in the "category" taxonomy,
# joined to "db_term_relationships" (aliased "trs") so callers can filter by the related object.
#
# Aliases and qualified columns are built with Sequel.as / Sequel.qualify instead of the
# ":table___alias" / ":table__column" symbol shorthand, because Sequel 5 no longer splits
# symbols by default; the explicit form works on both Sequel 4 and 5.
def database_tags_base_query
  @database_tags_base_query ||= DB[Sequel.as(:db_terms, :t)]
    .join(
      Sequel.as(:db_term_taxonomy, :tt),
      {
        Sequel.qualify(:tt, :term_id) => Sequel.qualify(:t, :term_id),
        Sequel.qualify(:tt, :taxonomy) => 'category'
      }
    )
    .join(
      Sequel.as(:db_term_relationships, :trs),
      { Sequel.qualify(:trs, :term_taxonomy_id) => Sequel.qualify(:tt, :term_taxonomy_id) }
    )
end

# Migrate all articles defined in @migration_conf from the WordPress database to the Voog site.
#
# For every entry the target blog page is looked up by its path; when it is missing an error
# is printed and the entry is skipped. Progress is reported on standard output.
def migrate_articles!
  puts "=== Migrating articles"

  @migration_conf.each do |conf|
    puts "\n\n--- Processing articles for '#{conf[:target_blog_path]}'"
    puts "Fetching information for target blog page..."
    blog = client.pages(path: conf[:target_blog_path], content_type: 'blog').first

    unless blog
      puts "ERROR: Blog page was not found in target server"
      next
    end

    puts "Migrating articles to #{blog.public_url}"
    migrate_blog_articles(blog, conf[:source_term_path])
  end
end

# Create one Voog article under +blog+ for every published post whose language term slug is
# +source_term_path+, oldest post first.
def migrate_blog_articles(blog, source_term_path)
  batch_step = 50

  # Columns are qualified with Sequel.qualify rather than the ":t__slug" shorthand, which
  # Sequel 5 no longer splits by default.
  items = database_base_query
    .where({ Sequel.qualify(:t, :slug) => source_term_path })
    .order(:post_date)
    .all
  puts "Total: #{items.size}"

  items.each.with_index(1) do |item, index|
    puts "--> #{index}: (ID=#{item[:ID]}) #{item[:post_title]} (#{item[:post_name]})"
    # Pause after every batch_step articles (presumably to go easy on the Voog API).
    sleep 5 if (index % batch_step).zero?

    tags = database_tags_base_query
      .where({ Sequel.qualify(:trs, :object_id) => item[:ID] })
      .map(:name)

    article = client.create_article(article_attributes(blog, item, tags))
    if article
      puts "<-- #{article.public_url}"
    else
      puts "ERROR: Something went wrong"
    end
  end
end

# Build the attribute hash sent to the Voog API for one WordPress post row.
def article_attributes(blog, item, tags)
  {
    page_id: blog.id,
    path: item[:post_name],
    autosaved_title: item[:post_title],
    autosaved_excerpt: convert_links(cleanup_text_with_nokogiri(item[:post_excerpt])),
    autosaved_body: convert_links(cleanup_text_with_nokogiri(item[:post_content])),
    tag_names: tags,
    publishing: true,
    created_at: item[:post_date].strftime('%d.%m.%Y')
  }
end

# Run the migration.
migrate_articles!