Created
August 6, 2013 20:48
-
-
Save synhershko/6168484 to your computer and use it in GitHub Desktop.
Code to import a WordPress dump / backup file into NSemble. Create a new Console app, add the following files, and add these as links: BlogPost.cs, Constants.cs, DynamicContentHelpers.cs, NSembleUserAuthentication.cs, PostComments.cs, User.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Web; | |
using NSemble.Core.Models; | |
namespace NSemble.Core.Extensions | |
{ | |
/// <summary>
/// Helpers for building URL slugs from titles and for turning WordPress-style
/// <c>[code]</c> blocks into Markdown indented code.
/// </summary>
public static class DynamicContentHelpers
{
    /// <summary>Prefix that makes a line part of a Markdown indented code block (four spaces).</summary>
    private const string MarkdownCodeIndent = "    ";

    /// <summary>Line terminators recognised when splitting code; CRLF comes first so it is matched as a single break.</summary>
    private static readonly string[] LineBreaks = { "\r\n", "\n", "\r" };

    // Quote-like characters and ampersands that are dropped (not replaced by a dash) when slugging.
    static readonly Regex QuotesAndAmpersands = new Regex("[’'“”\"&]{1,}", RegexOptions.Compiled);

    // Runs of whitespace that collapse into a single dash.
    static readonly Regex WhitespaceRuns = new Regex(@"\s{1,}", RegexOptions.Compiled);

    // Matches [code lang=...]...[/code] blocks; not referenced by the members visible in this class.
    static readonly Regex CodeBlockFinder = new Regex(@"\[code lang=(.+?)\s*\](.*?)\[/code\]", RegexOptions.Compiled | RegexOptions.Singleline);

    // Matches the leading whitespace of a line.
    static readonly Regex FirstLineSpacesFinder = new Regex(@"^(\s|\t)+", RegexOptions.Compiled);

    /// <summary>
    /// Converts a title into a lower-case, dash-separated slug.
    /// </summary>
    /// <param name="title">The title to convert; may be null or empty.</param>
    /// <returns>The slug, or an empty string when <paramref name="title"/> is null or empty.</returns>
    public static string TitleToSlug(string title)
    {
        if (string.IsNullOrEmpty(title))
        {
            return string.Empty;
        }

        // 1 - Strip diacritical marks using Michael Kaplan's function or equivalent
        title = RemoveDiacritics(title);
        // 2 - Lowercase the string for canonicalization
        title = title.ToLowerInvariant();
        // 3 - Replace all the non-word characters with dashes
        title = ReplaceNonWordWithDashes(title);
        // 4 - Trim leading/trailing whitespace and dashes left over from step 3
        return title.Trim(' ', '-');
    }

    // http://blogs.msdn.com/michkap/archive/2007/05/14/2629747.aspx
    /// <summary>
    /// Removes diacritical marks by decomposing the string (FormD), dropping every
    /// non-spacing mark, and recomposing the result (FormC).
    /// </summary>
    /// <param name="value">The string to normalize.</param>
    /// <returns>The string without non-spacing marks.</returns>
    /// <seealso href="http://stackoverflow.com/questions/249087/how-do-i-remove-diacritics-accents-from-a-string-in-net"/>
    private static string RemoveDiacritics(string value)
    {
        var decomposed = value.Normalize(NormalizationForm.FormD);
        var sb = new StringBuilder(decomposed.Length);
        foreach (var ch in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
            {
                sb.Append(ch);
            }
        }
        return sb.ToString().Normalize(NormalizationForm.FormC);
    }

    /// <summary>
    /// Drops quotes/ampersands, then turns every run of non-alphanumeric characters into one dash.
    /// </summary>
    /// <param name="title">The (already lower-cased) title.</param>
    /// <returns>The title with dashes between alphanumeric runs; may start or end with a dash.</returns>
    private static string ReplaceNonWordWithDashes(string title)
    {
        // Remove apostrophes, quotes and ampersands outright so "it's" becomes "its" rather than "it-s"
        title = QuotesAndAmpersands.Replace(title, "");

        // Replace every non-alphanumeric character by a space
        var builder = new StringBuilder(title.Length);
        foreach (var ch in title)
        {
            builder.Append(char.IsLetterOrDigit(ch) ? ch : ' ');
        }

        // Collapse each run of spaces into a single dash
        return WhitespaceRuns.Replace(builder.ToString(), "-");
    }

    /// <summary>
    /// Builds the replacement text for one <c>[code]</c> block: the HTML-decoded code as a
    /// Markdown indented block, surrounded by line breaks.
    /// </summary>
    /// <param name="lang">Language of the block. Currently unused: indented blocks carry no language.</param>
    /// <param name="code">HTML-encoded code; null is treated as empty.</param>
    /// <returns>The indented code with a leading and trailing newline.</returns>
    private static string GenerateCodeBlock(string lang, string code)
    {
        code = HttpUtility.HtmlDecode(code ?? string.Empty);
        return string.Format("{0}{1}{0}", Environment.NewLine, ConvertMarkdownCodeStatment(code));
    }

    /// <summary>
    /// Re-indents code as a Markdown indented code block. The first line's leading whitespace
    /// is treated as the block's base indentation and removed from every line before the
    /// four-space Markdown prefix is added.
    /// </summary>
    /// <param name="code">The code to convert; null yields an empty string.</param>
    /// <returns>The code with each line prefixed by four spaces, joined by <see cref="Environment.NewLine"/>.</returns>
    private static string ConvertMarkdownCodeStatment(string code)
    {
        if (code == null)
        {
            return string.Empty;
        }

        // Accept any line-ending convention, not just the one of the machine running the import.
        var lines = code.Split(LineBreaks, StringSplitOptions.None);
        var baseIndent = GetFirstLineSpaces(lines.FirstOrDefault()).Length;

        // Only whitespace is stripped, so a line indented less than the first one keeps all of its code.
        var formattedLines = lines.Select(l => MarkdownCodeIndent + l.Substring(CountLeadingWhitespace(l, baseIndent)));
        return string.Join(Environment.NewLine, formattedLines);
    }

    /// <summary>Counts the leading whitespace characters of <paramref name="line"/>, up to <paramref name="limit"/>.</summary>
    private static int CountLeadingWhitespace(string line, int limit)
    {
        var count = 0;
        while (count < limit && count < line.Length && char.IsWhiteSpace(line[count]))
        {
            count++;
        }
        return count;
    }

    /// <summary>
    /// Returns the leading whitespace of <paramref name="firstLine"/>.
    /// </summary>
    /// <param name="firstLine">The line to inspect; may be null.</param>
    /// <returns>The leading whitespace, or an empty string when there is none or the line is null.</returns>
    private static string GetFirstLineSpaces(string firstLine)
    {
        if (firstLine == null)
        {
            return string.Empty;
        }

        var match = FirstLineSpacesFinder.Match(firstLine);
        return match.Success ? match.Value : string.Empty;
    }
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Imports a WordPress export file into the "NSemble" database of a RavenDB server at
/// http://localhost:8080: creates an admin user, stores every non-page post together with its
/// comments, builds a redirects table from the old post URLs to the new ones, and stores the
/// area configuration.
/// </summary>
/// <param name="args">
/// Optional: the first argument is the path of the export file. When omitted, the original
/// hard-coded path is used.
/// </param>
static void Main(string[] args)
{
    var exportPath = args != null && args.Length > 0 ? args[0] : @"C:\wordpress.2013-08-02.xml";

    var reader = new WordPressExportReader();
    var posts = reader.Read(exportPath);
    var redirectsTable = new RedirectsTable();

    using (var store = new DocumentStore {Url = "http://localhost:8080", DefaultDatabase = "NSemble"}.Initialize())
    {
        using (var session = store.OpenSession())
        {
            var user = new User { UserName = "[email protected]", FirstName = "Foo", LastName = "Bar", Claims = new List<string> { "admin" } };
            NSembleUserAuthentication.SetUserPassword(user, "password");
            session.Store(user, "users/" + user.UserName);

            foreach (var post in posts)
            {
                if (post.IsPage)
                {
                    continue;
                }

                var blogPost = new BlogPost
                {
                    AllowComments = true,
                    // Counts replies as well, so the number matches what is stored below.
                    CommentsCount = CountComments(post.Comments),
                    Content = post.Content,
                    ContentType = DynamicContentType.Markdown,
                    Title = post.Title,
                    Tags = post.Tags,
                    PublishedAt = post.PublishedAt,
                    LastEditedAt = null,
                    PrivateViewingKey = Guid.NewGuid().ToString(),
                    CurrentState = post.IsDraft ? BlogPost.State.Draft : BlogPost.State.Public,
                    AuthorId = "users/" + user.UserName,
                };

                var comments = new PostComments();
                foreach (var c in post.Comments)
                {
                    comments.Comments.Add(ConvertComment(c));
                }

                // Posts without a title or body are never shown publicly.
                if (string.IsNullOrWhiteSpace(blogPost.Title) || string.IsNullOrWhiteSpace(blogPost.Content))
                {
                    blogPost.CurrentState = BlogPost.State.Private;
                }

                // The post must be stored first: its Id is read below.
                session.Store(blogPost);
                session.Store(comments, blogPost.Id + "/comments");

                if (blogPost.CurrentState == BlogPost.State.Public)
                {
                    // Old URL: /blog/yyyy/MM/slug  ->  new URL: /blog/yyyy/MM/<id part after the slash>-slug
                    redirectsTable.theTable.Add(
                        string.Format("/blog/{0}/{1}/{2}", post.PublishedAt.Year, post.PublishedAt.Month.ToString("D2"), post.Slug),
                        new RedirectsTable.RedirectCommand
                        {
                            HttpStatusCode = HttpStatusCode.MovedPermanently,
                            NewRoute = string.Format("/blog/{0}/{1}/{3}-{2}", post.PublishedAt.Year, post.PublishedAt.Month.ToString("D2"), post.Slug, blogPost.Id.Substring(blogPost.Id.IndexOf('/') + 1)),
                        });
                }
            }

            redirectsTable.theTable.Add("/", new RedirectsTable.RedirectCommand {HttpStatusCode = HttpStatusCode.SeeOther, NewRoute = "/blog"});
            session.Store(redirectsTable, Constants.RedirectsTableDocumentId);

            session.Store(new Dictionary<string, AreaConfigs>
            {
                {"/blog", new AreaConfigs { AreaName = "MyBlog", ModuleName = "Blog" }},
                {"/", new AreaConfigs { AreaName = "MyContent", ModuleName = "ContentPages" }},
                {"/auth", new AreaConfigs { AreaName = "Auth", ModuleName = "Membership" }}
            }, Constants.AreasDocumentName);

            session.SaveChanges();
        }
    }
}

/// <summary>
/// Converts an imported WordPress comment, including its whole reply thread, into the stored
/// comment shape. Every comment is marked approved.
/// </summary>
private static PostComments.Comment ConvertComment(WordPressPost.Comment source)
{
    var converted = new PostComments.Comment
    {
        Approved = true,
        Author = source.AuthorName,
        Content = source.Content,
        CreatedAt = source.PostedAt,
        Email = source.AuthorEmail,
        UserHostAddress = source.AuthorIP,
        Website = source.AuthorUrl,
        Replies = new List<PostComments.Comment>(),
    };

    // Replies may be null on comments that never received one.
    if (source.Replies != null)
    {
        foreach (var reply in source.Replies)
        {
            converted.Replies.Add(ConvertComment(reply));
        }
    }

    return converted;
}

/// <summary>Counts the given comments plus all of their nested replies.</summary>
private static int CountComments(List<WordPressPost.Comment> comments)
{
    if (comments == null)
    {
        return 0;
    }

    var total = 0;
    foreach (var comment in comments)
    {
        total += 1 + CountComments(comment.Replies);
    }
    return total;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Xml;
using System.Xml.XPath;
namespace WordPressExportReader | |
{ | |
/// <summary>
/// Reads the items of a WordPress export (WXR) file into <see cref="WordPressPost"/> objects.
/// </summary>
public class WordPressExportReader
{
    /// <summary>Namespace used for the "wp" prefix when the document does not declare one.</summary>
    private const string DefaultWpNamespace = "http://wordpress.org/export/1.1/";

    /// <summary>
    /// Loads the export file and returns one entry per <c>rss/channel/item</c> element.
    /// Trashed items are skipped. Only "publish" and "inherit" items are returned as
    /// non-drafts; every other status (draft, private, pending, ...) is flagged as a draft so
    /// it is never published by accident. Only approved comments are returned.
    /// </summary>
    /// <param name="exportPath">Path of the export file.</param>
    /// <returns>The posts and pages found in the file, in document order.</returns>
    public List<WordPressPost> Read(string exportPath)
    {
        var doc = new XmlDocument();
        doc.Load(exportPath);

        var manager = new XmlNamespaceManager(doc.NameTable);
        // The "wp" namespace URI carries the export version, so take it from the document itself.
        manager.AddNamespace("wp", ResolveWpNamespace(doc));
        manager.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");
        manager.AddNamespace("wfw", "http://wellformedweb.org/CommentAPI/");
        manager.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");

        var nav = doc.CreateNavigator();
        var iterator = nav.Select(nav.Compile("rss/channel/item"));

        var results = new List<WordPressPost>();
        while (iterator.MoveNext())
        {
            var item = iterator.Current;
            var p = new WordPressPost();

            switch (ReadValue(item, "wp:status", manager))
            {
                case "publish":
                case "inherit":
                    break;
                case "trash":
                    continue;
                default:
                    p.IsDraft = true;
                    break;
            }

            p.IsPage = "page".Equals(ReadValue(item, "wp:post_type", manager));
            p.Title = ReadValue(item, "title", manager);
            p.Content = ReadValue(item, "content:encoded", manager);
            p.Slug = ReadValue(item, "wp:post_name", manager);
            p.PublishedAt = ReadPublishedAt(item, manager);
            p.OriginalUrl = ReadValue(item, "link", manager);

            p.Tags = new List<string>();
            var tagsIterator = item.Select("category");
            while (tagsIterator.MoveNext())
            {
                p.Tags.Add(tagsIterator.Current.Value);
            }

            p.Comments = ReadComments(item, manager);
            results.Add(p);
        }

        return results;
    }

    /// <summary>Returns the namespace URI bound to "wp" on the root element, or the 1.1 default.</summary>
    private static string ResolveWpNamespace(XmlDocument doc)
    {
        var root = doc.DocumentElement;
        var declared = root != null ? root.GetNamespaceOfPrefix("wp") : null;
        return string.IsNullOrEmpty(declared) ? DefaultWpNamespace : declared;
    }

    /// <summary>Returns the text of the first node matching <paramref name="xpath"/>, or an empty string when there is none.</summary>
    private static string ReadValue(XPathNavigator node, string xpath, XmlNamespaceManager manager)
    {
        var match = node.SelectSingleNode(xpath, manager);
        return match != null ? match.Value : string.Empty;
    }

    /// <summary>Reads an integer element; a missing or malformed value yields 0.</summary>
    private static int ReadInt(XPathNavigator node, string xpath, XmlNamespaceManager manager)
    {
        int value;
        return int.TryParse(ReadValue(node, xpath, manager), NumberStyles.Integer, CultureInfo.InvariantCulture, out value)
            ? value
            : 0;
    }

    /// <summary>
    /// Reads the publication date from <c>pubDate</c>, falling back to <c>wp:post_date_gmt</c>
    /// (read as UTC) when <c>pubDate</c> cannot be parsed. Returns the default
    /// <see cref="DateTimeOffset"/> when neither is usable, instead of aborting the import.
    /// </summary>
    private static DateTimeOffset ReadPublishedAt(XPathNavigator item, XmlNamespaceManager manager)
    {
        DateTimeOffset parsed;
        if (DateTimeOffset.TryParse(ReadValue(item, "pubDate", manager), CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed))
        {
            return parsed;
        }
        if (DateTimeOffset.TryParse(ReadValue(item, "wp:post_date_gmt", manager), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out parsed))
        {
            return parsed;
        }
        return default(DateTimeOffset);
    }

    /// <summary>
    /// Reads the approved comments of one item and threads them by <c>wp:comment_parent</c>.
    /// A reply is attached to its parent at any depth. A reply whose parent is not among the
    /// approved comments read so far is kept as a top-level comment rather than dropped.
    /// </summary>
    /// <returns>The top-level comments; replies hang off their <c>Replies</c> lists.</returns>
    private static List<WordPressPost.Comment> ReadComments(XPathNavigator item, XmlNamespaceManager manager)
    {
        var topLevel = new List<WordPressPost.Comment>();
        var byId = new Dictionary<int, WordPressPost.Comment>();

        var commentsIterator = item.Select("wp:comment", manager);
        while (commentsIterator.MoveNext())
        {
            var node = commentsIterator.Current;
            if (!"1".Equals(ReadValue(node, "wp:comment_approved", manager)))
            {
                continue;
            }

            DateTimeOffset postedAt;
            // comment_date_gmt has no offset in the file; it is GMT by definition, not local time.
            DateTimeOffset.TryParse(ReadValue(node, "wp:comment_date_gmt", manager), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out postedAt);

            var comment = new WordPressPost.Comment
            {
                Id = ReadInt(node, "wp:comment_id", manager),
                AuthorName = ReadValue(node, "wp:comment_author", manager),
                AuthorEmail = ReadValue(node, "wp:comment_author_email", manager),
                AuthorIP = ReadValue(node, "wp:comment_author_IP", manager),
                PostedAt = postedAt,
                AuthorUrl = ReadValue(node, "wp:comment_author_url", manager),
                Content = ReadValue(node, "wp:comment_content", manager),
                Replies = new List<WordPressPost.Comment>(),
            };

            var commentType = ReadValue(node, "wp:comment_type", manager);
            if (!string.IsNullOrWhiteSpace(commentType))
            {
                comment.Type = commentType;
            }

            var parentId = ReadInt(node, "wp:comment_parent", manager);
            WordPressPost.Comment parent;
            if (parentId != 0 && byId.TryGetValue(parentId, out parent))
            {
                parent.Replies.Add(comment);
            }
            else
            {
                topLevel.Add(comment);
            }

            // Register every comment, replies included, so deeper replies can find their parent.
            byId[comment.Id] = comment;
        }

        return topLevel;
    }
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text; | |
namespace WordPressExportReader | |
{ | |
/// <summary>
/// A post or page read from a WordPress export, together with its comments.
/// </summary>
public class WordPressPost
{
    /// <summary>
    /// A single comment; replies to it are held in <see cref="Replies"/>.
    /// </summary>
    public class Comment
    {
        /// <summary>Creates a comment with an empty reply list.</summary>
        public Comment()
        {
            Replies = new List<Comment>();
        }

        public int Id { get; set; }
        public string AuthorName { get; set; }
        public string AuthorEmail { get; set; }
        public string AuthorUrl { get; set; }
        public string AuthorIP { get; set; }
        public string Content { get; set; }
        public string Type { get; set; }
        public DateTimeOffset PostedAt { get; set; }
        public List<Comment> Replies { get; set; }
    }

    /// <summary>Creates a post with empty tag and comment lists.</summary>
    public WordPressPost()
    {
        Tags = new List<string>();
        Comments = new List<Comment>();
    }

    public string Title { get; set; }
    public string Content { get; set; }
    public string Slug { get; set; }
    public string OriginalUrl { get; set; }
    public List<string> Tags { get; set; }
    public bool IsPage { get; set; }
    public bool IsDraft { get; set; }
    public DateTimeOffset PublishedAt { get; set; }
    public List<Comment> Comments { get; set; }

    /// <summary>
    /// Returns a one-line summary: optional "[!!Draft!!]" and "[Page]" markers, then the slug,
    /// the title and the number of top-level comments.
    /// </summary>
    public override string ToString()
    {
        var sb = new StringBuilder();
        if (IsDraft)
        {
            sb.Append("[!!Draft!!]");
        }
        if (IsPage)
        {
            sb.Append("[Page]");
        }
        sb.Append(' ');
        sb.Append(Slug);
        sb.Append(' ');
        sb.Append(Title);
        // Comments can be set to null by a caller; report that as zero rather than throwing.
        sb.AppendFormat(" ({0} comments)", Comments != null ? Comments.Count : 0);
        return sb.ToString();
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment