Created
March 28, 2020 00:07
-
-
Save cameronapak/27db31f1a564a0cb167e3ddd6cbd1604 to your computer and use it in GitHub Desktop.
Bible.com - Strips all HTML that comes back from devotional content and adds spacing and escaping in its proper place.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts HTML from a Bible plan to a plain-text string.
 * - REQUIRES: the `entities` package
 */
const entities = require('entities') | |
/**
 * Replaces every match of a pattern within a string.
 *
 * `find` is compiled as regular-expression source with the global flag, so
 * regex metacharacters in it are NOT escaped.
 * source: https://stackoverflow.com/a/1144788
 *
 * @param {string} str - text to search
 * @param {string} find - regular-expression source to look for
 * @param {string} replace - replacement text
 * @returns {string} a copy of `str` with all matches replaced
 */
function replaceAll(str, find, replace) {
  const everyMatch = new RegExp(find, 'g')
  return str.replace(everyMatch, replace)
}
/**
 * Puts a new line at the end of all block content: each closing `</div>`,
 * `</p>`, `</h1>`-`</h6>` or `</li>` tag, and each `<br>` tag (together with
 * any spaces directly in front of it), is replaced with "\n".
 *
 * Matching is case-insensitive and tolerates whitespace before the closing
 * `>` as well as attributes on `<br>`, so markup such as `</P>`, `</div >`
 * or `<br class="x"/>` also yields a line break.
 *
 * @param {string} content - HTML markup
 * @returns {string} the markup with those tags replaced by "\n"; all other
 *   tags are left untouched
 */
function replaceBlockTagsWithNewLines(content) {
  // The `+` quantifiers are kept for backward compatibility with input that
  // was accepted before (e.g. `<br>>`).
  const blockBoundary = /<\/(?:div|p|h[1-6]+|li)\s*>| *<br(?:\s[^>]*)?\/?>+/gi
  return content.replace(blockBoundary, '\n')
}
/**
 * Replaces every opening `<li>` tag with a bullet followed by a space.
 *
 * Matching is case-insensitive and accepts attributes, so `<LI>` and
 * `<li class="x">` are converted too. Tags that merely start with "li",
 * such as `<link>`, are not touched.
 *
 * @param {string} content - HTML markup
 * @returns {string} the markup with opening list-item tags replaced by "• "
 */
function replaceListTagsWithBullets(content) {
  // After "<li" require either ">" or whitespace, so "<link ...>" cannot match.
  const listItemOpen = /<li(?:\s[^>]*)?>/gi
  return content.replace(listItemOpen, '• ')
}
/**
 * Removes HTML tags from a string, keeping the text between them.
 *
 * Only spans that open like markup are removed: `<` followed by a letter,
 * by `/` and a letter, or by `!` / `?` (comments, doctype, processing
 * instructions). A bare `<` in running text, as in "1 < 2 and 3 > 2", is
 * therefore kept instead of swallowing everything up to the next `>`.
 *
 * @param {string} content - text that may contain HTML tags
 * @returns {string} the text with the tags removed
 */
function stripHTMLTags(content) {
  const tagLike = /<(?:\/?[a-z]|[!?])[^>]*>/gi
  return content.replace(tagLike, '')
}
/**
 * Normalizes line breaks and spaces in plain text.
 *
 * Steps, in order:
 *  1. Convert CR+LF (Windows) and lone CR (Mac OS before X) to LF.
 *     source: https://stackoverflow.com/a/15433225
 *  2. Turn every run of one or more new lines into exactly two, so
 *     consecutive blocks are separated by a single blank line.
 *  3. Collapse every run of regular and non-breaking spaces into one regular
 *     space. Mixed runs such as " \u00a0 " are collapsed as a whole.
 *  4. Trim leading and trailing whitespace.
 *
 * @param {string} content - text to clean up
 * @returns {string} the cleaned-up text
 */
function cleanupSpacing(content) {
  return content
    .replace(/\r\n?/g, '\n')
    .replace(/\n+/g, '\n\n')
    // One character class for both kinds of space: an NBSP next to a normal
    // space must not leave a double space behind.
    .replace(/[ \u00a0]+/g, ' ')
    .trim()
}
/**
 * Strips all HTML that comes back from devotional content
 * and adds spacing and escaping in its proper place.
 *
 * This is used to convert Bible devotional content to be
 * ready for the Stories devotional module, so that the
 * text can closely mimic the preview on iOS.
 *
 * Code from Bryan Montz and converted to JS by Cam Pak.
 *
 * NOTE(review): entities are decoded before tags are stripped, so escaped
 * markup in the input (e.g. `&lt;b&gt;`) is presumably treated like a real
 * tag by the later steps. Confirm this ordering is intended.
 *
 * @param {string} html
 * @returns {string} the plain-text rendering of `html`
 */
function htmlToText(html) {
  // Each step receives the previous step's output; the order matters.
  const steps = [
    (text) => entities.decodeHTML(text),
    replaceBlockTagsWithNewLines,
    replaceListTagsWithBullets,
    stripHTMLTags,
    cleanupSpacing
  ]
  return steps.reduce((text, step) => step(text), html)
}

export default htmlToText
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment