Skip to content

Instantly share code, notes, and snippets.

@cameronapak
Created March 28, 2020 00:07
Show Gist options
  • Save cameronapak/27db31f1a564a0cb167e3ddd6cbd1604 to your computer and use it in GitHub Desktop.
Save cameronapak/27db31f1a564a0cb167e3ddd6cbd1604 to your computer and use it in GitHub Desktop.
Bible.com - Strips all HTML that comes back from devotional content and adds spacing and escaping in its proper place.
/**
* Converts HTML from a Bible plan to String
* - REQUIRES: entities
*/
const entities = require('entities')
/**
 * Replaces every match of a pattern inside a string.
 *
 * `find` is used as regular-expression source (not as literal text), so
 * callers can pass alternations and quantifiers.
 *
 * source: https://stackoverflow.com/a/1144788
 *
 * @param {string} str - text to search
 * @param {string} find - regular-expression source to look for
 * @param {string} replace - replacement for each match
 * @returns {string} the text with all matches replaced
 */
function replaceAll(str, find, replace) {
  // the 'g' flag makes String#replace act on all matches, not just the first
  const globalPattern = new RegExp(find, 'g')
  return str.replace(globalPattern, replace)
}
/**
 * Puts a new line at the end of all block content tags.
 *
 * Closing div, p, heading and li tags, and br tags (together with any
 * regular spaces directly in front of them), are each replaced by one new
 * line character. Opening tags are left untouched.
 *
 * Matching is case-insensitive: HTML tag names are not case-sensitive, and
 * an upper-case closing tag that is missed here would later be removed
 * without leaving a line break behind.
 *
 * @param {string} content - HTML markup
 * @returns {string} the markup with block endings turned into new lines
 */
function replaceBlockTagsWithNewLines(content) {
  return content.replace(/<\/div>|<\/p>|<\/h[1-6]+>| *<br ?\/?>+|<\/li>/gi, '\n')
}
/**
 * Replaces every opening list-item tag with a bullet and a space ("• ").
 *
 * The tag is matched case-insensitively and may carry attributes, so
 * `<LI>` and `<li class="x">` get a bullet as well. Other tags whose name
 * merely starts with "li" (such as `<link>`) and closing `</li>` tags are
 * not touched.
 *
 * @param {string} content - HTML markup
 * @returns {string} the markup with list items prefixed by bullets
 */
function replaceListTagsWithBullets(content) {
  // "<li" must be followed directly by ">" or by whitespace plus attributes
  return content.replace(/<li(?:\s[^>]*)?>/gi, '• ')
}
/**
 * Removes every HTML tag and keeps the text around it.
 *
 * A tag here is a "<", at least one character that is not ">", and then
 * a ">".
 *
 * @param {string} content - HTML markup
 * @returns {string} the content without tags
 */
function stripHTMLTags(content) {
  const anyTag = '<[^>]+>'
  const nothing = ''
  return replaceAll(content, anyTag, nothing)
}
/**
 * Normalises line breaks and spaces.
 *
 * - CR LF and lone CR line endings become LF.
 * - Every run of new lines (a single one included) becomes exactly two.
 * - Every run of regular and/or non-breaking spaces becomes one regular space.
 * - Leading and trailing whitespace is trimmed.
 *
 * @param {string} content - text to clean up
 * @returns {string} the cleaned-up text
 */
function cleanupSpacing(content) {
  return content
    /* unify new lines
     * \r = CR (Carriage Return) → Used as a new line character in Mac OS before X
     * \n = LF (Line Feed) → Used as a new line character in Unix / Mac OS X
     * \r\n = CR + LF → Used as a new line character in Windows
     * source: https://stackoverflow.com/a/15433225
     */
    .replace(/\r\n|\r/g, '\n')
    // one or more new lines are replaced with exactly two
    .replace(/\n+/g, '\n\n')
    // Use one character class, not an alternation of "nbsp" and "2+ spaces",
    // so a mixed run such as a non-breaking space followed by a regular
    // space collapses to a single space rather than two.
    .replace(/[ \u00a0]+/g, ' ')
    .trim()
}
/**
 * Strips all HTML that comes back from devotional content
 * and adds spacing and escaping in its proper place.
 *
 * The input goes through these steps in order: decode HTML entities,
 * turn block endings into new lines, turn list items into bullets,
 * strip the remaining tags, and clean up spacing.
 *
 * This is used to convert Bible devotional content to be
 * ready for the Stories devotional module, so that the
 * text can closely mimic the preview on iOS.
 *
 * Code from Bryan Montz and converted to JS by Cam Pak.
 *
 * @param {string} html
 * @returns {string} the plain-text version of the content
 */
function htmlToText(html) {
  // NOTE(review): entities are decoded before tags are stripped, so escaped
  // markup in the text (e.g. "&lt;b&gt;") is afterwards treated as a real
  // tag and removed - confirm this order is intended.
  const steps = [
    (text) => entities.decodeHTML(text),
    replaceBlockTagsWithNewLines,
    replaceListTagsWithBullets,
    stripHTMLTags,
    cleanupSpacing
  ]
  return steps.reduce((text, step) => step(text), html)
}
export default htmlToText
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment