Created
September 24, 2018 20:14
-
-
Save Amaimersion/8512cf01db83178dcd32254efda25a6a to your computer and use it in GitHub Desktop.
Gets the text value of a non-rendered element (i.e. one that has `display: none`) and its children. See more – https://stackoverflow.com/questions/52480730/replace-n-in-non-render-non-display-element-text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Returns the text value of an element and its children.
 *
 * The text is extracted from the element's HTML markup rather than read
 * from `innerText` / `textContent`, because those properties give an
 * inappropriate result for elements that are not rendered
 * (i.e. that have `display: none`).
 *
 * Warning: this is comparatively slow, because it processes a slice of
 * HTML instead of just reading a text property.
 *
 * @param {Document} dcmnt
 * The `document` in which the element is searched for. It is also used
 * to create the scratch `<textarea>` that decodes HTML entities.
 *
 * @param {string} selector
 * The CSS selector used to find the element (first match is used).
 *
 * @param {string} [separator=" "]
 * The separator placed between words / pieces of text of different
 * elements. Defaults to one space when omitted (`undefined` or `null`).
 * It is inserted literally; `$`-sequences have no special meaning.
 *
 * @returns {string}
 * The text value of the element if it was found, otherwise an empty string.
 *
 * @see https://stackoverflow.com/questions/52480730/replace-n-in-non-render-non-display-element-text
 */
function getTextValue(dcmnt, selector, separator) {
  // Fall back to the default only when no separator was supplied, so that
  // an explicit empty string is honoured instead of being replaced by " ".
  const glue = separator == null ? " " : separator;
  const element = dcmnt.querySelector(selector);

  if (!element) {
    return "";
  }

  /**
   * Decodes HTML entities by letting a `<textarea>` parse the string.
   * The element is created from `dcmnt`, not from the global `document`,
   * so the function only depends on the document it was given.
   *
   * @see https://stackoverflow.com/questions/7394748/whats-the-right-way-to-decode-a-string-that-has-special-html-entities-in-it#7394787
   */
  const decodeEntities = (html) => {
    const textArea = dcmnt.createElement("textarea");
    textArea.innerHTML = html;
    return textArea.value;
  };

  // Replace tags with a space BEFORE decoding entities. Decoding first would
  // turn escaped text such as "&lt;b&gt;" into "<b>", which the tag regex
  // would then delete as if it were real markup.
  // @see https://stackoverflow.com/questions/6743912/get-the-pure-text-without-html-element-by-javascript#answer-6744068
  const withoutTags = element.innerHTML.replace(/<[^>]*>/g, " ");

  // Decode entities, collapse every whitespace run (including decoded
  // non-breaking spaces) into a single space and trim both ends.
  const text = decodeEntities(withoutTags).replace(/\s+/g, " ").trim();

  // At this point words are separated by exactly one space. split/join
  // inserts the separator literally, unlike String.prototype.replace which
  // would interpret "$&", "$1", "$$", ... inside the separator.
  return text.split(" ").join(glue);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment