Last active
September 7, 2016 08:04
-
-
Save Warsaalk/8e787cb1067eeb36467235d7a70d7e1d to your computer and use it in GitHub Desktop.
Parse HTML to DOM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Parse an HTML string into a DocumentFragment without using innerHTML.
 *
 * The input is tokenized with a regular expression into alternating text and
 * tag pieces; elements are created with document.createElement and nested
 * according to the order of opening/closing tags.
 *
 * Limitations (by design of the regex tokenizer):
 *  - Only quoted (`"…"` / `'…'`) or value-less attributes are recognized; a tag
 *    with any other syntax is emitted as plain text.
 *  - Comments, doctypes and CDATA are not recognized and end up as text.
 *  - Only `&nbsp;` is decoded; other character references are left untouched.
 *  - Nothing is sanitized: `<script>` elements and `on*` attributes present in
 *    the input are created as-is, so do not feed untrusted markup to this
 *    function and then insert the result into a live document.
 *
 * @param {string} html - Markup to parse. `null`/`undefined` yield an empty fragment.
 * @returns {DocumentFragment} Fragment containing the parsed nodes.
 */
function parseHTMLtoDOM (html) {
    // Void elements: they never get children and never need a closing tag.
    var singletons = ['area','base','br','col','command','embed','hr','img','input','keygen','link','meta','param','source','track','wbr']; // HTML 5 - 6/9/2016

    // Match tags (opening incl. attributes & closing). The single capture group
    // makes String#split keep the tags in its result.
    // Quoted values use negated classes so they can span lines and can never
    // run past their own closing quote.
    var regexTags = /(<[a-zA-Z][a-zA-Z0-9-]*(?:\s+[a-zA-Z_:][a-zA-Z0-9_:.-]*(?:=(?:"[^"]*"|'[^']*'))?)*\s*\/?>|<\/[a-zA-Z][a-zA-Z0-9-]*\s*>)/;
    // Match one attribute: [1] name, [2] double-quoted value, [3] single-quoted value.
    var regexAttr = /\s+([a-zA-Z_:][a-zA-Z0-9_:.-]*)(?:=(?:"([^"]*)"|'([^']*)'))?/g;
    // Split a tag token into [1] the closing slash (if any) and [2] the tag name.
    var regexTagName = /^<(\/)?([a-zA-Z][a-zA-Z0-9-]*)/;

    var source = (html === null || html === undefined) ? '' : String(html);
    source = source.replace(/&nbsp;/g, '\u00A0'); // TODO: add more encoding fixes

    var parts = source.split(regexTags);
    var fragment = document.createDocumentFragment();
    var lastElement = fragment;

    for (var i = 0, il = parts.length; i < il; i++) {
        var part = parts[i];
        if (part.length === 0) {
            continue;
        }

        // split() with one capture group alternates text (even index) and
        // tag (odd index), so text that merely starts with "<" stays text.
        if (i % 2 === 0) {
            lastElement.appendChild(document.createTextNode(part)); // Append content to the last element
            continue;
        }

        var tag = regexTagName.exec(part);
        var tagName = tag[2];
        var isSingleton = singletons.indexOf(tagName.toLowerCase()) !== -1;

        if (tag[1] === undefined) { // Opening tag
            var element = document.createElement(tagName);
            var attribute;
            regexAttr.lastIndex = 0; // The /g regex is stateful; always start from the beginning of this tag
            while ((attribute = regexAttr.exec(part)) !== null) { // Loop attributes
                var value = attribute[2] !== undefined ? attribute[2] : attribute[3];
                element.setAttribute(attribute[1], value !== undefined ? value : ''); // Value-less attributes get ""
            }
            lastElement.appendChild(element);
            if (!isSingleton) { // Singletons can't have child elements
                lastElement = element;
            }
        } else if (!isSingleton && lastElement !== fragment) {
            // Closing tag: go one level up. Closing tags of void elements and
            // stray closing tags at the root are ignored instead of walking
            // past the fragment (whose parent is null).
            lastElement = lastElement.parentNode;
        }
    }

    return fragment;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment