Last active
April 10, 2024 17:01
-
-
Save herrstrietzel/aefd72c17919522dc39ebbee481986ea to your computer and use it in GitHub Desktop.
SO: HTML text to SVG
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<title>Convert HTML text to SVG</title> | |
<link href="https://fonts.googleapis.com/css2?family=Noto+Serif:ital,wdth,wght@0,62.5..100,100..900;1,62.5..100,100..900&display=swap" rel="stylesheet" /> | |
<link rel="stylesheet" href="./style.css"> | |
</head> | |
<body> | |
<h1>Convert HTML text to SVG</h1> | |
<p><label for="inpWidth">Text width:</label><input id="inpWidth" value="50" min="10" max="100" type="range" step="1"></p>
<h3>html text</h3> | |
<div lang="de" class="foreignBody resize" xmlns="http://www.w3.org/1999/xhtml" style="margin:0em; padding:1em"> | |
<p style="text-align:center" class="author">Franz Kafka</p> | |
<h1 style="text-align:center">The Metamorphosis</h1> | |
<div class="columns"> | |
<p>One morning, when <strong>Gregor Samsa</strong> woke from <em>troubled</em> dreams, he found | |
himself | |
transformed in | |
his bed into <em style="color:red; letter-spacing:0.1em">a horrible</em> | |
vermin.<sup>1</sup> | |
</p> | |
<p>He lay on his armour-like back, and if he lifted his head a little he could see his brown belly, | |
slightly | |
domed and divided by arches <strong><a href="https://stackoverflow.com/" style="color:green">link: into | |
stiff | |
sections.</a> </strong> The bed­ding was hardly | |
able to | |
cover it and seemed ready to slide off any moment.</p> | |
</div> | |
</div> | |
<h3>Output (svg)</h3> | |
<div id="svgWrp"></div> | |
<h3>SVG markup</h3> | |
<textarea id="output"></textarea> | |
<script src="script.js"></script> | |
<script> | |
// Wait for fonts before measuring — presumably so the text metrics come from
// the loaded web font rather than a fallback.
document.fonts.ready.then(function () {
  // Look every element up explicitly instead of relying on the implicit
  // id-named globals (`inpWidth`, `svgWrp`, `output`) the browser exposes.
  const htmlEl = document.querySelector(".foreignBody");
  const widthInput = document.getElementById("inpWidth");
  const svgWrapper = document.getElementById("svgWrp");
  const markupOutput = document.getElementById("output");

  // Text nodes are collected once and reused for every re-render.
  const textNodes = getTextNodesInEL(htmlEl);

  /**
   * Re-render the SVG preview and the pretty-printed markup textarea.
   */
  function updateSVG(htmlEl, textNodes) {
    const svg = html2SvgText(htmlEl, textNodes);
    svgWrapper.innerHTML = "";
    svgWrapper.append(svg);

    // Serialize once; the textarea gets a line-broken, single-quoted variant,
    // the console gets the raw markup.
    const rawMarkup = new XMLSerializer().serializeToString(svg);
    markupOutput.value = rawMarkup
      .replaceAll('"', '\'')
      .replaceAll('</', '\n</')
      .replaceAll('><', '>\n<');
    console.log(rawMarkup);
  }

  // init
  htmlEl.style.width = widthInput.value + '%';
  updateSVG(htmlEl, textNodes);

  // resize by range slider
  widthInput.addEventListener("input", (e) => {
    htmlEl.style.width = e.currentTarget.value + '%';
    updateSVG(htmlEl, textNodes);
  });

  // resize via the CSS resize handle: re-render when the mouse is released
  htmlEl.addEventListener("mouseup", () => {
    updateSVG(htmlEl, textNodes);
  });
});
</script> | |
</body> | |
</html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Convert the text of an HTML element to an SVG rendering.
 *
 * Side effects on the DOM: every processed text node has its `nodeValue`
 * whitespace-normalized in place, and the parent element of each text node
 * receives a `data-id` attribute of the form `<tagname>_<index>`.
 *
 * @param {Element} htmlEl - container whose bounding box defines the SVG size
 *   and the origin for all coordinates.
 * @param {?Iterable<Text>} [textNodes=null] - text nodes to convert; when
 *   omitted they are collected from `htmlEl` via `getTextNodesInEL`.
 * @returns {SVGSVGElement} the element produced by `renderSVGText`.
 */
function html2SvgText(htmlEl, textNodes = null) {
  // Approximated descender share of the font size, used to move from the
  // bottom of a line box up to the text baseline.
  const BASELINE_RATIO = 0.25;

  // either passed by argument or retrieved from the element
  const nodes = textNodes ?? getTextNodesInEL(htmlEl);

  // Replace new lines/tabs by spaces, collapse space runs, trim, and append
  // exactly one trailing space.
  const normalizeWhiteSpace = (textNode) => {
    const flattened = textNode.nodeValue
      .replace(/[\n\r\t]/g, " ")
      .replace(/\ {2,}/g, " ")
      .trim();
    textNode.nodeValue = flattened + " ";
    return textNode;
  };

  // parent boundaries provide the global x/y offsets for the svg elements
  const bb = htmlEl.getBoundingClientRect();

  // collected line fragments plus the style properties considered for rendering
  const textNodeObj = {
    xOffset: bb.x,
    yOffset: bb.y,
    width: bb.width,
    height: bb.height,
    textNodeData: [],
    // per property: values that are treated as defaults by the renderer
    styleProps: {
      fontFamily: [],
      fontSize: [16],
      fontWeight: ["400", "normal"],
      fontStyle: ["normal"],
      fontStretch: ["100%"],
      color: ["rgb(0, 0, 0)"],
      letterSpacing: ["normal"],
      textDecoration: ["none", "none solid rgb(0, 0, 0)"],
      textTransform: ["none"]
    }
  };

  // measure every text node
  nodes.forEach((textNode, i) => {
    normalizeWhiteSpace(textNode);
    const parent = textNode.parentElement;
    // tag the parent so fragments can be traced back to their element type
    parent.dataset.id = parent.nodeName.toLowerCase() + "_" + i;
    getTextNodeBboxes(textNode, textNodeObj);
  });

  // translate viewport coordinates to container-relative baseline coordinates
  const { xOffset, yOffset, textNodeData } = textNodeObj;
  let lineNum = 1;
  for (let i = 0; i < textNodeData.length; i++) {
    const line = textNodeData[i];
    const baseLineShift = line.style.fontSize * BASELINE_RATIO;
    line.x = line.x - xOffset;
    line.y = line.y - yOffset + line.height - baseLineShift;

    // a fragment further down than its predecessor starts a new line
    // (the predecessor has already been converted to baseline coordinates)
    const linePrev = i > 0 ? textNodeData[i - 1] : line;
    if (line.y > linePrev.y) {
      lineNum++;
    }
    line.lineNum = lineNum;
  }

  return renderSVGText(textNodeObj);
}
/**
 * Measure a text node with DOM ranges and append one entry per visual line
 * fragment to `textNodeObj.textNodeData`.
 *
 * Each entry has the shape
 * `{ text, x, y, height, style, hyphenated, parentId, href }`, where `x`/`y`
 * are viewport coordinates taken from `Range.getBoundingClientRect()`.
 *
 * Expects `node.parentElement.dataset.id` to be set to `<tagname>_<index>`
 * and word offsets to match single-space separated text (as produced by the
 * whitespace normalization in `html2SvgText`).
 *
 * @param {Text} node - text node to measure.
 * @param {{textNodeData: Array<object>, styleProps: object}} textNodeObj -
 *   collector; `styleProps` keys select the computed style properties copied
 *   onto every entry.
 * @returns {false|undefined} `false` when the node is empty / whitespace-only
 *   without a line break and was skipped, otherwise `undefined`.
 */
function getTextNodeBboxes(node, textNodeObj) {
  const parentElement = node.parentElement;
  const parentId = parentElement.dataset.id;
  const parentType = parentId.split("_")[0];
  const href = parentType === "a" ? parentElement.getAttribute("href") : "";

  // weird fix for Firefox (kept from the original, reason unknown):
  // toggle display and force a layout read before measuring
  parentElement.style.display = "inline-block";
  parentElement.getBoundingClientRect();
  parentElement.style.removeProperty("display");

  // copy the tracked style properties from the parent element
  const computed = window.getComputedStyle(parentElement);
  const itemStyle = {};
  for (const propName of Object.keys(textNodeObj.styleProps)) {
    itemStyle[propName] =
      propName === "fontSize" ? parseFloat(computed.fontSize) : computed[propName];
  }

  // ignore empty strings; checked before any range is created because
  // range.setEnd(node, 1) throws on a zero-length node
  const fullText = node.textContent;
  const isNewLine = /[\n\r]/.test(fullText);
  if (fullText.trim() === "" && !isNewLine) {
    return false;
  }

  const makeItem = (text, x, y, height, hyphenated) => ({
    text,
    x,
    y,
    height,
    style: itemStyle,
    hyphenated,
    parentId,
    href
  });

  const range = document.createRange();

  // bbox of the first character: height of a single line
  range.setStart(node, 0);
  range.setEnd(node, 1);
  const firstCharBox = range.getBoundingClientRect();

  // bbox of the node without its last character: taller means it wraps
  range.setStart(node, 0);
  range.setEnd(node, node.length - 1);
  const nodeBox = range.getBoundingClientRect();

  const lineHeight = firstCharBox.height;
  const isMultiline = nodeBox.height > lineHeight;

  // single line – no hyphenations
  if (!isMultiline) {
    textNodeObj.textNodeData.push(
      makeItem(fullText, firstCharBox.left, firstCharBox.top, nodeBox.height, false)
    );
    return;
  }

  // multiline: refine the search on word level
  const words = node.nodeValue.split(" ").filter(Boolean);
  let start = 0;
  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    const end = start + word.length;

    range.setStart(node, start);
    range.setEnd(node, end);
    const wordBox = range.getBoundingClientRect();
    const hasLinebreak = wordBox.height > lineHeight;

    if (!hasLinebreak) {
      const items = textNodeObj.textNodeData;
      const prev = items[items.length - 1];
      if (i > 0 && prev && wordBox.top === prev.y) {
        // same visual line as the previous fragment – concatenate
        prev.text += word + " ";
      } else {
        items.push(makeItem(word + " ", wordBox.x, wordBox.top, wordBox.height, false));
      }
    } else {
      // the word itself wraps: it may contain (automatic) hyphenations
      const hasHyphenChar = /[-–]/.test(word); // hyphen or en dash
      const splitIndices = [0];
      const wordLeft = wordBox.left;
      let lastTop = wordBox.top;
      let caretBox = wordBox;
      let caret = start + 1;

      // walk the caret positions after each character (except the last)
      for (let c = 0; c < word.length - 1; c++) {
        range.setStart(node, caret);
        range.setEnd(node, caret);
        caretBox = range.getBoundingClientRect();

        const char = word[c];
        const isWhitespace = char.trim() !== char;
        const movedVertically = caretBox.top > lastTop || caretBox.top < lastTop;

        if (movedVertically && !isWhitespace) {
          const prevIndex = splitIndices[splitIndices.length - 1];
          const sub = word.slice(prevIndex, c);

          if (sub.length > 0) {
            // measure exactly the characters of this fragment; offsetting by
            // prevIndex keeps the bbox correct for the 2nd+ break of a word
            range.setStart(node, start + prevIndex);
            range.setEnd(node, start + c);
            const subBox = range.getBoundingClientRect();

            textNodeObj.textNodeData.push(
              makeItem(
                // remove soft hyphens
                sub.replace(/\u00AD/g, ""),
                subBox.left,
                subBox.top,
                subBox.height,
                // a word with an explicit hyphen/dash needs no extra hyphen
                !hasHyphenChar
              )
            );
          }
          splitIndices.push(c);
        }
        lastTop = caretBox.top;
        caret += 1;
      }

      // remainder of the word after the last break, with trailing space
      textNodeObj.textNodeData.push(
        makeItem(
          word.slice(splitIndices[splitIndices.length - 1]) + " ",
          wordLeft,
          lastTop,
          caretBox.height,
          false
        )
      );
    }

    // skip the separating space
    start = end + 1;
  }
}
/**
 * Render collected text fragments as an SVG element.
 *
 * Produces `<svg><g class="gText">…</g></svg>` sized to the (rounded up)
 * container width/height. Fragments are emitted as `<tspan>` elements inside
 * a `<text>` element; a fragment whose parent element is an `<a>` is emitted
 * as its own `<a><text><tspan>` group, followed by a fresh `<text>` element
 * for the content after the link.
 *
 * @param {object} textNodeObj - `{ width, height, styleProps, textNodeData }`;
 *   every `textNodeData` entry provides `text`, `x`, `y`, `style`, `lineNum`,
 *   `hyphenated`, `href` and `parentId` (`<tagname>_<index>`).
 * @param {number} [decimals=1] - decimals used when rounding `x` and `dy`.
 * @returns {SVGSVGElement} the svg element; contains only the empty group
 *   when there are no fragments.
 */
function renderSVGText(textNodeObj, decimals = 1) {
  const ns = "http://www.w3.org/2000/svg";
  const { styleProps, textNodeData } = textNodeObj;
  const width = Math.ceil(textNodeObj.width);
  const height = Math.ceil(textNodeObj.height);

  const svg = document.createElementNS(ns, "svg");
  svg.setAttribute("viewBox", [0, 0, width, height].join(" "));
  svg.setAttribute("width", width);
  svg.setAttribute("height", height);

  // wrap in group
  const gText = document.createElementNS(ns, "g");
  gText.classList.add("gText");
  svg.append(gText);

  // nothing to render: return the empty canvas instead of crashing
  if (!textNodeData || textNodeData.length === 0) {
    return svg;
  }

  const firstItem = textNodeData[0];
  const baseStyle = {
    fontFamily: firstItem.style.fontFamily,
    fontStyle: "normal",
    fontWeight: 400,
    fontSize: firstItem.style.fontSize
  };
  const baseStyleStr = Object.values(baseStyle).join("");
  // style of the <text> elements created for links and for content after links
  const detachedTextStyle = `font-family: ${baseStyle.fontFamily}; font-size: ${baseStyle.fontSize}px; font-weight: ${baseStyle.fontWeight};`;

  // The collector stores the parent tag as prefix of `parentId` ("a_12");
  // a plain `parent` property is accepted as well for hand-built data.
  const isLinkItem = (entry) =>
    entry.parent === "a" || String(entry.parentId ?? "").split("_")[0] === "a";

  // svg text element emulating the HTML paragraph flow
  let svgText = document.createElementNS(ns, "text");
  svgText.textContent = "";
  svgText.setAttribute(
    "style",
    `font-family:${firstItem.style.fontFamily}; font-size:${firstItem.style.fontSize}px; font-weight:${firstItem.style.fontWeight};`
  );
  svgText.setAttribute("x", firstItem.x);
  svgText.setAttribute("y", firstItem.y);
  gText.append(svgText);

  // tspan that receives appended text and the styles of the current item
  let tspan = document.createElementNS(ns, "tspan");
  svgText.append(tspan);

  let lastTspanY = firstItem.y;
  let lastStyle = firstItem.style;

  textNodeData.forEach((item, i) => {
    const prev = i > 0 ? textNodeData[i - 1] : item;
    const next = i < textNodeData.length - 1 ? textNodeData[i + 1] : item;

    const styleStr = Object.values(item.style).join("");
    const styleStrPrev = Object.values(lastStyle).join("");
    const sameStyle = styleStr === styleStrPrev;
    const sameY = item.y === lastTspanY;
    // y decreases when the text continues at the top of the next column
    const colBreak = prev.y > item.y;
    const prevIsLink = i > 0 && isLinkItem(prev);

    // <text> element that holds this item's tspan (and its hyphen, if any)
    let hostText = svgText;

    if (isLinkItem(item)) {
      // links get their own <a><text><tspan> with absolute coordinates
      const link = document.createElementNS(ns, "a");
      link.setAttribute("href", item.href);

      const linkText = document.createElementNS(ns, "text");
      linkText.setAttribute("style", detachedTextStyle);

      const linkTspan = document.createElementNS(ns, "tspan");
      linkTspan.textContent = item.text;
      linkTspan.setAttribute("x", +item.x.toFixed(decimals));
      linkTspan.setAttribute("y", item.y);
      linkTspan.classList.add("tspan-a");

      gText.append(link);
      linkText.append(linkTspan);
      link.append(linkText);
      hostText = linkText;

      if (!isLinkItem(next)) {
        // content after the link continues in a new text element that starts
        // at the link position, so the following dy offsets stay relative
        svgText = document.createElementNS(ns, "text");
        svgText.classList.add("p-a");
        svgText.setAttribute("style", detachedTextStyle);
        svgText.setAttribute("x", item.x);
        svgText.setAttribute("y", item.y);
        gText.append(svgText);
      }
      tspan = linkTspan;
    } else if ((i > 0 && !sameY) || !sameStyle || prevIsLink) {
      // new line, new style, or first fragment after a link: start a new tspan
      // (after a link the current tspan lives inside the <a> and must not be
      // extended)
      const tspanNew = document.createElementNS(ns, "tspan");
      tspanNew.textContent = item.text;
      const dy = +(item.y - prev.y).toFixed(decimals);

      // omit x if on the same line and not after a link or column shift
      if (prev.lineNum !== item.lineNum || prevIsLink || colBreak) {
        tspanNew.setAttribute("x", +item.x.toFixed(decimals));
      }
      if (dy) {
        tspanNew.setAttribute("dy", dy);
      }
      svgText.append(tspanNew);
      tspan = tspanNew;
    } else {
      // same line/style – append content to the previous tspan
      tspan.textContent += item.text;
    }

    // visible hyphen for automatically hyphenated fragments; hidden from
    // assistive technology and text selection
    let tspanHyphen = null;
    if (item.hyphenated) {
      tspanHyphen = document.createElementNS(ns, "tspan");
      tspanHyphen.classList.add("tspanHyphen");
      tspanHyphen.setAttribute("aria-hidden", "true");
      tspanHyphen.style.userSelect = "none";
      tspanHyphen.textContent = "-";
      hostText.append(tspanHyphen);
    }

    // apply styles if different from base style or previous
    if (baseStyleStr !== styleStr || styleStrPrev !== styleStr) {
      for (const propName of Object.keys(styleProps)) {
        const propDefaults = styleProps[propName];
        const rawValue = item.style[propName];
        const propValue = propName === "fontSize" ? parseFloat(rawValue) : rawValue;
        const unit = propName === "fontSize" ? "px" : "";
        // the CSS text color maps to the SVG fill
        const targetProp = propName === "color" ? "fill" : propName;

        // set styles - ignore defaults, "none" values and base style values
        const isCustom =
          propValue != null &&
          propDefaults.length &&
          !propDefaults.includes(propValue) &&
          !String(propValue).includes("none") &&
          propValue !== baseStyle[targetProp];

        if (isCustom) {
          tspan.style[targetProp] = propValue + unit;
          if (tspanHyphen) {
            tspanHyphen.style[targetProp] = propValue + unit;
          }
        }
      }
    }

    lastTspanY = item.y;
    lastStyle = item.style;
  });

  return svg;
}
// text helpers

/**
 * Collect all text nodes below an element in document order.
 *
 * @param {Node} el - root of the subtree to walk.
 * @returns {Text[]} every text node the tree walker visits (whitespace-only
 *   nodes are included; no filter is applied).
 */
function getTextNodesInEL(el) {
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null);
  const nodes = [];
  // nextNode() returns the node it advanced to, or null when the walk is done
  let current = walker.nextNode();
  while (current) {
    nodes.push(current);
    current = walker.nextNode();
  }
  return nodes;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Page chrome */
body {
  font-family: 'Fira Sans', 'Open Sans', 'Segoe UI', sans-serif;
}

/* Generated SVG preview: let glyphs overflow the viewBox instead of clipping */
svg {
  overflow: visible;
  border: 1px solid #ccc;
}

/* User-resizable box (native CSS resize handle) */
.resize {
  resize: both;
  border: 1px solid #ccc;
  overflow: auto;
}

/* Source HTML text that gets converted to SVG */
.foreignBody {
  font-family: "Noto Serif", serif;
  font-weight: 400;
  font-style: normal;
  font-size: 1em;
  line-height: 1.7em;
}

h1 {
  font-size: 2em;
  line-height: 1.2em;
  margin: 0 0 1rem 0;
  font-stretch: 50%;
  text-transform: uppercase;
}

.author {
  line-height: 1.2em;
  font-style: italic;
  margin-bottom: 0em;
}

/* Automatic hyphenation; vendor prefix first so the standard property wins */
p {
  margin: 0 0 1rem 0;
  -webkit-hyphens: auto;
  hyphens: auto;
}

.columns {
  column-count: 2;
  column-gap: 1em;
}

/* SVG markup output */
textarea {
  display: block;
  width: 100%;
  min-height: 15em;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment