Created
May 3, 2020 21:36
-
-
Save grakic/b25d54f27223154023cfb3042ab66d41 to your computer and use it in GitHub Desktop.
CloudFlare Workers HTMLRewriter mock based on cheerio
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cheerio from 'cheerio' | |
/**
 * TODO: This code is not optimized for production!
 * Cheerio is used here only to simulate a stream-oriented parser, and it is slow!
 */
/**
 * Replaces the wrapped text/comment node's content.
 *
 * Meant to be bound to a cheerio selection wrapping a single text or comment
 * node (see `replace.bind(...)` in `wrapOther`).
 *
 * @param {string} content - Replacement content.
 * @param {{html?: boolean}} [options] - When `options.html` is truthy the
 *   content is inserted as raw HTML; otherwise it is stored as the node's
 *   value, i.e. treated as plain text.
 * @returns {object} The bound selection, so calls can be chained.
 */
function replace(content, options) {
  const asHtml = options != null && Boolean(options.html)
  if (asHtml) {
    // Let cheerio parse the markup and swap it in place of the node.
    this.replaceWith(content)
  } else {
    this[0].nodeValue = content
  }
  return this
}
/**
 * Tells whether the wrapped element carries the given attribute.
 *
 * Meant to be bound to a cheerio selection. Reads the attribute through
 * `attr` directly so the result does not depend on which "missing" sentinel
 * `getAttribute` returns.
 *
 * @param {string} name - Attribute name.
 * @returns {boolean} `true` when `attr(name)` yields a value.
 */
function hasAttribute(name) {
  return this.attr(name) != null
}

/**
 * Returns the value of the given attribute on the wrapped element.
 *
 * Meant to be bound to a cheerio selection.
 *
 * @param {string} name - Attribute name.
 * @returns {string|null} The attribute value, or `null` when `attr(name)`
 *   yields no value (mirrors the HTMLRewriter Element API, which reports a
 *   missing attribute as `null` rather than `undefined`).
 */
function getAttribute(name) {
  const value = this.attr(name)
  return value == null ? null : value
}
/**
 * Sets an attribute on the wrapped element.
 *
 * Meant to be bound to a cheerio selection.
 *
 * @param {string} name - Attribute name.
 * @param {string} value - Attribute value.
 * @returns {object} The bound selection, so calls can be chained like the
 *   HTMLRewriter Element API allows.
 */
function setAttribute(name, value) {
  this.attr(name, value)
  return this
}
/**
 * Mock of the Cloudflare Workers HTMLRewriter built on top of cheerio.
 *
 * The whole document is parsed up front and then walked depth-first, which
 * only simulates a stream-oriented parser.
 */
export default class HTMLRewriter {
  constructor() {
    // Registered [selector, handler] pairs, in registration order.
    this.selectors = []
  }

  /**
   * Registers a handler object for elements matching a selector.
   *
   * @param {string} element - Selector passed to cheerio's `is()`.
   * @param {object} handler - Object with optional `element`, `text` and
   *   `comments` callbacks (`comment` is still accepted as a legacy alias).
   * @returns {HTMLRewriter} This rewriter, for chaining.
   */
  on(element, handler) {
    this.selectors.push([element, handler])
    return this
  }

  /**
   * Runs the registered handlers over the response body.
   *
   * @param {Response} response - Response whose text body is HTML.
   * @returns {Promise<Response>} New response carrying the rewritten HTML;
   *   the original response is passed as the init argument.
   */
  async transform(response) {
    const text = await response.text()
    const $ = cheerio.load(text)
    // Simulate stream-based parser
    this.walk($, $.root())
    return new Response($.root().html(), response)
  }

  /**
   * Visits a node and, recursively, its element children, firing the
   * handlers whose selector matches the node.
   *
   * @param {Function} $ - Loaded cheerio instance.
   * @param {object} node - Node (or root selection) to visit.
   */
  walk($, node) {
    const $node = this.wrapElement($, node)

    // Select matching HTMLRewriter handlers
    const matchedHandlers = this.selectors
      .filter(([selector]) => $node.is(selector))
      .map(([, handler]) => handler)

    // Trigger handlers on the element itself before looking at its content
    for (const handler of matchedHandlers) {
      if (typeof handler.element === 'function') {
        handler.element($node)
      }
    }

    // Snapshot the children after the element handlers ran
    const children = $node.contents().toArray()
    const lastIndex = children.length - 1

    for (const [i, child] of children.entries()) {
      if (child.nodeType === 1) {
        this.walk($, child)
        continue
      }

      // A run of same-typed siblings ends at the last child or when the
      // next sibling has a different node type.
      const lastInNode = i === lastIndex || children[i + 1].nodeType !== child.nodeType
      const $child = this.wrapOther($, child, lastInNode)

      // Trigger handlers on Text (3) and Comment (8) nodes
      for (const handler of matchedHandlers) {
        if (child.nodeType === 3) {
          if (typeof handler.text === 'function') {
            handler.text($child)
          }
        } else if (child.nodeType === 8) {
          // The HTMLRewriter API names this callback `comments`; fall back
          // to the singular `comment` that this mock used to call.
          const callbackName = typeof handler.comments === 'function' ? 'comments' : 'comment'
          if (typeof handler[callbackName] === 'function') {
            handler[callbackName]($child)
          }
        }
      }
    }
  }

  /**
   * Wraps a node in a cheerio selection decorated with the element members
   * handlers use: `tagName`, `hasAttribute`, `getAttribute`, `setAttribute`.
   *
   * @param {Function} $ - Loaded cheerio instance.
   * @param {object} node - Node to wrap.
   * @returns {object} The decorated selection.
   */
  wrapElement($, node) {
    const $node = $(node)
    $node.tagName = node.name
    $node.hasAttribute = hasAttribute.bind($node)
    $node.getAttribute = getAttribute.bind($node)
    $node.setAttribute = setAttribute.bind($node)
    return $node
  }

  /**
   * Wraps a text or comment node in a cheerio selection decorated with
   * `text`, `replace` and, for text nodes only, `lastInTextNode`.
   *
   * @param {Function} $ - Loaded cheerio instance.
   * @param {object} node - Text or comment node to wrap.
   * @param {boolean} lastInNode - Whether the node ends a run of same-typed
   *   siblings.
   * @returns {object} The decorated selection.
   */
  wrapOther($, node, lastInNode) {
    const $node = $(node)
    // Note: this shadows cheerio's own `text()` method with the raw value.
    $node.text = node.nodeValue
    $node.replace = replace.bind($node)
    if (node.nodeType === 3) {
      $node.lastInTextNode = lastInNode
    }
    return $node
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://gist.github.com/grakic/b25d54f27223154023cfb3042ab66d41#file-htmlrewriter-js-L76
handler.comment && handler.comment($child)
should be: handler.comments && handler.comments($child)
to match Cloudflare's API (yes, it's odd that they made element and text singular but made comments plural).