Instantly share code, notes, and snippets.
Last active
June 16, 2025 22:13
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
Save bpeterso2000/860976ca578fac77e8851633ba58b99e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Concise generation of HTML using Python.''' | |
import html
import json
import re
import sys
from collections.abc import Iterable, Mapping
from functools import partial
from typing import (
    Callable, Generator, NewType, Protocol, Union, runtime_checkable,
)

try:
    from bs4 import BeautifulSoup, Comment
    BEAUTIFUL_SOUP_INSTALLED = True
except ImportError:
    BEAUTIFUL_SOUP_INSTALLED = False
# Capitalised element names taken from the HTML living standard index
# (snapshot of January 7, 2025), grouped by initial letter:
# http://html.spec.whatwg.org/multipage/indices.html
HTML_TAGS = {
    'A', 'Abbr', 'Address', 'Area', 'Article', 'Aside', 'Audio',
    'B', 'Base', 'Bdi', 'Bdo', 'Blockquote', 'Body', 'Br', 'Button',
    'Canvas', 'Caption', 'Cite', 'Code', 'Col', 'Colgroup',
    'Data', 'Datalist', 'Dd', 'Del', 'Details', 'Dfn', 'Dialog', 'Div',
    'Dl', 'Dt',
    'Em', 'Embed',
    'Fieldset', 'Figcaption', 'Figure', 'Footer', 'Form',
    'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'Head', 'Header', 'Hgroup', 'Hr',
    'Html',
    'I', 'Iframe', 'Img', 'Input', 'Ins',
    'Kbd',
    'Label', 'Legend', 'Li', 'Link',
    'Main', 'Map', 'Mark', 'Math', 'Mathml', 'Menu', 'Meta', 'Meter',
    'Nav', 'Noscript',
    'Object', 'Ol', 'Optgroup', 'Option', 'Output',
    'P', 'Picture', 'Pre', 'Progress',
    'Q',
    'Rp', 'Rt', 'Ruby',
    'S', 'Samp', 'Script', 'Search', 'Section', 'Select', 'Slot', 'Small',
    'Source', 'Span', 'Strong', 'Style', 'Sub', 'Summary', 'Svg', 'Sup',
    'Table', 'Tbody', 'Td', 'Template', 'Textarea', 'Tfoot', 'Th', 'Thead',
    'Time', 'Title', 'Tr', 'Track',
    'U', 'Ul',
    'Var', 'Video',
    'Wbr',
}
# Tags for void elements: they take no children and no closing tag.
# http://developer.mozilla.org/en-US/docs/Glossary/voidelement
# `Iframe` and `Template` are deliberately absent: both elements need an
# end tag, so treating them as void would emit an unterminated element.
VOID_ELEMENTS = {
    'Area', 'Base', 'Br', 'Col', 'Embed', 'Hr', 'Img', 'Input',
    'Link', 'Meta', 'Source', 'Track', 'Wbr',
}
# An attribute name containing any of these characters is left exactly
# as written (no translation).  The set holds the space plus the
# printable ASCII punctuation; letters, digits and the underscore are
# intentionally not part of it.
# http://ascii.ci/htmlcodes.htm
SPECIAL_CHARS = set(' !"#$%&\'()*+,-./:;<=>?@[\\]^{|}~`')

# Single-character constants used by the name and quoting helpers.
DASH = '-'
UNDERSCORE = '_'
SINGLE_QUOTE = "'"
DOUBLE_QUOTE = '"'
@runtime_checkable
class FastTag(Protocol):
    '''Protocol for fast tags; can be called & safely rendered as HTML.

    The protocol is runtime-checkable because this module uses it as a
    class pattern (`case FastTag():`), which performs an `isinstance`
    check; a plain `Protocol` raises `TypeError` there.  An object
    satisfies the check when it has both a `tag` attribute and a
    `__ft__` method.
    '''
    tag: str

    def __ft__(self) -> str:
        '''Fast Tag callable.'''
        ...
@runtime_checkable
class SafeHtml(Protocol):
    '''Protocol for objects that can be safely rendered as HTML.

    Runtime-checkable so that it can be used as a class pattern
    (`case SafeHtml():`), which relies on `isinstance`.  Any object
    exposing an `__html__` method satisfies the check.
    '''

    def __html__(self) -> str:
        '''Return the HTML representation of the object.'''
        ...
@runtime_checkable
class Printable(Protocol):
    '''Protocol for objects that can be converted to a string.

    Runtime-checkable so that it can be used as a class pattern
    (`case Printable():`).  Every Python object inherits `__str__`
    from `object`, so the check succeeds for any value; place it after
    the more specific patterns.
    '''

    def __str__(self) -> str:
        '''Convert the object to a string.'''
        ...
# Scalar values.
Primitive = str | int | float | bool | None

# Recursive alias: a scalar, or a dict / list built from `Json` values.
Json = Primitive | dict[str, 'Json'] | list['Json']

# Alias used to annotate the content passed to tags and to the renderer.
Element = (
    FastTag
    | SafeHtml
    | Printable
    | Mapping[Primitive, Json]
    | Iterable['Element']
)

# Alias used to annotate the value side of a tag attribute.
AttrValue = (
    None
    | bool
    | Printable
    | Iterable[Printable]
    | Mapping[str, Printable]
)
class Safe(str):
    '''A `str` subclass that provides `__html__`.'''

    def __html__(self):
        '''Return this string object itself, unchanged.'''
        return self
# --- Utility functions for Fast Tags --- | |
def to_list(obj: object) -> list:
    '''Return `obj` as a list.

    `None` becomes an empty list, a list is returned as is (the same
    object, not a copy), strings and bytes are wrapped rather than
    split, any other iterable is consumed into a new list, and every
    remaining value is wrapped in a one-element list.

    >>> to_list(None)
    []
    >>> to_list([1, 2])
    [1, 2]
    >>> to_list('ab')
    ['ab']
    >>> to_list(i for i in 'ab')
    ['a', 'b']
    >>> to_list(42)
    [42]
    '''
    if obj is None:
        return []
    if isinstance(obj, list):
        return obj
    # Text is iterable, but splitting it into characters is never wanted.
    splittable = (
        isinstance(obj, Iterable) and not isinstance(obj, (str, bytes))
    )
    return list(obj) if splittable else [obj]
def flatten(nested_collection: object) -> Generator:
    '''Recursively flatten nested collection.

    Flatten iterables excluding strings, mappings, and fast tags.
    Those three kinds are iterable themselves but are yielded whole.
    A fast tag is recognised by duck typing (it has both `tag` and
    `__ft__`), so the check does not depend on `isinstance` working
    with a protocol class.

    Args:
        nested_collection: an iterable whose items may be iterables.

    Yields:
        The leaf items, depth first, in their original order.

    Examples:
    >>> tuple(flatten((1, [2, 3], (4, [5, 6]), 7)))
    (1, 2, 3, 4, 5, 6, 7)
    >>> tuple(flatten([[1, 2], [3, [4, 5]], 6]))
    (1, 2, 3, 4, 5, 6)
    >>> tuple(flatten(()))
    ()
    >>> tuple(flatten((1,)))
    (1,)
    >>> tuple(flatten(['ab', ['cd']]))
    ('ab', 'cd')
    '''
    for item in nested_collection:
        # The decision is made per item; inspecting the enclosing
        # collection would send every scalar into the recursive branch.
        is_fast_tag = hasattr(item, '__ft__') and hasattr(item, 'tag')
        if is_fast_tag or isinstance(item, (str, bytes, Mapping)):
            yield item
        elif isinstance(item, Iterable):
            yield from flatten(item)
        else:
            yield item
# -- Attribute mapping (translation) functions -- | |
def keymap(key: str) -> str:
    '''Translate a Python-friendly attribute name into its HTML name.

    Rules, applied in this order:
    - A key containing any character of `SPECIAL_CHARS` is returned
      unchanged.
    - The key 'cls' becomes 'class'.
    - Otherwise leading underscores are stripped, the remaining
      underscores become dashes, trailing dashes are stripped and the
      result is lower-cased.
    - If nothing is left after that, a single underscore is returned.

    Examples:
    >>> keymap('simple_key')
    'simple-key'
    >>> keymap('_leading_underscore')
    'leading-underscore'
    >>> keymap('trailing_underscore_')
    'trailing-underscore'
    >>> keymap('__both__ends__')
    'both--ends'
    >>> keymap('cls')
    'class'
    >>> keymap('key!with$special#chars')
    'key!with$special#chars'
    >>> keymap('___')
    '_'
    >>> keymap('')
    '_'
    '''
    if any(char in SPECIAL_CHARS for char in key):
        return key
    if key == 'cls':
        return 'class'
    without_prefix = key.lstrip(UNDERSCORE)
    dashed = without_prefix.replace(UNDERSCORE, DASH)
    trimmed = dashed.rstrip(DASH)
    if not trimmed:
        return UNDERSCORE
    return trimmed.lower()
def attrmap(attrs: dict[str, object]) -> dict[str, object]:
    '''Return a new dict whose keys are `attrs` keys run through `keymap`.

    Values are carried over untouched.  When two keys translate to the
    same name, the one that comes later in `attrs` wins.
    '''
    translated = {}
    for name, value in attrs.items():
        translated[keymap(name)] = value
    return translated
def add_quotes(val: str) -> str:
    r'''Wrap an HTML attribute value in the quotes that suit its content.

    - A value without double quotes is wrapped in double quotes.
    - A value containing double quotes is wrapped in single quotes.
    - A value containing both kinds is wrapped in single quotes and
      every embedded single quote is replaced by the character
      reference `&#39;`, so it cannot terminate the attribute early.

    Args:
        val: the attribute value, already converted to text.

    Returns:
        The value including its surrounding quote characters.

    Examples:
    >>> add_quotes('value')
    '"value"'
    >>> add_quotes('')
    '""'
    >>> print(add_quotes('say "hi"'))
    'say "hi"'
    >>> print(add_quotes('it\'s "quoted"'))
    'it&#39;s "quoted"'
    '''
    if DOUBLE_QUOTE not in val:
        return f'{DOUBLE_QUOTE}{val}{DOUBLE_QUOTE}'
    # Single-quote delimiters are in use from here on, so a literal
    # single quote inside the value has to be written as a reference.
    escaped = val.replace(SINGLE_QUOTE, '&#39;')
    return f'{SINGLE_QUOTE}{escaped}{SINGLE_QUOTE}'
def to_attr(key: str, value: AttrValue) -> str: | |
match value: | |
case False | None | '': | |
return '' | |
case True: | |
return key | |
case str(): | |
result = value | |
case Mapping(): | |
result = '; '.join(f'{k}:{v}' for k, v in value.items()) | |
case Iterable(): | |
result = ' '.join(map(str, value)) | |
case Printable(): | |
result = str(value) | |
case _: | |
raise TypeError( | |
f'Unsupported type {type(value).__name__} for {key!r} ' | |
'attribute value.' | |
) | |
quote_char, return_val = add_quotes(result) | |
return f'{key}={quote_char}{return_val}{quote_char}' | |
def _to_xml(el: Element, escape: Callable | None = html.escape): | |
if not escape: | |
escape = lambda x: x | |
match el: | |
case FastTag(): | |
# openning tag | |
if el.attrs: | |
yield f'<{el.tag} {to_attr(el.attrs)}>' | |
else: | |
yield f'<{el.tag}>' | |
# contents | |
yield el.__ft__() | |
# closing tag | |
if not el.void: | |
yield f'</{el.tag}>' | |
case SafeHtml(): | |
yield el.__html__() | |
case str(): | |
yield escape(el) | |
case bytes(): | |
try: | |
yield escape(el.decode('utf-8')) | |
except UnicodeDecodeError: | |
raise | |
case Mapping(): | |
yield json.dumps(el) | |
case Iterable(): | |
yield '\n'.join(map(_to_xml, el)) | |
case None: | |
pass | |
case Printable(): | |
yield escape(str(el)) | |
case _: | |
raise TypeError( | |
f'Unsupported type {type(el).__name__} ' | |
'in HTML conversion.' | |
) | |
def to_xml(contents: Element, escape: Callable | None = html.escape):
    '''Convert `ft` element tree into HTML.

    Args:
        contents: the element (or collection of elements) to render.
        escape: function applied to plain text by `_to_xml`; pass
            `None` to emit text unescaped.

    Returns:
        The rendered markup as a `Safe` string, so that rendering it
        again does not escape it a second time.
    '''
    # `_to_xml` is a generator of fragments; concatenate them here.
    return Safe(''.join(_to_xml(contents, escape)))
def highlight(contents, lang="html"):
    '''Return `contents` rendered by `to_xml` inside a Markdown code fence.

    The fence is tagged with `lang` so that a Markdown renderer can
    apply syntax highlighting.
    '''
    markup = to_xml(contents)
    return f"```{lang}\n{markup}\n```"
def showtags(s):
    '''Return `s` rendered by `to_xml` inside a `<pre><code>` block.

    The wrapper elements are opened and closed in matching order,
    with `<code>` nested inside `<pre>`.
    '''
    return f'<pre><code>{to_xml(s)}</code></pre>'
def tidy(html: str) -> str:
    '''Return `html` pretty-printed by Beautiful Soup.

    Args:
        html: the markup to format.  (The parameter shadows the `html`
            module inside this function.)

    Returns:
        The prettified markup.  When Beautiful Soup is not installed a
        hint is printed to standard error and `html` is returned
        unchanged, so callers always receive a string.
    '''
    if BEAUTIFUL_SOUP_INSTALLED:
        return BeautifulSoup(html, 'html.parser').prettify()
    print(
        'Tidy requires Beautiful Soup:\n'
        '`pip install beautifulsoup4`',
        file=sys.stderr
    )
    return html
class FT:
    "A 'Fast Tag' structure, containing `tag`,`children`,and `attrs`"

    # Names that are stored on the object itself.  Assigning any other
    # name stores an HTML attribute in `attrs` (see `__setattr__`).
    internal_attrs = {
        'tag', 'children', 'attrs', 'void',
        'list', 'get', 'set',
        '__getitem__', '__setitem__', '__iter__', '__call__',
        '__repr__', '__str__', '__html__', '__ft__',
    }
    # Instance state assigned by `__init__`; never looked up in `attrs`.
    _state_attrs = frozenset({'tag', 'children', 'attrs', 'void'})

    def __init__(
        self,
        tag: str,
        *contents: Element,
        void: bool = False,
        **attrs: object
    ):
        '''Create a tag.

        Args:
            tag: element name; stored lower-cased.
            *contents: children; nested iterables are flattened.
            void: true for elements without children and closing tag.
            **attrs: attributes; names are translated by `keymap`.
        '''
        self.tag = tag.lower()
        self.void = void
        # Materialised as a list: children are indexed, sliced and
        # concatenated below, which a generator does not support.
        self.children = list(flatten(contents))
        self.attrs = attrmap(attrs)

    def __setattr__(self, key, val):
        '''Store internal names on the object, all others in `attrs`.'''
        if key in FT.internal_attrs:
            return super().__setattr__(key, val)
        self.attrs[keymap(key)] = val

    def __getattr__(self, key, default=None):
        '''Look up an HTML attribute; runs only when normal lookup fails.

        Raises:
            AttributeError: for instance-state names and dunder names.
                Serving those from `attrs` would recurse without end
                while `attrs` itself is not set yet, and would make
                the object appear to implement every special method.
        '''
        is_dunder = key.startswith('__') and key.endswith('__')
        if is_dunder or key in FT._state_attrs:
            raise AttributeError(key)
        return self.attrs.get(keymap(key), default)

    def __html__(self):
        '''Return the tag rendered by `to_xml`.'''
        return to_xml(self)

    __str__ = __ft__ = __html__

    def __repr__(self):
        return f"{self.tag}({self.children},{self.attrs})"

    def _repr_markdown_(self):
        '''Return the tag rendered by the module-level `highlight`.'''
        return highlight(self)

    def __iter__(self):
        '''Iterate over the children.'''
        return iter(self.children)

    def __getitem__(self, idx: int):
        '''Return the child at position `idx`.'''
        return self.children[idx]

    def __setitem__(self, idx: int, el: Element):
        '''Replace the child at `idx` with `el`.

        `el` is flattened, so an iterable of elements replaces the one
        child with several.

        Raises:
            IndexError: if `idx` is out of range.
        '''
        # Normalise negative indexes (and range-check) so that the two
        # slices below always meet around the replaced position.
        idx = range(len(self.children))[idx]
        # Wrapped in a tuple so a bare string or tag stays one item.
        replacement = list(flatten((el,)))
        self.children = (
            self.children[:idx]
            + replacement
            + self.children[idx + 1:]
        )

    def __call__(self, *children: Element, **attrs: object):
        '''Append children and/or merge attributes; returns `self`.

        Returning the tag allows the `Tag(attrs)(children)` form.
        '''
        if children:
            # set to existing children followed by new children.
            self.children = self.children + list(flatten(children))
        if attrs:
            self.attrs = {**self.attrs, **attrmap(attrs)}
        return self

    def set(
        self,
        *children: Element,
        keep_attrs: set = frozenset({'id', 'name'}),
        **attrs: object
    ):
        "Set children and/or attributes (chainable)"
        if children:
            # Set to new children only; old children are gone.
            self.children = list(flatten(children))
        if attrs:
            # - Get preserved attributes (i.e., name & id)
            # - Preserved attributes can be overwritten
            # - Add new attributes.
            # - All other attributes are gone.
            # Stored names are already translated, so they are looked
            # up as they are.
            preserved = {
                name: self.attrs[name]
                for name in set(keep_attrs) & set(self.attrs)
            }
            self.attrs = {**preserved, **attrmap(attrs)}
        return self

    @property
    def list(self):
        '''Return `[tag, children, attrs]`.'''
        return [self.tag, self.children, self.attrs]

    @property
    def html(self):
        '''Return the HTML representation of the Fast Tag.'''
        return self.__ft__()

    @property
    def tidy(self):
        '''Return a prettified HTML representation of the Fast Tag.'''
        return tidy(self.html)

    @property
    def highlight(self):
        '''Render HTML output in Markdown for syntax highlighting.'''
        # `tidy` returns a plain string; mark it as `Safe` so that the
        # markup is not escaped again inside the code fence.
        return highlight(Safe(self.tidy))

    @property
    def showtags(self):
        '''Render HTML output inside preformatted code block.'''
        return showtags(self.tidy)
def ft(*args, **kwargs):
    '''Build an `FT` from the given arguments and return it.'''
    fast_tag = FT(*args, **kwargs)
    return fast_tag
# Create a Fast Tag partial as a global variable for each tag, so that
# e.g. `Div(...)` calls `ft('Div', ..., void=False)`.  The `void` flag of
# each partial is fixed here from membership in VOID_ELEMENTS.
# NOTE: the loop variable `tag` remains a module-level name afterwards,
# and the `Html` partial is replaced by the `Html` function defined below.
for tag in HTML_TAGS:
    globals()[tag] = partial(ft, tag, void=tag in VOID_ELEMENTS)
def Html(*contents, doctype=True, **attrs) -> FT | tuple[FT, FT]:
    '''An HTML tag, optionally preceded by `!DOCTYPE HTML`.

    Args:
        *contents: children of the `html` element.
        doctype: when true, a doctype declaration is put in front.
        **attrs: attributes of the `html` element.

    Returns:
        The `html` tag alone when `doctype` is false; otherwise the
        pair `(doctype_tag, html_tag)`.
    '''
    html_tag = ft('html', *contents, **attrs)
    if not doctype:
        return html_tag
    # The declaration is modelled as a void tag with a bare `html`
    # attribute.
    return (ft("!DOCTYPE", html=True, void=True), html_tag)
# Default <head>: a UTF-8 charset declaration plus a viewport meta tag.
# `Head` and `Meta` are the partials registered in globals() above.
HeadDefault = Head(
    Meta(charset='utf-8'),
    Meta(
        name='viewport',
        content='width=device-width, initial-scale=1.0',
    ),
)
def html_wrapper( | |
fast_tag: FT, | |
hdrs: Iterable[Element], | |
ftrs: Iterable[Element] | |
) -> FT: | |
'''Wrap a Fast Tag in HTML, Head, and/or Body tags if not provided.''' | |
match fast_tag.tag: | |
case 'html': | |
return fast_tag | |
case 'head': | |
return Html(fast_tag) | |
case 'body': | |
return Html(HeadDefault(hdrs), fast_tag) | |
case _: | |
return Html(HeadDefault(hdrs), Body(fast_tag, ftrs)) | |
''' | |
# %% ../nbs/09_xml.ipynb | |
def __getattr__(tag): | |
if tag.startswith("_") or tag[0].islower(): | |
raise AttributeError | |
tag = keymap(tag) | |
def _f(*c, target_id=None, **kwargs): | |
return ft(tag, *c, target_id=target_id, **kwargs) | |
return _f | |
''' | |
# %% ../nbs/01_basics.ipynb | |
def _risinstance(types, obj):
    '''Check `obj` against `types`, which may name classes by string.

    When at least one entry of `types` is a string, the check is done
    by name: it succeeds if the `__name__` of any class in the MRO of
    `type(obj)` occurs in `types`.  Otherwise it is a plain
    `isinstance(obj, types)`.
    '''
    by_name = False
    for candidate in types:
        if isinstance(candidate, str):
            by_name = True
            break
    if not by_name:
        return isinstance(obj, types)
    for klass in type(obj).__mro__:
        if klass.__name__ in types:
            return True
    return False
def tuplify(o, use_list=False, match=None):
    '''Make `o` a tuple.

    The conversion goes through `to_list`.  `use_list` and `match` are
    accepted but not used by this implementation.
    '''
    items = to_list(o)
    return tuple(items)
def risinstance(types, obj=None):
    '''Curried `isinstance` but with args reversed.

    With `obj` left at `None` a one-argument checker bound to `types`
    is returned; otherwise the check is carried out immediately.
    '''
    type_tuple = tuplify(types)
    if obj is not None:
        return _risinstance(type_tuple, obj)
    return partial(_risinstance, type_tuple)
_re_h2x_attr_key = re.compile(r'^[A-Za-z_-][\w-]*$') | |
# %% ../nbs/api/01_components.ipynb | |
def html2ft(html: str) -> str: | |
'''Convert HTML to an `ft` expression''' | |
rev_keymap = {'class': 'cls', 'for': '_for'} | |
def _parse(el: str, lvl: int = 0, indent: int = 4): | |
match el: | |
case str(): | |
return repr(el.strip()) if el.strip() else '' | |
case list(): | |
return '\n'.join(_parse(e, lvl) for e in el) | |
tag_name = el.name.capitalize().replace(DASH, UNDERSCORE) | |
if tag_name=='[document]': | |
return _parse(list(el.children), lvl) | |
cts = el.contentss | |
cs = [ | |
repr(c.strip()) if isinstance(c, str) else _parse(c, lvl + 1) | |
for c in cts if str(c).strip() | |
] | |
attrs, exotic_attrs = [], {} | |
for key, value in sorted( | |
el.attrs.items(), key=lambda x: x[0]=='class' | |
): | |
if isinstance(value,(tuple,list)): | |
value = " ".join(value) | |
key, value = rev_keymap.get(key, key), value or True | |
if _re_h2x_attr_key.match(key): | |
attrs.append(f'{key.replace(DASH, UNDERSCORE)}={value!r}') | |
else: | |
exotic_attrs[key] = value | |
if exotic_attrs: | |
attrs.append(f'**{exotic_attrs!r}') | |
spc = ' ' * lvl * indent | |
onlychild = not cts or (len(cts)==1 and isinstance(cts[0], str)) | |
j = ', ' if onlychild else f',\n{spc}' | |
inner = j.join(filter(None, cs+attrs)) | |
if onlychild: | |
return f'{tag_name}({inner})' | |
if not attrs: | |
return f'{tag_name}(\n{spc}{inner}\n{" "*(lvl-1)*indent})' | |
inner_cs = j.join(filter(None, cs)) | |
inner_attrs = ', '.join(filter(None, attrs)) | |
return ( | |
f'{tag_name}({inner_attrs})(\n{spc}{inner_cs}\n' | |
f'{" " * (lvl -1 ) * indent})' | |
) | |
soup = BeautifulSoup(html.strip(), 'html.parser') | |
for c in soup.find_all(string=risinstance(Comment)): | |
c.extract() | |
return _parse(soup, 1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment