Skip to content

Instantly share code, notes, and snippets.

@bpeterso2000
Last active June 16, 2025 22:13
Show Gist options
  • Save bpeterso2000/860976ca578fac77e8851633ba58b99e to your computer and use it in GitHub Desktop.
Save bpeterso2000/860976ca578fac77e8851633ba58b99e to your computer and use it in GitHub Desktop.
'''Concise generation of HTML using Python.'''
import html
import json
import re
import sys
from collections.abc import Iterable, Mapping
from functools import partial
from typing import (
    Callable, Generator, NewType, Protocol, Union, runtime_checkable
)
try:
from bs4 import BeautifulSoup, Comment
BEAUTIFUL_SOUP_INSTALLED = True
except ImportError:
BEAUTIFUL_SOUP_INSTALLED = False
# Tag names for the elements of the HTML living standard (January 7, 2025):
# http://html.spec.whatwg.org/multipage/indices.html
# Names are capitalized; the `for tag in HTML_TAGS` loop further down
# registers a module-level factory under each of these names.
# NOTE(review): 'Mathml' does not look like an element name from that
# index ('Math' is already listed) -- confirm before relying on it.
HTML_TAGS = {
    'A', 'Abbr', 'Address', 'Area', 'Article', 'Aside', 'Audio', 'B',
    'Base', 'Bdi', 'Bdo', 'Blockquote', 'Body', 'Br', 'Button',
    'Canvas', 'Caption', 'Cite', 'Code', 'Col', 'Colgroup', 'Data',
    'Datalist', 'Dd', 'Del', 'Details', 'Dfn', 'Dialog', 'Div', 'Dl',
    'Dt', 'Em','Embed', 'Fieldset', 'Figcaption', 'Figure', 'Footer',
    'Form', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'Head', 'Header',
    'Hgroup', 'Hr', 'Html', 'I', 'Iframe', 'Img', 'Input', 'Ins', 'Kbd',
    'Label', 'Legend', 'Li', 'Link', 'Main', 'Map', 'Mark', 'Math',
    'Menu', 'Meta', 'Meter', 'Mathml', 'Nav', 'Noscript', 'Object',
    'Ol', 'Optgroup', 'Option', 'Output', 'P', 'Picture', 'Pre',
    'Progress', 'Q', 'Rp', 'Rt', 'Ruby', 'S', 'Samp', 'Script',
    'Search', 'Section', 'Select', 'Slot', 'Small', 'Source', 'Span',
    'Strong', 'Style', 'Sub', 'Summary', 'Svg', 'Sup', 'Table', 'Tbody',
    'Td', 'Template', 'Textarea', 'Tfoot', 'Th', 'Thead', 'Time',
    'Title', 'Tr', 'Track', 'U', 'Ul', 'Var', 'Video', 'Wbr'
}
# Tags for void elements: they cannot have content and take no closing tag.
# https://developer.mozilla.org/en-US/docs/Glossary/Void_element
# `iframe` and `template` are NOT void elements (both need an end tag), so
# they must not be listed here or their closing tags would be dropped.
VOID_ELEMENTS = {
    'Area', 'Base', 'Br', 'Col', 'Embed', 'Hr', 'Img', 'Input',
    'Link', 'Meta', 'Source', 'Track', 'Wbr',
}
# Attribute names containing any of these characters are passed through
# untranslated. The set holds the printable ASCII punctuation characters
# plus the space character; the underscore is intentionally left out.
# http://ascii.ci/htmlcodes.htm
SPECIAL_CHARS = set(' !"#$%&\'()*+,-./:;<=>?@[\\]^{|}~`')

# Single-character constants shared by the attribute helpers.
DASH = '-'
UNDERSCORE = '_'
SINGLE_QUOTE = "'"
DOUBLE_QUOTE = '"'
@runtime_checkable
class FastTag(Protocol):
    '''Protocol for fast tags; can be called & safely rendered as HTML.

    The protocol is runtime-checkable because it is used in `isinstance`
    style checks (`match` class patterns); a plain `Protocol` raises
    `TypeError` there. The runtime check only verifies that the `tag`
    attribute and the `__ft__` method exist, not their types.
    '''

    tag: str

    def __ft__(self) -> str:
        '''Fast Tag callable.'''
        ...
@runtime_checkable
class SafeHtml(Protocol):
    '''Protocol for objects that can be safely rendered as HTML.

    Runtime-checkable so it can be used in `isinstance` checks and
    `match` class patterns; the check only verifies that `__html__`
    exists.
    '''

    def __html__(self) -> str:
        '''Return the HTML representation of the object.'''
        ...
@runtime_checkable
class Printable(Protocol):
    '''Protocol for objects that can be converted to a string.

    Runtime-checkable so it can be used in `isinstance` checks and
    `match` class patterns. Every Python object defines `__str__`, so at
    runtime this check accepts any object.
    '''

    def __str__(self) -> str:
        '''Convert the object to a string.'''
        ...
# Scalar values that can appear in JSON.
Primitive = str | int | float | bool | None
# Recursive JSON value: a scalar, or a dict / list of JSON values.
Json = Primitive | dict[str, 'Json'] | list['Json']
# Content accepted by the rendering helpers (used to annotate `_to_xml`,
# `to_xml` and the `FT` children).
Element = (
    FastTag | SafeHtml | Printable |
    Mapping[Primitive, Json] |
    Iterable['Element']
)
# Value accepted for a single attribute (used to annotate `to_attr`).
AttrValue = (
    None | bool | Printable |
    Iterable[Printable] |
    Mapping[str, Printable]
)
class Safe(str):
    '''A string that exposes itself through `__html__`.'''

    def __html__(self):
        # The string is its own HTML representation.
        return self
# --- Utility functions for Fast Tags ---
def to_list(obj: object) -> list:
    '''Return `obj` as a list.

    - `None` becomes an empty list.
    - A list is returned as-is (same object, not a copy).
    - `str` and `bytes` are treated as single values, not as sequences.
    - Any other iterable is materialized with `list()`.
    - Everything else is wrapped in a one-element list.

    >>> to_list(None)
    []
    >>> to_list([1, 2])
    [1, 2]
    >>> to_list('ab')
    ['ab']
    >>> to_list(i for i in 'ab')
    ['a', 'b']
    >>> to_list(42)
    [42]
    '''
    if obj is None:
        return []
    if isinstance(obj, list):
        return obj
    # Text-like values and non-iterables are wrapped rather than expanded.
    if isinstance(obj, (str, bytes)) or not isinstance(obj, Iterable):
        return [obj]
    return list(obj)
def flatten(nested_collection: object) -> Generator:
    '''Recursively flatten a nested collection.

    Nested iterables are expanded in order. Strings, bytes, mappings and
    fast tags are treated as atomic items and yielded unchanged, even
    though they are iterable.

    Args:
        nested_collection: The iterable to flatten.

    Yields:
        The leaf items, depth first, left to right.

    Examples:
    >>> tuple(flatten((1, [2, 3], (4, [5, 6]), 7)))
    (1, 2, 3, 4, 5, 6, 7)
    >>> tuple(flatten([[1, 2], [3, [4, 5]], 6]))
    (1, 2, 3, 4, 5, 6)
    >>> tuple(flatten(()))
    ()
    >>> tuple(flatten((1,)))
    (1,)
    >>> tuple(flatten(['ab', ['cd']]))
    ('ab', 'cd')
    '''
    for item in nested_collection:
        # Structural equivalent of the FastTag protocol (a `tag` attribute
        # plus a `__ft__` method); a tag is iterable over its children but
        # must stay a single item.
        is_fast_tag = hasattr(item, 'tag') and hasattr(item, '__ft__')
        if is_fast_tag or isinstance(item, (str, bytes, Mapping)):
            yield item
        elif isinstance(item, Iterable):
            # Decide per *item* (not per container) whether to recurse.
            yield from flatten(item)
        else:
            yield item
# -- Attribute mapping (translation) functions --
def keymap(key: str) -> str:
    '''Translate a Python-friendly attribute name into an HTML one.

    Rules, applied in this order:
    - A key containing any character from SPECIAL_CHARS is returned as-is.
    - The key 'cls' becomes 'class'.
    - Otherwise leading underscores are stripped, remaining underscores
      become dashes, trailing dashes are stripped, and the result is
      lowercased.
    - If nothing is left, a single underscore is returned.

    Examples:
    >>> keymap('simple_key')
    'simple-key'
    >>> keymap('_leading_underscore')
    'leading-underscore'
    >>> keymap('trailing_underscore_')
    'trailing-underscore'
    >>> keymap('__both__ends__')
    'both--ends'
    >>> keymap('cls')
    'class'
    >>> keymap('key!with$special#chars')
    'key!with$special#chars'
    >>> keymap('___')
    '_'
    >>> keymap('')
    '_'
    '''
    # Names that already contain punctuation are taken literally.
    if not SPECIAL_CHARS.isdisjoint(key):
        return key
    if key == 'cls':
        return 'class'
    without_prefix = key.lstrip(UNDERSCORE)
    dashed = without_prefix.replace(UNDERSCORE, DASH)
    trimmed = dashed.rstrip(DASH)
    return trimmed.lower() or UNDERSCORE
def attrmap(attrs: dict[str, object]) -> dict[str, object]:
    '''Return a copy of `attrs` whose keys were translated by `keymap`.

    If two keys translate to the same name, the later one wins.
    '''
    translated = {}
    for name, value in attrs.items():
        translated[keymap(name)] = value
    return translated
def add_quotes(val: str) -> str:
    '''Wrap an HTML attribute value in the appropriate quotes.

    - Values without double quotes are wrapped in double quotes.
    - Values containing double quotes are wrapped in single quotes, and
      any single quotes inside them are replaced by "&#39;".

    Only the quoted value is returned; the attribute name is not part of
    the result.

    Examples:
    >>> print(add_quotes('value'))
    "value"
    >>> print(add_quotes('say "hi"'))
    'say "hi"'
    >>> print(add_quotes(''))
    ""
    '''
    if DOUBLE_QUOTE not in val:
        return f'{DOUBLE_QUOTE}{val}{DOUBLE_QUOTE}'
    # Double quotes are present: switch delimiters and neutralize any
    # single quotes so they cannot terminate the value early.
    escaped = val.replace(SINGLE_QUOTE, "&#39;")
    return f'{SINGLE_QUOTE}{escaped}{SINGLE_QUOTE}'
def to_attr(key: str, value: AttrValue) -> str:
match value:
case False | None | '':
return ''
case True:
return key
case str():
result = value
case Mapping():
result = '; '.join(f'{k}:{v}' for k, v in value.items())
case Iterable():
result = ' '.join(map(str, value))
case Printable():
result = str(value)
case _:
raise TypeError(
f'Unsupported type {type(value).__name__} for {key!r} '
'attribute value.'
)
quote_char, return_val = add_quotes(result)
return f'{key}={quote_char}{return_val}{quote_char}'
def _to_xml(el: Element, escape: Callable | None = html.escape):
if not escape:
escape = lambda x: x
match el:
case FastTag():
# openning tag
if el.attrs:
yield f'<{el.tag} {to_attr(el.attrs)}>'
else:
yield f'<{el.tag}>'
# contents
yield el.__ft__()
# closing tag
if not el.void:
yield f'</{el.tag}>'
case SafeHtml():
yield el.__html__()
case str():
yield escape(el)
case bytes():
try:
yield escape(el.decode('utf-8'))
except UnicodeDecodeError:
raise
case Mapping():
yield json.dumps(el)
case Iterable():
yield '\n'.join(map(_to_xml, el))
case None:
pass
case Printable():
yield escape(str(el))
case _:
raise TypeError(
f'Unsupported type {type(el).__name__} '
'in HTML conversion.'
)
def to_xml(contents: Element, escape: Callable | None = html.escape):
    '''Convert `ft` element tree into HTML.

    Args:
        contents: A single element or an iterable of elements.
        escape: Callable used to escape text content; pass `None` to
            disable escaping.

    Returns:
        The rendered markup as a `Safe` string, so it is not escaped
        again when embedded in other content.
    '''
    # `_to_xml` is a generator of string fragments; concatenate them.
    return Safe(''.join(_to_xml(contents, escape)))
def highlight(contents, lang="html"):
    "Render `contents` with `to_xml` inside a Markdown code fence for `lang`"
    fence = "```"
    body = to_xml(contents)
    return f"{fence}{lang}\n{body}\n{fence}"
def showtags(s):
    '''Return markup that displays the HTML source of `s` verbatim.

    Args:
        s: A string of HTML source, or any element `to_xml` can render.

    Returns:
        The escaped source wrapped in `<pre><code>...</code></pre>`, so a
        browser shows the tags instead of interpreting them.
    '''
    # Strings are already source text; everything else is rendered first.
    source = s if isinstance(s, str) else to_xml(s)
    # `<pre>` must be the outer element: `<code>` may only hold phrasing
    # content. Escaping exactly once keeps the tags visible.
    return f'<pre><code>{html.escape(source)}</code></pre>'
def tidy(html: str) -> str:
    '''Return a prettified version of `html`.

    Uses Beautiful Soup's `prettify` when the package is installed.
    Otherwise a hint is printed to stderr and `html` is returned
    unchanged, so callers always receive a string.
    '''
    if BEAUTIFUL_SOUP_INSTALLED:
        return BeautifulSoup(html, 'html.parser').prettify()
    print(
        'Tidy requires Beautiful Soup:\n'
        '`pip install beautifulsoup4`',
        file=sys.stderr
    )
    return html
class FT:
    "A 'Fast Tag' structure, containing `tag`,`children`,and `attrs`"

    # Names stored as real instance attributes. Any other attribute that
    # is set or read on an instance is routed to the `attrs` dict.
    internal_attrs = {
        'tag', 'children', 'attrs', 'void',
        'list', 'get', 'set',
        '__getitem__', '__setitem__', '__iter__', '__call__',
        '__repr__', '__str__', '__html__', '__ft__',
    }

    def __init__(
        self,
        tag: str,
        *contents: Element,
        void: bool = False,
        **attrs: object
    ):
        '''Create a tag.

        Args:
            tag: Tag name; stored lowercased.
            *contents: Children; nested iterables are flattened.
            void: True for tags without content or closing tag.
            **attrs: HTML attributes; names are translated by `keymap`.
        '''
        self.tag = tag.lower()
        self.void = void
        # Materialize: children are indexed, sliced and concatenated.
        self.children = list(flatten(contents))
        self.attrs = attrmap(attrs)

    def __setattr__(self, key, val):
        if key in FT.internal_attrs:
            return super().__setattr__(key, val)
        self.attrs[keymap(key)] = val

    def __getattr__(self, key, default=None):
        # Only called when normal lookup fails. Internal and dunder names
        # must fail normally: looking them up in `self.attrs` would recurse
        # forever while `attrs` itself is not set yet, and would make
        # protocol probes (e.g. `copy`, `pickle`) see bogus `None` members.
        if key in FT.internal_attrs or (
            key.startswith('__') and key.endswith('__')
        ):
            raise AttributeError(key)
        return self.attrs.get(keymap(key), default)

    def __html__(self):
        return to_xml(self)

    __str__ = __ft__ = __html__

    def __repr__(self):
        return f"{self.tag}({self.children},{self.attrs})"

    # Bound while `highlight` still refers to the module-level function;
    # the property of the same name is defined further down.
    _repr_markdown_ = highlight

    def __iter__(self):
        return iter(self.children)

    def __getitem__(self, idx: int):
        return self.children[idx]

    def __setitem__(self, idx: int, el: Element):
        '''Replace child `idx` with `el` (flattened, so possibly several).

        Raises:
            IndexError: If `idx` is out of range.
        '''
        size = len(self.children)
        pos = idx + size if idx < 0 else idx
        if not 0 <= pos < size:
            raise IndexError('FT child index out of range')
        # Wrap in a tuple so `el` itself is the item being flattened.
        self.children[pos:pos + 1] = flatten((el,))

    def __call__(self, *children: Element, **attrs: object):
        "Append children and/or merge attributes (chainable)"
        if children:
            # set to existing children followed by new children.
            self.children = self.children + list(flatten(children))
        if attrs:
            self.attrs = {**self.attrs, **attrmap(attrs)}
        return self

    def set(
        self,
        *children: Element,
        keep_attrs: set = frozenset({'id', 'name'}),
        **attrs: object
    ):
        "Set children and/or attributes (chainable)"
        if children:
            # Set to new children only; old children are gone.
            self.children = list(flatten(children))
        if attrs:
            # - Get preserved attributes (i.e., name & id); the keys in
            #   `self.attrs` are already translated.
            # - Preserved attributes can be overwritten
            # - Add new attributes.
            # - All other attributes are gone.
            kept = {k: v for k, v in self.attrs.items() if k in keep_attrs}
            self.attrs = {**kept, **attrmap(attrs)}
        return self

    @property
    def list(self):
        '''Return `[tag, children, attrs]`.'''
        return [self.tag, self.children, self.attrs]

    @property
    def html(self):
        '''Return the HTML representation of the Fast Tag.'''
        return self.__ft__()

    @property
    def tidy(self):
        '''Return a prettified HTML representation of the Fast Tag.'''
        return tidy(self.html)

    @property
    def highlight(self):
        '''Render HTML output in Markdown for syntax highlighting.'''
        return highlight(self.tidy)

    @property
    def showtags(self):
        '''Render HTML output inside preformatted code block.'''
        return showtags(self.tidy)
def ft(*args, **kwargs):
    '''Build a Fast Tag: forwards every argument to `FT` unchanged.'''
    fast_tag = FT(*args, **kwargs)
    return fast_tag
# Create a Fast Tag partial as a global variable for each tag name, e.g.
# `Div(...)` calls `ft('Div', ..., void=False)`. `void` is True only for
# names listed in VOID_ELEMENTS. The names are injected through
# `globals()`, so static analysis tools cannot see them.
for tag in HTML_TAGS:
    globals()[tag] = partial(ft, tag, void=tag in VOID_ELEMENTS)
def Html(*contents, doctype=True, **attrs) -> FT | tuple[FT, FT]:
    '''An HTML tag, optionally preceded by `!DOCTYPE HTML`.

    This definition replaces the generic `Html` factory registered from
    HTML_TAGS.

    Returns:
        The `html` tag alone when `doctype` is false; otherwise a
        `(doctype_tag, html_tag)` tuple.
    '''
    res = ft('html', *contents, **attrs)
    if not doctype:
        return res
    # The doctype is modelled as a void tag with a bare `html` attribute.
    return (ft("!DOCTYPE", html=True, void=True), res)
# Default <head>: a UTF-8 charset declaration and a viewport meta tag.
# `Head` and `Meta` are the factories registered from HTML_TAGS above.
HeadDefault = Head(
    Meta(charset='utf-8'),
    Meta(
        name='viewport',
        content='width=device-width, initial-scale=1.0',
    ),
)
def html_wrapper(
fast_tag: FT,
hdrs: Iterable[Element],
ftrs: Iterable[Element]
) -> FT:
'''Wrap a Fast Tag in HTML, Head, and/or Body tags if not provided.'''
match fast_tag.tag:
case 'html':
return fast_tag
case 'head':
return Html(fast_tag)
case 'body':
return Html(HeadDefault(hdrs), fast_tag)
case _:
return Html(HeadDefault(hdrs), Body(fast_tag, ftrs))
'''
# %% ../nbs/09_xml.ipynb
def __getattr__(tag):
if tag.startswith("_") or tag[0].islower():
raise AttributeError
tag = keymap(tag)
def _f(*c, target_id=None, **kwargs):
return ft(tag, *c, target_id=target_id, **kwargs)
return _f
'''
# %% ../nbs/01_basics.ipynb
def _risinstance(types, obj):
    '''Check `obj` against `types`, which may name classes as strings.

    Without any string in `types` this is plain `isinstance`. As soon as
    one entry is a string, the class names along `type(obj).__mro__` are
    looked up in `types` instead.
    '''
    has_names = any(isinstance(t, str) for t in types)
    if not has_names:
        return isinstance(obj, types)
    mro_names = (cls.__name__ for cls in type(obj).__mro__)
    return any(name in types for name in mro_names)
def tuplify(o, use_list=False, match=None):
    "Make `o` a tuple via `to_list`; `use_list` and `match` are accepted but not used"
    items = to_list(o)
    return tuple(items)
def risinstance(types, obj=None):
    "Reversed-argument `isinstance`; with `obj` omitted (or `None`) return a checker bound to `types`"
    type_tuple = tuplify(types)
    if obj is not None:
        return _risinstance(type_tuple, obj)
    # No object given: hand back a one-argument predicate instead.
    return partial(_risinstance, type_tuple)
# Matches attribute names made only of word characters and dashes that do
# not start with a digit; `html2ft` emits those as keyword arguments and
# collects every other name into a `**{...}` dict.
_re_h2x_attr_key = re.compile(r'^[A-Za-z_-][\w-]*$')
# %% ../nbs/api/01_components.ipynb
def html2ft(html: str) -> str:
'''Convert HTML to an `ft` expression'''
rev_keymap = {'class': 'cls', 'for': '_for'}
def _parse(el: str, lvl: int = 0, indent: int = 4):
match el:
case str():
return repr(el.strip()) if el.strip() else ''
case list():
return '\n'.join(_parse(e, lvl) for e in el)
tag_name = el.name.capitalize().replace(DASH, UNDERSCORE)
if tag_name=='[document]':
return _parse(list(el.children), lvl)
cts = el.contentss
cs = [
repr(c.strip()) if isinstance(c, str) else _parse(c, lvl + 1)
for c in cts if str(c).strip()
]
attrs, exotic_attrs = [], {}
for key, value in sorted(
el.attrs.items(), key=lambda x: x[0]=='class'
):
if isinstance(value,(tuple,list)):
value = " ".join(value)
key, value = rev_keymap.get(key, key), value or True
if _re_h2x_attr_key.match(key):
attrs.append(f'{key.replace(DASH, UNDERSCORE)}={value!r}')
else:
exotic_attrs[key] = value
if exotic_attrs:
attrs.append(f'**{exotic_attrs!r}')
spc = ' ' * lvl * indent
onlychild = not cts or (len(cts)==1 and isinstance(cts[0], str))
j = ', ' if onlychild else f',\n{spc}'
inner = j.join(filter(None, cs+attrs))
if onlychild:
return f'{tag_name}({inner})'
if not attrs:
return f'{tag_name}(\n{spc}{inner}\n{" "*(lvl-1)*indent})'
inner_cs = j.join(filter(None, cs))
inner_attrs = ', '.join(filter(None, attrs))
return (
f'{tag_name}({inner_attrs})(\n{spc}{inner_cs}\n'
f'{" " * (lvl -1 ) * indent})'
)
soup = BeautifulSoup(html.strip(), 'html.parser')
for c in soup.find_all(string=risinstance(Comment)):
c.extract()
return _parse(soup, 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment