Skip to content

Instantly share code, notes, and snippets.

@qknight
Created February 19, 2025 12:37
Show Gist options
  • Save qknight/968e11fe2b1c7531493a3665ddcd7aad to your computer and use it in GitHub Desktop.
Save qknight/968e11fe2b1c7531493a3665ddcd7aad to your computer and use it in GitHub Desktop.
# main.rs
#[macro_use]
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;
use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
use rcdom::{Handle, NodeData, RcDom};
use std::io::Cursor;
// This is not proper HTML serialization, of course.
fn walk(indent: usize, handle: &Handle) {
let node = handle;
for _ in 0..indent {
print!(" ");
}
let close_tag = match node.data {
NodeData::Document => {println!(""); None },
NodeData::Doctype {
ref name,
ref public_id,
ref system_id,
} => {
let mut doctype = format!("<!DOCTYPE {}", name);
if !public_id.is_empty() {
doctype += &format!(" \"{}\"", public_id);
}
if !system_id.is_empty() {
doctype += &format!(" \"{}\"", system_id);
}
doctype += ">";
println!("{}", doctype);
None
},
NodeData::Text { ref contents } => {
println!("{}", contents.borrow().escape_default());
None
},
NodeData::Comment { ref contents } => {
println!("<!-- {} -->", contents.escape_default());
None
},
NodeData::Element {
ref name,
ref attrs,
..
} => {
assert!(name.ns == ns!(html));
print!("<{}", name.local);
for attr in attrs.borrow().iter() {
assert!(attr.name.ns == ns!());
print!(" {}=\"{}\"", attr.name.local, attr.value);
}
println!(">");
Some(format!("</{}>", name.local))
},
NodeData::ProcessingInstruction { .. } => {
unreachable!();
None
},
};
for child in node.children.borrow().iter() {
walk(indent + 2, child);
}
match close_tag {
Some(tag) => {
println!("{}{}", " ".repeat(indent), tag);
},
None => {}
}
}
/// Parses a fixed sample HTML document, dumps the resulting DOM tree with
/// `walk`, and then lists any parse errors the parser recorded.
fn main() {
    let html = "<!DOCTYPE html><!-- asdf --><html><head><title>Test</title></head><body><h1>Hello, World!</h1></body></html>";

    // Feed the sample to the parser as a UTF-8 byte stream.
    let mut input = Cursor::new(html.as_bytes());
    let dom = parse_document(RcDom::default(), Default::default())
        .from_utf8()
        .read_from(&mut input)
        .unwrap();

    walk(0, &dom.document);

    // Borrow the error list once; nothing more to print when it is empty.
    let errors = dom.errors.borrow();
    if errors.is_empty() {
        return;
    }
    println!("\nParse errors:");
    errors.iter().for_each(|err| println!(" {err}"));
}
# Cargo.toml
[package]
name = "z"
version = "0.1.0"
edition = "2021"
[dependencies]
chrono = "0.4.39"
html5ever = { version = "0.29.0", features = ["trace_tokenizer"] }
lazy_static = "1.5.0"
markup5ever = "0.14.0"
markup5ever_rcdom = "0.5.0-unofficial"
prettyish-html = "0.1.1"
regex = "1.11.1"
time-util = { version = "0.3.4", features = ["chrono"] }
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.41s
Running `target/debug/z`
# output
<!DOCTYPE html>
<!-- asdf -->
<html>
<head>
<title>
Test
</title>
</head>
<body>
<h1>
Hello, World!
</h1>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment