Skip to content

Instantly share code, notes, and snippets.

@mbrock
Created November 29, 2024 23:44
Show Gist options
  • Save mbrock/682e71c4c29705a2f67a82b5a515cbbe to your computer and use it in GitHub Desktop.
Save mbrock/682e71c4c29705a2f67a82b5a515cbbe to your computer and use it in GitHub Desktop.
/**
* Copyright (c) 2024 Mikael Brockman <https://github.com/mbrock>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
 * TypeWriter reveals its text content gradually using the CSS Custom
 * Highlight API, creating a smooth typing animation.
 *
 * A single Range (the "blind") covers the not-yet-revealed tail of the
 * element and is registered in the "transparent" highlight, which paints
 * that text with a transparent color. Revealing a character means moving
 * the start of the blind one position forward.
 *
 * The typing speed varies based on punctuation and position in the text,
 * with natural pauses at punctuation marks and acceleration as it progresses.
 *
 * Dispatches a "typingComplete" CustomEvent whenever nothing but whitespace
 * is left to reveal.
 */
class TypeWriter extends HTMLElement {
  // Typing speed delays for different punctuation marks (in relative units).
  // The base speed is divided by the delay of the next hidden character.
  static delays = {
    " ": 3,
    ",": 8,
    ";": 8,
    ":": 9,
    ".": 10,
    "—": 12,
    "–": 7,
    "!": 15,
    "?": 15,
    "\n": 20,
  }

  // Whether the ::highlight(transparent) rule has already been adopted
  static #stylesInstalled = false

  /**
   * Default typing speed configuration. A fresh object is returned on every
   * access, so mutating the result does not affect any element; each
   * instance keeps its own copy in `this.speedConfig`.
   */
  static get speedConfig() {
    return {
      min: 30, // Minimum characters per second
      max: 80, // Maximum characters per second
      curve: 2, // Acceleration curve power
    }
  }

  // Adopt the highlight style once per document instead of once per
  // connected element, so the adopted stylesheet list does not keep growing.
  static #installStyles() {
    if (TypeWriter.#stylesInstalled) return
    const css = new CSSStyleSheet()
    css.replaceSync(`::highlight(transparent) { color: transparent }`)
    document.adoptedStyleSheets = [...document.adoptedStyleSheets, css]
    TypeWriter.#stylesInstalled = true
  }

  constructor() {
    super()
    // Counter for how many characters to reveal
    this.limit = 0
    // Range object used to hide unrevealed text
    this.blind = new Range()
    // Handle of the pending reveal step; undefined while idle
    this.timer = undefined
    // Per-instance speed settings, adjustable through setSpeed()
    this.speedConfig = { ...TypeWriter.speedConfig }
    // Observer that watches for content changes and triggers updates
    this.scout = new MutationObserver(() => {
      this.update()
      if (!this.timer) this.proceed()
    })
    // Not read by TypeWriter itself; kept so existing property access keeps working
    this.lastTranscriptTime = Date.now()
  }

  /** Registers the blind in the shared highlight and starts typing. */
  connectedCallback() {
    // Set up CSS highlight API for revealing text gradually
    TypeWriter.#installStyles()
    // Initialize the blind range to cover all content
    this.blind.selectNodeContents(this)
    // Create or get the highlight for unrevealed text
    const highlight = CSS.highlights.get("transparent") ?? new Highlight()
    highlight.add(this.blind)
    CSS.highlights.set("transparent", highlight)
    // Start observing content changes
    this.scout.observe(this, {
      childList: true,
      subtree: true,
      characterData: true,
    })
    this.proceed()
  }

  /** Stops observing and typing, and removes the blind from the highlight. */
  disconnectedCallback() {
    this.scout.disconnect()
    CSS.highlights.get("transparent")?.delete(this.blind)
    clearTimeout(this.timer)
    // Forget the cleared handle so the element is idle if it is re-attached
    this.timer = undefined
  }

  /**
   * Repositions the blind so that exactly `this.limit` characters (counted
   * over the element's text nodes, in document order) are left visible.
   */
  update() {
    // Walk through text nodes to find where to place the blind range
    const walker = document.createTreeWalker(this, NodeFilter.SHOW_TEXT)
    let remaining = this.limit
    let node = null
    while ((node = walker.nextNode())) {
      const visible = Math.min(remaining, node.data.length)
      remaining -= visible
      if (remaining <= 0) {
        // Found the node where the reveal cutoff should be
        this.blind.setStart(node, visible)
        break
      }
    }
    if (remaining > 0) {
      // The content shrank below the number of revealed characters.
      // Everything that is left has already been revealed, so collapse the
      // blind at the end (hiding nothing) and clamp the counter to the text
      // that actually exists; otherwise later additions would be revealed
      // without animation up to the stale limit.
      this.limit -= remaining
      this.blind.setStart(this, this.childNodes.length)
    }
    // Always set blind to end after all content
    this.blind.setEndAfter(this)
  }

  /**
   * Reveals one more character and schedules the next step. When only
   * whitespace remains hidden, stops and dispatches "typingComplete".
   */
  proceed() {
    if (this.blind.toString().trim() === "") {
      this.timer = undefined
      this.dispatchEvent(new CustomEvent("typingComplete"))
      return
    }
    // textContent is measured in the same units as the text-node walk in
    // update() and as Range.toString(); innerText applies whitespace
    // collapsing (so the limit could get stuck short of the real text) and
    // forces layout on every step.
    const totalLength = this.textContent.length
    this.limit = Math.min(this.limit + 1, totalLength)
    this.update()
    const remainingText = this.blind.toString()
    const speed = this.calculateSpeed(totalLength, remainingText)
    this.timer = setTimeout(() => this.proceed(), 1000 / speed)
  }

  /**
   * Computes the current typing speed.
   *
   * @param {number} totalLength - Total number of characters in the element.
   * @param {string} remainingText - Text that is still hidden.
   * @returns {number} Characters per second for the next step.
   */
  calculateSpeed(totalLength, remainingText) {
    const { min, max, curve } = this.speedConfig
    const speedRange = max - min
    // Fraction of the text already revealed; empty content counts as done
    // so the division below can never produce NaN.
    const rawProgress =
      totalLength > 0 ? 1 - remainingText.length / totalLength : 1
    const progress = Math.min(1, Math.max(0, rawProgress))
    const baseSpeed = min + speedRange * progress ** curve
    const nextChar = remainingText[0]
    return baseSpeed / (TypeWriter.delays[nextChar] ?? 1)
  }

  /**
   * Scales this element's typing speed. The factor applies to the current
   * settings, so repeated calls compound (2 then 2 gives 4x).
   *
   * @param {number} multiplier - Positive, finite scale factor.
   */
  setSpeed(multiplier) {
    if (!Number.isFinite(multiplier) || multiplier <= 0) {
      // A zero, negative or non-numeric speed would yield a meaningless delay
      console.warn("TypeWriter.setSpeed: ignoring invalid multiplier", multiplier)
      return
    }
    const { min, max } = this.speedConfig
    this.speedConfig.min = min * multiplier
    this.speedConfig.max = max * multiplier
  }
}
// Register the class as the <type-writer> custom element
customElements.define("type-writer", TypeWriter)
/**
 * VoiceWriter is a custom element that combines speech recognition with typewriter effects.
 * It captures audio input, streams it to a WebSocket server for real-time transcription,
 * and displays the results with a typewriter effect. It handles both interim results
 * (shown faded, in <ins>) and final transcriptions (shown solid, in <span>), creating a
 * natural voice-to-text experience with visual feedback.
 *
 * Attributes:
 *   language - language tag sent to the server (default "en-US")
 *   server   - WebSocket origin of the transcription service (default "wss://swa.sh")
 *   debug    - when present, adds a button that closes the socket to exercise reconnects
 */
class VoiceWriter extends HTMLElement {
  constructor() {
    super()
    this.handleTranscript = this.handleTranscript.bind(this)
    this.processAudio = this.processAudio.bind(this)
    this.reconnectAttempts = 0
    this.maxReconnectAttempts = 5
    this.reconnectDelay = 1000
    // Handle of the pending reconnect attempt, if any
    this.reconnectTimer = null
    // Track both instant rate and moving average
    this.bytesSent = 0
    this.uploadRate = 0
    this.movingAverage = 0
    this.alpha = 0.2 // Smoothing factor: weight given to the newest sample
    this.uploadRateInterval = null
    this.isRecording = true
    this.lastTranscriptTime = Date.now()
  }

  static get observedAttributes() {
    return ["language", "server"]
  }

  get language() {
    return this.getAttribute("language") ?? "en-US"
  }

  get server() {
    return this.getAttribute("server") ?? "wss://swa.sh"
  }

  get wsUrl() {
    // Encode the attribute value so it cannot break or extend the query string
    const language = encodeURIComponent(this.language)
    return `${this.server}/transcribe?language=${language}`
  }

  /** Builds the UI on first connection and asks for microphone access. */
  connectedCallback() {
    // Build the UI only once; re-attaching the element must not duplicate it
    if (!this.writer) this.#buildInterface()
    this.updateStatus("Initializing...")

    if (!navigator.mediaDevices?.getUserMedia) {
      // e.g. insecure context or unsupported browser
      console.error("🎤 navigator.mediaDevices.getUserMedia is not available")
      this.updateStatus("Microphone unavailable", "text-red-600 dark:text-red-400")
      return
    }

    navigator.mediaDevices
      .getUserMedia({ audio: true })
      .then((stream) => {
        if (!this.isConnected) {
          // Removed from the document while the permission prompt was open:
          // release the microphone instead of starting a pipeline
          for (const track of stream.getTracks()) track.stop()
          return undefined
        }
        return this.beginListening(stream)
      })
      .catch((error) => {
        console.error(error)
        this.updateStatus("Microphone unavailable", "text-red-600 dark:text-red-400")
      })
  }

  // Creates the status bar (recording toggle, status text, optional debug
  // button) and the <type-writer> that displays the transcript.
  #buildInterface() {
    this.writer = document.createElement("type-writer")
    this.writer.className = "block p-4"
    // Create status bar
    this.statusBar = document.createElement("div")
    this.statusBar.className =
      "flex items-center font-sans py-1 justify-between gap-2 px-1 text-sm bg-gray-100 dark:bg-gray-800/30 border-b border-gray-300 dark:border-gray-700"
    // Create status section
    this.statusSection = document.createElement("div")
    this.statusSection.className = "flex items-center gap-2"
    this.statusBar.appendChild(this.statusSection)
    // Create recording toggle
    const toggleContainer = document.createElement("div")
    toggleContainer.className = "flex items-center gap-2"
    const toggle = document.createElement("input")
    toggle.type = "checkbox"
    toggle.id = "recordingToggle"
    toggle.checked = this.isRecording
    toggle.className =
      "form-checkbox h-4 w-4 text-blue-600 transition duration-150 ease-in-out"
    toggle.addEventListener("change", (e) => {
      this.isRecording = e.target.checked
      this.updateStatus(
        this.isRecording ? "Listening..." : "Paused",
        this.isRecording
          ? "text-emerald-600 dark:text-emerald-400"
          : "text-yellow-600 dark:text-yellow-400"
      )
    })
    const label = document.createElement("label")
    label.htmlFor = "recordingToggle"
    label.className = "text-sm text-gray-600 dark:text-gray-400"
    label.textContent = ""
    toggleContainer.appendChild(toggle)
    toggleContainer.appendChild(label)
    this.statusBar.prepend(toggleContainer)
    // Add debug button to status bar if debug mode is enabled
    if (this.hasAttribute("debug")) {
      const debugButton = document.createElement("button")
      debugButton.className =
        "px-2 hover:bg-slate-200 dark:hover:bg-slate-800 transition-colors"
      debugButton.textContent = "🔌"
      debugButton.addEventListener("click", () => {
        console.log("🎤 Debug: Simulating WebSocket disconnect")
        if (this.ws?.readyState === WebSocket.OPEN) {
          this.ws.close()
        }
      })
      this.statusBar.appendChild(debugButton)
    }
    this.appendChild(this.statusBar)
    this.appendChild(this.writer)
  }

  /** Tears down timers, the audio pipeline, the microphone and the socket. */
  disconnectedCallback() {
    clearInterval(this.uploadRateInterval)
    this.uploadRateInterval = null
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = null

    this.processor?.removeEventListener("audioprocess", this.processAudio)
    this.processor?.disconnect()
    this.source?.disconnect()
    // close() throws on an encoder that is already closed (e.g. after an error)
    if (this.encoder && this.encoder.state !== "closed") this.encoder.close()
    // close() rejects on a context that is already closed
    if (this.context && this.context.state !== "closed") {
      this.context.close().catch(console.error)
    }
    // Release the microphone so the browser's recording indicator turns off
    this.stream?.getTracks().forEach((track) => track.stop())

    // Detach the socket before closing it: its "close" handler ignores
    // sockets that are no longer current, so no reconnect gets scheduled.
    const socket = this.ws
    this.ws = undefined
    socket?.close()

    this.encoder = undefined
    this.processor = undefined
    this.source = undefined
    this.context = undefined
    this.stream = undefined
  }

  /**
   * Reflects the connection state in the data-ws-state attribute and the status text.
   *
   * @param {string} state - "connecting", "connected", "disconnected", or any
   *   other label, which is shown verbatim in the default color.
   */
  setWebSocketState(state) {
    this.setAttribute("data-ws-state", state)
    const statusMessages = {
      connecting: "Connecting...",
      connected: "Listening...",
      disconnected: "Reconnecting...",
    }
    const statusColors = {
      connecting: "text-yellow-600 dark:text-yellow-400",
      connected: "text-emerald-600 dark:text-emerald-400",
      disconnected: "text-red-600 dark:text-red-400",
    }
    this.updateStatus(
      statusMessages[state] || state,
      statusColors[state] || "text-gray-600 dark:text-gray-400"
    )
  }

  /**
   * Replaces the status text.
   *
   * @param {string} message - Text to display.
   * @param {string} [colorClass] - CSS classes applied to the text.
   */
  updateStatus(message, colorClass = "text-gray-600 dark:text-gray-400") {
    const text = document.createElement("span")
    text.className = colorClass
    text.textContent = message
    // Swap the previous message for the new one in a single step
    this.statusSection.replaceChildren(text)
  }

  /**
   * Opens the transcription socket and wires its event handlers. Resolves as
   * soon as the socket object exists; it does not wait for the "open" event.
   */
  async connectWebSocket() {
    try {
      console.log("🎤 Connecting to transcription service...")
      this.setWebSocketState("connecting")
      const socket = new WebSocket(this.wsUrl)
      this.ws = socket
      socket.binaryType = "arraybuffer"
      socket.addEventListener("message", this.handleTranscript)
      // Every handler ignores events from a socket that has been replaced or
      // detached (see disconnectedCallback), so a stale socket cannot change
      // the status or trigger a reconnect.
      socket.addEventListener("open", () => {
        if (this.ws !== socket) return
        console.log("🎤 Connected to transcription service")
        this.setWebSocketState("connected")
        this.reconnectAttempts = 0
        this.reconnectDelay = 1000
      })
      socket.addEventListener("close", () => {
        if (this.ws !== socket) return
        console.log("🎤 Disconnected from transcription service")
        this.setWebSocketState("disconnected")
        this.attemptReconnect()
      })
      socket.addEventListener("error", (error) => {
        if (this.ws !== socket) return
        console.error("🎤 WebSocket error:", error)
        this.setWebSocketState("disconnected")
      })
    } catch (error) {
      console.error("🎤 Failed to connect:", error)
      this.setWebSocketState("disconnected")
    }
  }

  /**
   * Schedules a reconnect with exponential backoff (capped at 10 s), up to
   * maxReconnectAttempts. Does nothing once the element has been removed.
   */
  attemptReconnect() {
    if (!this.isConnected) return
    if (this.reconnectAttempts >= this.maxReconnectAttempts) {
      console.error("🎤 Max reconnection attempts reached, giving up")
      // Do not keep claiming "Reconnecting..." when no further attempt follows
      this.updateStatus("Disconnected", "text-red-600 dark:text-red-400")
      return
    }
    const attempt = this.reconnectAttempts + 1
    const delay = this.reconnectDelay / 1000
    console.log(
      `🎤 Attempting to reconnect (attempt ${attempt}/${this.maxReconnectAttempts}) in ${delay}s...`
    )
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = setTimeout(() => {
      this.reconnectTimer = null
      this.reconnectAttempts++
      this.connectWebSocket()
      // Exponential backoff
      this.reconnectDelay = Math.min(this.reconnectDelay * 2, 10000)
    }, this.reconnectDelay)
  }

  /**
   * Connects to the server and builds the capture pipeline:
   * microphone -> ScriptProcessor -> Opus AudioEncoder -> WebSocket.
   *
   * @param {MediaStream} stream - Microphone stream from getUserMedia.
   */
  async beginListening(stream) {
    // Keep the stream so its tracks can be stopped on disconnect
    this.stream = stream
    await this.connectWebSocket()
    // Set up audio context and nodes
    this.context = new AudioContext()
    this.source = this.context.createMediaStreamSource(stream)
    const channels = stream.getAudioTracks()[0].getSettings().channelCount ?? 1
    this.processor = this.context.createScriptProcessor(16384, channels, 1)
    // Set up encoder; every encoded packet is forwarded to the server
    this.encoder = new AudioEncoder({
      output: (packet) => {
        if (this.ws?.readyState === WebSocket.OPEN) {
          const buffer = new ArrayBuffer(packet.byteLength)
          packet.copyTo(buffer)
          this.ws.send(buffer)
          this.bytesSent += buffer.byteLength
        }
      },
      error: console.error,
    })
    // Sample the upload rate twice a second. The byte count of each 500 ms
    // window is doubled to express it per second, then smoothed.
    clearInterval(this.uploadRateInterval)
    this.uploadRateInterval = setInterval(() => {
      this.uploadRate = this.bytesSent * 2
      this.movingAverage =
        this.alpha * this.uploadRate + (1 - this.alpha) * this.movingAverage
      this.bytesSent = 0 // reset counter
      this.updateUploadRate()
    }, 500)
    // Describe the audio at the rate the context really runs at. The rate is
    // device dependent (often 48000, but e.g. 44100 on some hardware), and
    // labelling it with a fixed value would shift pitch and speed.
    this.encoder.configure({
      codec: "opus",
      sampleRate: this.context.sampleRate,
      numberOfChannels: 1,
      opus: {
        application: "lowdelay",
        signal: "voice",
      },
    })
    // Wire up audio pipeline. The processor has to be connected to the
    // destination for "audioprocess" events to fire.
    this.source.connect(this.processor)
    this.processor.connect(this.context.destination)
    this.processor.addEventListener("audioprocess", this.processAudio)
  }

  /**
   * Encodes one buffer of captured audio (first channel only).
   *
   * @param {AudioProcessingEvent} event - Event from the ScriptProcessor node.
   */
  processAudio(event) {
    if (this.ws?.readyState !== WebSocket.OPEN || !this.isRecording) return
    // encode() throws unless the encoder is configured (it may have been
    // closed by an encoding error or by disconnectedCallback)
    if (this.encoder?.state !== "configured") return
    const samples = event.inputBuffer.getChannelData(0)
    const frame = new AudioData({
      // The constructor copies the samples, so the Float32Array can be
      // handed over directly
      data: samples,
      timestamp: event.playbackTime * 1000000,
      format: "f32",
      numberOfChannels: 1,
      numberOfFrames: samples.length,
      sampleRate: event.inputBuffer.sampleRate,
    })
    this.encoder.encode(frame)
    // The encoder works on its own clone; release ours right away instead
    // of waiting for garbage collection
    frame.close()
  }

  /** Shows the smoothed upload rate in the status bar. */
  updateUploadRate() {
    const formatRate = (rate) =>
      rate > 1024
        ? `${(rate / 1024).toFixed(1)} KB/s`
        : `${rate.toFixed(1)} B/s`
    // Update or create upload rate element
    if (!this.uploadRateElement) {
      this.uploadRateElement = document.createElement("span")
      this.uploadRateElement.className =
        "font-mono text-blue-600 dark:text-blue-400 ml-auto mr-2"
      // Keep the debug button, when present, as the last item
      const debugButton = this.statusBar.querySelector("button")
      if (debugButton) {
        this.statusBar.insertBefore(this.uploadRateElement, debugButton)
      } else {
        this.statusBar.appendChild(this.uploadRateElement)
      }
    }
    // Show moving average rate
    this.uploadRateElement.textContent = formatRate(this.movingAverage)
  }

  /**
   * Handles a message from the transcription server. Interim results update
   * a trailing <ins>; a final result turns it into a <span>.
   *
   * @param {MessageEvent} event - WebSocket message; non-string payloads are ignored.
   */
  handleTranscript(event) {
    if (typeof event.data !== "string") return
    let result
    try {
      result = JSON.parse(event.data)
    } catch (error) {
      console.error("Error parsing transcript:", error)
      return
    }
    const text = result?.channel?.alternatives?.[0]?.transcript
    if (result?.type !== "Results" || !text) return

    const currentTime = Date.now()
    const timeSinceLastTranscript = currentTime - this.lastTranscriptTime
    console.log(`🎤 Time since last transcript: ${timeSinceLastTranscript}ms`)
    // Add line break if more than 5 seconds have passed
    if (timeSinceLastTranscript > 5000 && this.writer.lastElementChild) {
      this.writer.appendChild(document.createElement("br"))
    }
    this.lastTranscriptTime = currentTime

    // Use <ins> for interim results that may change. An <ins> that is
    // already being finalized must not be reused (see below).
    let element = this.writer.lastElementChild
    if (!element || !element.matches("ins:not([data-final])")) {
      element = document.createElement("ins")
      this.writer.appendChild(element)
    }
    element.textContent = text
    if (!result.is_final) return

    // Convert interim <ins> to final <span> when transcription is confirmed
    const span = document.createElement("span")
    const sentence = /[.!?]$/.test(text) ? text : `${text}—`
    span.textContent = `${sentence} `
    // The view-transition callback runs asynchronously. Mark the element so
    // a result arriving in the meantime starts a new <ins> instead of
    // overwriting text that is about to be committed.
    element.dataset.final = "true"
    const commit = () => element.replaceWith(span)
    if (typeof document.startViewTransition === "function") {
      // Use view transitions API for smooth visual update
      document.startViewTransition(commit)
    } else {
      // Without the API, commit immediately rather than losing the text
      commit()
    }
  }
}
// Document-level styling for <voice-writer>: interim transcripts (<ins>) are
// drawn without the default underline and at reduced opacity.
const sheet = new CSSStyleSheet()
sheet.replaceSync(`
voice-writer ins {
text-decoration: none;
opacity: 0.6;
}
`)
// Append to, rather than replace, the stylesheets the document already adopted
document.adoptedStyleSheets = Array.from(document.adoptedStyleSheets).concat(sheet)
// Register the class as the <voice-writer> custom element
customElements.define("voice-writer", VoiceWriter)
// Usage:
// <voice-writer language="en-US" server="wss://swa.sh"></voice-writer>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment