Created
November 29, 2024 23:44
-
-
Save mbrock/682e71c4c29705a2f67a82b5a515cbbe to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Copyright (c) 2024 Mikael Brockman <https://github.com/mbrock>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/**
 * TypeWriter reveals text gradually using the CSS Highlight API,
 * creating a smooth typing animation.
 *
 * Text that has not been "typed" yet is covered by a Range (`blind`) that is
 * registered in the shared "transparent" highlight, which paints it with a
 * transparent color. Revealing a character moves the start of that range
 * forward by one.
 *
 * The typing speed varies based on punctuation and position in the text,
 * with natural pauses at punctuation marks and acceleration as it progresses.
 *
 * Dispatches a "typingComplete" event whenever nothing is left to reveal.
 */
class TypeWriter extends HTMLElement {
  // Typing speed delays for different punctuation marks (in relative units).
  // The base speed is divided by the delay of the next character to reveal.
  static delays = {
    " ": 3,
    ",": 8,
    ";": 8,
    ":": 9,
    ".": 10,
    "—": 12,
    "–": 7,
    "!": 15,
    "?": 15,
    "\n": 20,
  }

  // Stylesheet holding the ::highlight(transparent) rule, shared by all
  // instances so that connecting many elements does not adopt many copies.
  static highlightSheet = null

  // Base typing speed configuration. A fresh object is returned on every
  // access, so mutating the result has no effect; use setSpeed() to scale it.
  static get speedConfig() {
    return {
      min: 30, // Minimum characters per second
      max: 80, // Maximum characters per second
      curve: 2, // Acceleration curve power
    }
  }

  /**
   * Adopts the highlight stylesheet into the document unless it is already
   * adopted.
   */
  static ensureHighlightStyles() {
    let sheet = TypeWriter.highlightSheet
    if (!sheet) {
      sheet = new CSSStyleSheet()
      sheet.replaceSync(`::highlight(transparent) { color: transparent }`)
      TypeWriter.highlightSheet = sheet
    }
    if (!document.adoptedStyleSheets.includes(sheet)) {
      document.adoptedStyleSheets = [...document.adoptedStyleSheets, sheet]
    }
  }

  constructor() {
    super()
    // Counter for how many characters to reveal
    this.limit = 0
    // Range object used to hide unrevealed text
    this.blind = new Range()
    // Observer that watches for content changes and triggers updates
    this.scout = new MutationObserver(() => {
      this.update()
      if (!this.timer) this.proceed()
    })
    // Handle of the pending reveal step; undefined while idle
    this.timer = undefined
    // Per-instance factor applied to the base speed (see setSpeed)
    this.speedMultiplier = 1
    // Not read by TypeWriter itself; kept so existing readers keep working
    this.lastTranscriptTime = Date.now()
  }

  connectedCallback() {
    // Set up CSS highlight API for revealing text gradually
    TypeWriter.ensureHighlightStyles()
    // Initialize the blind range to cover all content
    this.blind.selectNodeContents(this)
    // Create or get the highlight for unrevealed text
    const highlight = CSS.highlights.get("transparent") ?? new Highlight()
    highlight.add(this.blind)
    CSS.highlights.set("transparent", highlight)
    // Start observing content changes
    this.scout.observe(this, {
      childList: true,
      subtree: true,
      characterData: true,
    })
    this.proceed()
  }

  disconnectedCallback() {
    this.scout.disconnect()
    CSS.highlights.get("transparent")?.delete(this.blind)
    clearTimeout(this.timer)
    // Forget the cancelled handle so the element reads as idle
    this.timer = undefined
  }

  /**
   * Repositions the blind range so that exactly `limit` characters
   * (counted across all descendant text nodes) are visible.
   */
  update() {
    // Walk through text nodes to find where to place the blind range
    const walk = document.createTreeWalker(this, NodeFilter.SHOW_TEXT)
    let node = null
    let remaining = this.limit
    while ((node = walk.nextNode())) {
      const visible = Math.min(remaining, node.data.length)
      remaining -= visible
      if (remaining <= 0) {
        // Found the node where the reveal cutoff should be
        this.blind.setStart(node, visible)
        break
      }
    }
    if (remaining > 0) {
      // The content is shorter than the reveal counter (e.g. text was
      // removed). Everything that exists has already been revealed, so hide
      // nothing and pull the counter back so later additions are typed out.
      this.blind.setStart(this, this.childNodes.length)
      this.limit -= remaining
    }
    // Always set blind to end after all content
    this.blind.setEndAfter(this)
  }

  /**
   * Reveals one more character and schedules the next step, or reports
   * completion when only whitespace (or nothing) is still hidden.
   */
  proceed() {
    if (this.blind.toString().trim() === "") {
      this.timer = undefined
      this.dispatchEvent(new CustomEvent("typingComplete"))
      return
    }
    // textContent counts the same characters as the text-node walk in
    // update() and as Range.toString(); innerText does not (it collapses
    // whitespace and renders <br> as "\n"), which could stall the reveal.
    const totalLength = this.textContent.length
    this.limit = Math.min(this.limit + 1, totalLength)
    this.update()
    const remainingText = this.blind.toString()
    const speed = this.calculateSpeed(totalLength, remainingText)
    this.timer = setTimeout(() => this.proceed(), 1000 / speed)
  }

  /**
   * Computes the current typing speed.
   *
   * @param {number} totalLength - Total number of characters in the element.
   * @param {string} remainingText - Text that is still hidden.
   * @returns {number} Speed in characters per second (always finite).
   */
  calculateSpeed(totalLength, remainingText) {
    const { min, max, curve } = TypeWriter.speedConfig
    const speedRange = max - min
    // Guard against an empty element and clamp to [0, 1] so the result
    // stays within the configured range even for inconsistent inputs.
    const hiddenShare = totalLength > 0 ? remainingText.length / totalLength : 0
    const progress = Math.min(Math.max(1 - hiddenShare, 0), 1)
    const multiplier = this.speedMultiplier ?? 1
    const baseSpeed = (min + speedRange * progress ** curve) * multiplier
    const nextChar = remainingText[0]
    return baseSpeed / (TypeWriter.delays[nextChar] ?? 1)
  }

  /**
   * Scales this element's typing speed relative to the base configuration.
   * Calls do not compound: setSpeed(2) always means twice the base speed.
   *
   * @param {number} multiplier - Positive, finite speed factor.
   * @throws {RangeError} If the multiplier is not a positive finite number.
   */
  setSpeed(multiplier) {
    if (!Number.isFinite(multiplier) || multiplier <= 0) {
      throw new RangeError(
        `TypeWriter speed multiplier must be a positive finite number, got ${multiplier}`
      )
    }
    this.speedMultiplier = multiplier
  }
}
// Register the element only once: defining a name that is already in the
// registry throws, e.g. when this file ends up being evaluated twice.
if (!customElements.get("type-writer")) {
  customElements.define("type-writer", TypeWriter)
}
/**
 * VoiceWriter is a custom element that combines speech recognition with typewriter effects.
 * It captures audio input, streams it to a WebSocket server for real-time transcription,
 * and displays the results with a typewriter effect. It handles both interim results
 * (shown faded) and final transcriptions (shown solid), creating a natural voice-to-text
 * experience with visual feedback.
 *
 * Attributes:
 *  - `language`: language code sent to the server (default "en-US")
 *  - `server`: WebSocket base URL (default "wss://swa.sh")
 *  - `debug`: when present, adds a button that closes the socket on demand
 */
class VoiceWriter extends HTMLElement {
  static get observedAttributes() {
    return ["language", "server"]
  }

  constructor() {
    super()
    this.handleTranscript = this.handleTranscript.bind(this)
    this.processAudio = this.processAudio.bind(this)
    this.reconnectAttempts = 0
    this.maxReconnectAttempts = 5
    this.reconnectDelay = 1000
    // Handle of the pending reconnect attempt, if any
    this.reconnectTimer = undefined
    // Track both instant rate and moving average
    this.bytesSent = 0
    this.uploadRate = 0
    this.movingAverage = 0
    this.alpha = 0.2 // Smoothing factor (0.2 gives more weight to recent values)
    this.uploadRateInterval = null
    this.isRecording = true
    this.lastTranscriptTime = Date.now()
    // The <ins> currently showing an unconfirmed transcript, if any
    this.interimElement = null
  }

  get language() {
    return this.getAttribute("language") ?? "en-US"
  }

  get server() {
    return this.getAttribute("server") ?? "wss://swa.sh"
  }

  get wsUrl() {
    // Encode the language so unusual attribute values cannot break or
    // extend the query string.
    return `${this.server}/transcribe?language=${encodeURIComponent(this.language)}`
  }

  connectedCallback() {
    // Build the UI only once; when the element is moved in the document the
    // existing status bar and transcript are reused instead of duplicated.
    if (!this.statusBar) {
      this.writer = document.createElement("type-writer")
      this.writer.className = "block p-4"
      this.statusBar = this.createStatusBar()
      this.appendChild(this.statusBar)
      this.appendChild(this.writer)
    }
    this.reconnectAttempts = 0
    this.reconnectDelay = 1000
    this.updateStatus("Initializing...")

    if (!navigator.mediaDevices?.getUserMedia) {
      // mediaDevices is missing e.g. on insecure (non-HTTPS) pages
      console.error("🎤 Microphone capture is not available in this context")
      this.updateStatus("Audio capture failed", "text-red-600 dark:text-red-400")
      return
    }
    navigator.mediaDevices
      .getUserMedia({ audio: true })
      .then((stream) => this.beginListening(stream))
      .catch((error) => {
        console.error(error)
        this.updateStatus("Audio capture failed", "text-red-600 dark:text-red-400")
      })
  }

  disconnectedCallback() {
    clearInterval(this.uploadRateInterval)
    this.uploadRateInterval = null
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = undefined

    this.processor?.removeEventListener("audioprocess", this.processAudio)
    this.processor?.disconnect()
    this.source?.disconnect()
    // close() throws / rejects on objects that are already closed
    if (this.encoder && this.encoder.state !== "closed") this.encoder.close()
    if (this.context && this.context.state !== "closed") {
      this.context.close().catch(console.error)
    }
    // Release the microphone so the browser's recording indicator goes away
    this.stream?.getTracks().forEach((track) => track.stop())

    // Drop the reference before closing: the socket's "close" handler only
    // reconnects when the socket is still the current one.
    const socket = this.ws
    this.ws = null
    socket?.close()

    this.processor = null
    this.source = null
    this.encoder = null
    this.context = null
    this.stream = null
  }

  /**
   * Builds the status bar: recording toggle, status text section and, in
   * debug mode, a button that force-closes the WebSocket.
   * Also assigns `this.statusSection`.
   *
   * @returns {HTMLDivElement} The assembled status bar.
   */
  createStatusBar() {
    const statusBar = document.createElement("div")
    statusBar.className =
      "flex items-center font-sans py-1 justify-between gap-2 px-1 text-sm bg-gray-100 dark:bg-gray-800/30 border-b border-gray-300 dark:border-gray-700"

    // Recording toggle
    const toggleContainer = document.createElement("div")
    toggleContainer.className = "flex items-center gap-2"
    const toggle = document.createElement("input")
    toggle.type = "checkbox"
    toggle.id = "recordingToggle"
    toggle.checked = this.isRecording
    toggle.className =
      "form-checkbox h-4 w-4 text-blue-600 transition duration-150 ease-in-out"
    toggle.addEventListener("change", (e) => {
      this.isRecording = e.target.checked
      this.updateStatus(
        this.isRecording ? "Listening..." : "Paused",
        this.isRecording
          ? "text-emerald-600 dark:text-emerald-400"
          : "text-yellow-600 dark:text-yellow-400"
      )
    })
    const label = document.createElement("label")
    label.htmlFor = "recordingToggle"
    label.className = "text-sm text-gray-600 dark:text-gray-400"
    label.textContent = ""
    toggleContainer.appendChild(toggle)
    toggleContainer.appendChild(label)
    statusBar.appendChild(toggleContainer)

    // Status section
    this.statusSection = document.createElement("div")
    this.statusSection.className = "flex items-center gap-2"
    statusBar.appendChild(this.statusSection)

    // Add debug button to status bar if debug mode is enabled
    if (this.hasAttribute("debug")) {
      const debugButton = document.createElement("button")
      debugButton.className =
        "px-2 hover:bg-slate-200 dark:hover:bg-slate-800 transition-colors"
      debugButton.textContent = "🔌"
      debugButton.addEventListener("click", () => {
        console.log("🎤 Debug: Simulating WebSocket disconnect")
        if (this.ws?.readyState === WebSocket.OPEN) {
          this.ws.close()
        }
      })
      statusBar.appendChild(debugButton)
    }
    return statusBar
  }

  /**
   * Mirrors the socket state into the `data-ws-state` attribute and the
   * status text. Unknown states are shown verbatim in a neutral color.
   *
   * @param {string} state - "connecting", "connected" or "disconnected".
   */
  setWebSocketState(state) {
    this.setAttribute("data-ws-state", state)
    const statusMessages = {
      connecting: "Connecting...",
      connected: "Listening...",
      disconnected: "Reconnecting...",
    }
    const statusColors = {
      connecting: "text-yellow-600 dark:text-yellow-400",
      connected: "text-emerald-600 dark:text-emerald-400",
      disconnected: "text-red-600 dark:text-red-400",
    }
    this.updateStatus(
      statusMessages[state] || state,
      statusColors[state] || "text-gray-600 dark:text-gray-400"
    )
  }

  /**
   * Replaces the status text.
   *
   * @param {string} message - Text to show.
   * @param {string} [colorClass] - CSS classes applied to the text.
   */
  updateStatus(message, colorClass = "text-gray-600 dark:text-gray-400") {
    const text = document.createElement("span")
    text.className = colorClass
    text.textContent = message
    this.statusSection.replaceChildren(text)
  }

  /**
   * Opens a new WebSocket to the transcription service and wires up its
   * handlers. Handlers of a socket that is no longer `this.ws` do nothing,
   * so a replaced or deliberately closed socket cannot trigger a reconnect.
   */
  async connectWebSocket() {
    try {
      console.log("🎤 Connecting to transcription service...")
      this.setWebSocketState("connecting")
      const socket = new WebSocket(this.wsUrl)
      socket.binaryType = "arraybuffer"
      this.ws = socket
      socket.addEventListener("message", this.handleTranscript)
      socket.addEventListener("open", () => {
        if (this.ws !== socket) return
        console.log("🎤 Connected to transcription service")
        this.setWebSocketState("connected")
        this.reconnectAttempts = 0
        this.reconnectDelay = 1000
      })
      socket.addEventListener("close", () => {
        if (this.ws !== socket) return
        console.log("🎤 Disconnected from transcription service")
        this.setWebSocketState("disconnected")
        this.attemptReconnect()
      })
      socket.addEventListener("error", (error) => {
        if (this.ws !== socket) return
        console.error("🎤 WebSocket error:", error)
        this.setWebSocketState("disconnected")
      })
    } catch (error) {
      console.error("🎤 Failed to connect:", error)
      this.setWebSocketState("disconnected")
    }
  }

  /**
   * Schedules a reconnect with exponential backoff (capped at 10 seconds),
   * up to `maxReconnectAttempts` times. Does nothing once the element has
   * left the document.
   */
  attemptReconnect() {
    if (!this.isConnected) return
    if (this.reconnectAttempts >= this.maxReconnectAttempts) {
      console.error("🎤 Max reconnection attempts reached, giving up")
      // Stop claiming that a reconnect is in progress
      this.updateStatus("Disconnected", "text-red-600 dark:text-red-400")
      return
    }
    const attempt = this.reconnectAttempts + 1
    const delay = this.reconnectDelay / 1000
    console.log(
      `🎤 Attempting to reconnect (attempt ${attempt}/${this.maxReconnectAttempts}) in ${delay}s...`
    )
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = setTimeout(() => {
      this.reconnectTimer = undefined
      this.reconnectAttempts++
      this.connectWebSocket()
      // Exponential backoff
      this.reconnectDelay = Math.min(this.reconnectDelay * 2, 10000)
    }, this.reconnectDelay)
  }

  /**
   * Connects to the server and builds the capture pipeline:
   * microphone -> ScriptProcessor -> Opus encoder -> WebSocket.
   *
   * @param {MediaStream} stream - Microphone stream from getUserMedia.
   */
  async beginListening(stream) {
    if (!this.isConnected) {
      // The element was removed while the permission prompt was open
      stream.getTracks().forEach((track) => track.stop())
      return
    }
    this.stream = stream
    await this.connectWebSocket()

    // Set up audio context and nodes
    this.context = new AudioContext()
    this.source = this.context.createMediaStreamSource(stream)
    const channels = stream.getAudioTracks()[0].getSettings().channelCount ?? 1
    this.processor = this.context.createScriptProcessor(16384, channels, 1)

    // Set up encoder; every encoded packet is forwarded to the server
    this.encoder = new AudioEncoder({
      output: (packet) => {
        if (this.ws?.readyState === WebSocket.OPEN) {
          const buffer = new ArrayBuffer(packet.byteLength)
          packet.copyTo(buffer)
          this.ws.send(buffer)
          this.bytesSent += buffer.byteLength
        }
      },
      error: console.error,
    })

    // Sample the byte counter twice a second (hence "* 2" to get bytes per
    // second) and fold it into the moving average
    clearInterval(this.uploadRateInterval)
    this.uploadRateInterval = setInterval(() => {
      this.uploadRate = this.bytesSent * 2
      this.movingAverage =
        this.alpha * this.uploadRate + (1 - this.alpha) * this.movingAverage
      this.bytesSent = 0 // reset counter
      this.updateUploadRate()
    }, 500)

    // Configure the encoder for the rate the context really runs at; the
    // captured samples are produced at that rate, which is not always 48 kHz
    this.encoder.configure({
      codec: "opus",
      sampleRate: this.context.sampleRate,
      numberOfChannels: 1,
      opus: {
        application: "lowdelay",
        signal: "voice",
      },
    })

    // Wire up audio pipeline
    this.source.connect(this.processor)
    this.processor.connect(this.context.destination)
    this.processor.addEventListener("audioprocess", this.processAudio)
  }

  /**
   * Feeds one buffer of captured samples (first channel only) to the
   * encoder. Skipped while paused or while the socket is not open.
   *
   * @param {AudioProcessingEvent} event - ScriptProcessor audio event.
   */
  processAudio(event) {
    if (this.ws?.readyState !== WebSocket.OPEN || !this.isRecording) return
    if (this.encoder?.state !== "configured") return
    const samples = event.inputBuffer.getChannelData(0)
    const frame = new AudioData({
      data: samples, // copied by the AudioData constructor
      timestamp: Math.round(event.playbackTime * 1000000), // microseconds
      format: "f32",
      numberOfChannels: 1,
      numberOfFrames: samples.length,
      sampleRate: event.inputBuffer.sampleRate,
    })
    this.encoder.encode(frame)
    // Release the frame's memory right away instead of waiting for GC
    frame.close()
  }

  /**
   * Shows the smoothed upload rate in the status bar, creating the element
   * on first use (placed before the debug button when there is one).
   */
  updateUploadRate() {
    const formatRate = (rate) =>
      rate > 1024 ? `${(rate / 1024).toFixed(1)} KB/s` : `${rate.toFixed(1)} B/s`

    if (!this.uploadRateElement) {
      this.uploadRateElement = document.createElement("span")
      this.uploadRateElement.className =
        "font-mono text-blue-600 dark:text-blue-400 ml-auto mr-2"
      const debugButton = this.statusBar.querySelector("button")
      if (debugButton) {
        this.statusBar.insertBefore(this.uploadRateElement, debugButton)
      } else {
        this.statusBar.appendChild(this.uploadRateElement)
      }
    }
    // Show moving average rate
    this.uploadRateElement.textContent = `${formatRate(this.movingAverage)}`
  }

  /**
   * Handles a server message. Only JSON text messages of type "Results"
   * that carry a non-empty first alternative are rendered.
   *
   * @param {MessageEvent} event - WebSocket message event.
   */
  handleTranscript(event) {
    if (typeof event.data !== "string") return
    let result
    try {
      result = JSON.parse(event.data)
    } catch (error) {
      console.error("Error parsing transcript:", error)
      return
    }
    if (result?.type !== "Results") return
    const transcript = result.channel?.alternatives?.[0]?.transcript
    if (!transcript) return

    const currentTime = Date.now()
    const timeSinceLastTranscript = currentTime - this.lastTranscriptTime
    console.log(`🎤 Time since last transcript: ${timeSinceLastTranscript}ms`)
    this.lastTranscriptTime = currentTime

    // Use <ins> for interim results that may change. The element is tracked
    // explicitly rather than looked up as the last child, because a
    // finalized <ins> can still be in the DOM while its view transition is
    // pending and must not be reused for the next utterance.
    let interim = this.interimElement
    if (interim?.parentNode !== this.writer) interim = null
    if (!interim) {
      // Add line break if more than 5 seconds have passed; only between
      // utterances, never in the middle of one
      if (timeSinceLastTranscript > 5000 && this.writer.lastElementChild) {
        this.writer.appendChild(document.createElement("br"))
      }
      interim = document.createElement("ins")
      this.writer.appendChild(interim)
      this.interimElement = interim
    }
    interim.textContent = transcript
    if (!result.is_final) return

    // Convert interim <ins> to final <span> when transcription is confirmed
    const span = document.createElement("span")
    const finalText = /[.!?]$/.test(transcript) ? transcript : `${transcript}—`
    span.textContent = `${finalText} `
    this.interimElement = null
    const commit = () => interim.replaceWith(span)
    // Use view transitions API for smooth visual update where available
    if (typeof document.startViewTransition === "function") {
      document.startViewTransition(commit)
    } else {
      commit()
    }
  }
}
// Interim transcripts are rendered in <ins>; show them faded and without the
// default underline so they read as "not confirmed yet".
const sheet = new CSSStyleSheet()
sheet.replaceSync(`
  voice-writer ins {
    text-decoration: none;
    opacity: 0.6;
  }
`)
document.adoptedStyleSheets = [...document.adoptedStyleSheets, sheet]
// Register the element only once: defining a name that is already in the
// registry throws, e.g. when this file ends up being evaluated twice.
if (!customElements.get("voice-writer")) {
  customElements.define("voice-writer", VoiceWriter)
}
// Usage:
// <voice-writer language="en-US" server="wss://swa.sh"></voice-writer>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment