// Gist created by mbrock on November 29, 2024 23:44.

/**
 * Copyright (c) 2024 Mikael Brockman <https://github.com/mbrock>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * TypeWriter reveals text gradually using the CSS Highlight API,
 * creating a smooth typing animation.
 *
 * The typing speed varies based on punctuation and position in the text,
 * with natural pauses at punctuation marks and acceleration as it progresses.
*/ class TypeWriter extends HTMLElement { constructor() { super() // Counter for how many characters to reveal this.limit = 0 // Range object used to hide unrevealed text this.blind = new Range() // Observer that watches for content changes and triggers updates this.scout = new MutationObserver(() => { this.update() if (!this.timer) this.proceed() }) this.lastTranscriptTime = Date.now() } // Typing speed delays for different punctuation marks (in relative units) static delays = { " ": 3, ",": 8, ";": 8, ":": 9, ".": 10, "—": 12, "–": 7, "!": 15, "?": 15, "\n": 20, } // Base typing speed configuration static get speedConfig() { return { min: 30, // Minimum characters per second max: 80, // Maximum characters per second curve: 2, // Acceleration curve power } } connectedCallback() { // Set up CSS highlight API for revealing text gradually const css = new CSSStyleSheet() css.replaceSync(`::highlight(transparent) { color: transparent }`) document.adoptedStyleSheets = [...document.adoptedStyleSheets, css] // Initialize the blind range to cover all content this.blind.selectNodeContents(this) // Create or get the highlight for unrevealed text const highlight = CSS.highlights.get("transparent") ?? 
new Highlight() highlight.add(this.blind) CSS.highlights.set("transparent", highlight) // Start observing content changes this.scout.observe(this, { childList: true, subtree: true, characterData: true, }) this.proceed() } disconnectedCallback() { this.scout.disconnect() CSS.highlights.get("transparent")?.delete(this.blind) clearTimeout(this.timer) } update() { // Walk through text nodes to find where to place the blind range const walk = document.createTreeWalker(this, NodeFilter.SHOW_TEXT) let node = null let limit = this.limit while ((node = walk.nextNode())) { const length = node.data.slice(0, limit).length limit -= length if (limit <= 0) { // Found the node where the reveal cutoff should be this.blind.setStart(node, length) break } } if (limit > 0) { // If we've revealed all text, reset blind to start this.blind.setStart(this, 0) } // Always set blind to end after all content this.blind.setEndAfter(this) } proceed() { if (this.blind.toString().trim() === "") { this.timer = undefined this.dispatchEvent(new CustomEvent("typingComplete")) return } this.limit = Math.min(this.limit + 1, this.innerText.length) this.update() const remainingText = this.blind.toString() const totalLength = this.innerText.length const speed = this.calculateSpeed(totalLength, remainingText) this.timer = setTimeout(() => this.proceed(), 1000 / speed) } calculateSpeed(totalLength, remainingText) { const { min, max, curve } = TypeWriter.speedConfig const speedRange = max - min const progress = 1 - remainingText.length / totalLength const baseSpeed = min + speedRange * progress ** curve const nextChar = remainingText[0] return baseSpeed / (TypeWriter.delays[nextChar] ?? 1) } setSpeed(multiplier) { const { min, max } = TypeWriter.speedConfig TypeWriter.speedConfig.min = min * multiplier TypeWriter.speedConfig.max = max * multiplier } } customElements.define("type-writer", TypeWriter) /** * VoiceWriter is a custom element that combines speech recognition with typewriter effects. 
* It captures audio input, streams it to a WebSocket server for real-time transcription, * and displays the results with a typewriter effect. It handles both interim results * (shown faded) and final transcriptions (shown solid), creating a natural voice-to-text * experience with visual feedback. */ class VoiceWriter extends HTMLElement { constructor() { super() this.handleTranscript = this.handleTranscript.bind(this) this.processAudio = this.processAudio.bind(this) this.reconnectAttempts = 0 this.maxReconnectAttempts = 5 this.reconnectDelay = 1000 // Track both instant rate and moving average this.bytesSent = 0 this.uploadRate = 0 this.movingAverage = 0 this.alpha = 0.2 // Smoothing factor (0.2 gives more weight to recent values) this.uploadRateInterval = null this.isRecording = true this.lastTranscriptTime = Date.now() } static get observedAttributes() { return ["language", "server"] } get language() { return this.getAttribute("language") ?? "en-US" } get server() { return this.getAttribute("server") ?? 
"wss://swa.sh" } get wsUrl() { return `${this.server}/transcribe?language=${this.language}` } connectedCallback() { this.writer = document.createElement("type-writer") this.writer.className = "block p-4" // Create status bar this.statusBar = document.createElement("div") this.statusBar.className = "flex items-center font-sans py-1 justify-between gap-2 px-1 text-sm bg-gray-100 dark:bg-gray-800/30 border-b border-gray-300 dark:border-gray-700" // Create status section this.statusSection = document.createElement("div") this.statusSection.className = "flex items-center gap-2" this.statusBar.appendChild(this.statusSection) // Create recording toggle const toggleContainer = document.createElement("div") toggleContainer.className = "flex items-center gap-2" const toggle = document.createElement("input") toggle.type = "checkbox" toggle.id = "recordingToggle" toggle.checked = true toggle.className = "form-checkbox h-4 w-4 text-blue-600 transition duration-150 ease-in-out" toggle.addEventListener("change", (e) => { this.isRecording = e.target.checked this.updateStatus( this.isRecording ? "Listening..." : "Paused", this.isRecording ? 
"text-emerald-600 dark:text-emerald-400" : "text-yellow-600 dark:text-yellow-400" ) }) const label = document.createElement("label") label.htmlFor = "recordingToggle" label.className = "text-sm text-gray-600 dark:text-gray-400" label.textContent = "" toggleContainer.appendChild(toggle) toggleContainer.appendChild(label) this.statusBar.prepend(toggleContainer) // Add debug button to status bar if debug mode is enabled if (this.hasAttribute("debug")) { const debugButton = document.createElement("button") debugButton.className = "px-2 hover:bg-slate-200 dark:hover:bg-slate-800 transition-colors" debugButton.textContent = "🔌" debugButton.addEventListener("click", () => { console.log("🎤 Debug: Simulating WebSocket disconnect") if (this.ws?.readyState === WebSocket.OPEN) { this.ws.close() } }) this.statusBar.appendChild(debugButton) } this.updateStatus("Initializing...") this.appendChild(this.statusBar) this.appendChild(this.writer) navigator.mediaDevices .getUserMedia({ audio: true }) .then((stream) => this.beginListening(stream)) .catch(console.error) } disconnectedCallback() { clearInterval(this.uploadRateInterval) this.encoder?.close() this.processor?.disconnect() this.source?.disconnect() this.context?.close() this.ws?.close() } setWebSocketState(state) { this.setAttribute("data-ws-state", state) const statusMessages = { connecting: "Connecting...", connected: "Listening...", disconnected: "Reconnecting...", } const statusColors = { connecting: "text-yellow-600 dark:text-yellow-400", connected: "text-emerald-600 dark:text-emerald-400", disconnected: "text-red-600 dark:text-red-400", } this.updateStatus( statusMessages[state] || state, statusColors[state] || "text-gray-600 dark:text-gray-400" ) } updateStatus(message, colorClass = "text-gray-600 dark:text-gray-400") { // Clear existing content this.statusSection.innerHTML = "" // Add status message const text = document.createElement("span") text.className = colorClass text.textContent = message 
this.statusSection.appendChild(text) } async connectWebSocket() { try { console.log("🎤 Connecting to transcription service...") this.setWebSocketState("connecting") this.ws = new WebSocket(this.wsUrl) this.ws.binaryType = "arraybuffer" this.ws.addEventListener("message", this.handleTranscript) this.ws.addEventListener("open", () => { console.log("🎤 Connected to transcription service") this.setWebSocketState("connected") this.reconnectAttempts = 0 this.reconnectDelay = 1000 }) this.ws.addEventListener("close", () => { console.log("🎤 Disconnected from transcription service") this.setWebSocketState("disconnected") this.attemptReconnect() }) this.ws.addEventListener("error", (error) => { console.error("🎤 WebSocket error:", error) this.setWebSocketState("disconnected") }) } catch (error) { console.error("🎤 Failed to connect:", error) this.setWebSocketState("disconnected") } } attemptReconnect() { if (this.reconnectAttempts >= this.maxReconnectAttempts) { console.error("🎤 Max reconnection attempts reached, giving up") return } const attempt = this.reconnectAttempts + 1 const delay = this.reconnectDelay / 1000 console.log( `🎤 Attempting to reconnect (attempt ${attempt}/${this.maxReconnectAttempts}) in ${delay}s...` ) setTimeout(() => { this.reconnectAttempts++ this.connectWebSocket() // Exponential backoff this.reconnectDelay = Math.min(this.reconnectDelay * 2, 10000) }, this.reconnectDelay) } async beginListening(stream) { await this.connectWebSocket() // Set up audio context and nodes this.context = new AudioContext() this.source = this.context.createMediaStreamSource(stream) const channels = stream.getAudioTracks()[0].getSettings().channelCount ?? 
1 this.processor = this.context.createScriptProcessor(16384, channels, 1) // Set up encoder this.encoder = new AudioEncoder({ output: (packet) => { if (this.ws?.readyState === WebSocket.OPEN) { const buffer = new ArrayBuffer(packet.byteLength) packet.copyTo(buffer) this.ws.send(buffer) this.bytesSent += buffer.byteLength } }, error: console.error, }) // Sample rate and update moving average every second this.uploadRateInterval = setInterval(() => { this.uploadRate = this.bytesSent * 2 this.movingAverage = this.alpha * this.uploadRate + (1 - this.alpha) * this.movingAverage this.bytesSent = 0 // reset counter this.updateUploadRate() }, 500) // Set up encoder await this.encoder.configure({ codec: "opus", sampleRate: 48000, numberOfChannels: 1, opus: { application: "lowdelay", signal: "voice", }, }) // Wire up audio pipeline this.source.connect(this.processor) this.processor.connect(this.context.destination) this.processor.addEventListener("audioprocess", this.processAudio) } processAudio(event) { if (this.ws?.readyState !== WebSocket.OPEN || !this.isRecording) return const inputData = event.inputBuffer.getChannelData(0) const buffer = new ArrayBuffer(inputData.length * 4) const view = new DataView(buffer) for (let i = 0; i < inputData.length; i++) { view.setFloat32(i * 4, inputData[i], true) } this.encoder?.encode( new AudioData({ data: buffer, timestamp: event.playbackTime * 1000000, format: "f32", numberOfChannels: 1, numberOfFrames: inputData.length, sampleRate: 48000, }) ) } updateUploadRate() { const instantRate = this.uploadRate const avgRate = this.movingAverage const formatRate = (rate) => { if (rate > 1024) { return `${(rate / 1024).toFixed(1)} KB/s` } else { return `${rate.toFixed(1)} B/s` } } // Update or create upload rate element if (!this.uploadRateElement) { this.uploadRateElement = document.createElement("span") this.uploadRateElement.className = "font-mono text-blue-600 dark:text-blue-400 ml-auto mr-2" const debugButton = 
this.statusBar.querySelector("button") if (debugButton) { this.statusBar.insertBefore(this.uploadRateElement, debugButton) } else { this.statusBar.appendChild(this.uploadRateElement) } } // Show moving average rate this.uploadRateElement.textContent = `${formatRate(avgRate)}` } handleTranscript(event) { if (typeof event.data !== "string") return try { const result = JSON.parse(event.data) if ( result.type !== "Results" || !result.channel?.alternatives?.[0]?.transcript ) return let text = result.channel.alternatives[0].transcript if (!text) return const currentTime = Date.now() const timeSinceLastTranscript = currentTime - this.lastTranscriptTime console.log(`🎤 Time since last transcript: ${timeSinceLastTranscript}ms`) // Add line break if more than 5 seconds have passed if (timeSinceLastTranscript > 5000 && this.writer.lastElementChild) { const lineBreak = document.createElement("br") this.writer.appendChild(lineBreak) } this.lastTranscriptTime = currentTime let element = this.writer.lastElementChild // Use <ins> for interim results that may change if (!element || !element.matches("ins")) { element = document.createElement("ins") this.writer.appendChild(element) } element.textContent = text if (result.is_final) { // Convert interim <ins> to final <span> when transcription is confirmed const span = document.createElement("span") if (!text.match(/[.!?]$/)) text += "—" span.textContent = text + " " // Use view transitions API for smooth visual update document.startViewTransition(() => { element.replaceWith(span) }) } } catch (error) { console.error("Error parsing transcript:", error) } } } const sheet = new CSSStyleSheet() sheet.replaceSync(` voice-writer ins { text-decoration: none; opacity: 0.6; } `) document.adoptedStyleSheets = [...document.adoptedStyleSheets, sheet] customElements.define("voice-writer", VoiceWriter) // Usage: // <voice-writer language="en-US" server="wss://swa.sh"></voice-writer>