Created
November 29, 2024 21:49
-
-
Save mbrock/3d5cdeab8107d7c92ca4fa24680fd1bc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// transcription-client.js
//
// Captures microphone audio, encodes it to Opus with the WebCodecs
// AudioEncoder, and streams the packets over a WebSocket. Transcript
// messages pushed back by the server (Deepgram-style "Results" JSON)
// are routed to the onTranscript / onInterimTranscript callbacks.
class TranscriptionClient {
  /**
   * @param {Object}   [options]
   * @param {string}   [options.language='en-US'] - BCP-47 tag appended to the default URL.
   * @param {string}   [options.websocketUrl] - Endpoint override; defaults to
   *   `/transcribe` on the current origin (ws:/wss: chosen from the page protocol).
   * @param {Function} [options.onTranscript] - Receives final transcripts
   *   as `{ text, isFinal, words }`.
   * @param {Function} [options.onInterimTranscript] - Receives interim transcripts.
   * @param {Function} [options.onError] - Error sink; defaults to console.error.
   */
  constructor(options = {}) {
    this.language = options.language || 'en-US';
    // language must be assigned before the default URL is built from it.
    this.websocketUrl = options.websocketUrl || this._getDefaultWebsocketUrl();
    this.onTranscript = options.onTranscript || (() => {});
    this.onInterimTranscript = options.onInterimTranscript || (() => {});
    this.onError = options.onError || console.error;
    this.websocket = null;
    this.mediaStream = null;
    this.audioContext = null;
    this.audioEncoder = null;
    this.sourceNode = null;
    this.processorNode = null;
  }

  /** Builds `ws(s)://<host>/transcribe?language=<lang>` from the page origin. */
  _getDefaultWebsocketUrl() {
    const protocol = document.location.protocol === 'https:' ? 'wss:' : 'ws:';
    return `${protocol}//${document.location.host}/transcribe?language=${this.language}`;
  }

  /**
   * Requests microphone access, opens the WebSocket, and starts streaming audio.
   * @throws Re-throws any setup error after reporting it via onError.
   */
  async start() {
    try {
      // Get microphone access first so we fail fast on permission denial.
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
        video: false
      });
      // Set up the WebSocket transport.
      this.websocket = new WebSocket(this.websocketUrl);
      this.websocket.binaryType = 'arraybuffer';
      this._setupWebSocketHandlers();
      // Set up the capture -> encode pipeline.
      await this._setupAudioProcessing();
    } catch (error) {
      this.onError(error);
      throw error;
    }
  }

  /**
   * Tears down the encoder, audio graph, microphone tracks, and socket.
   * Safe to call multiple times or before start().
   */
  stop() {
    // BUG FIX: close() on an already-closed encoder throws InvalidStateError,
    // so guard on state to make stop() idempotent.
    if (this.audioEncoder && this.audioEncoder.state !== 'closed') {
      this.audioEncoder.close();
    }
    if (this.processorNode) {
      this.processorNode.disconnect();
    }
    if (this.sourceNode) {
      this.sourceNode.disconnect();
    }
    if (this.audioContext) {
      this.audioContext.close();
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => track.stop());
    }
    if (this.websocket) {
      this.websocket.close();
    }
    this.audioEncoder = null;
    this.processorNode = null;
    this.sourceNode = null;
    this.audioContext = null;
    this.mediaStream = null;
    this.websocket = null;
  }

  /** Wires mic -> ScriptProcessorNode -> AudioEncoder -> WebSocket. */
  async _setupAudioProcessing() {
    // BUG FIX: the encoder below is configured for 48 kHz Opus, but the
    // default AudioContext runs at the device rate (often 44.1 kHz) and the
    // AudioData was hard-coded to claim 48000 regardless. Request 48 kHz so
    // the captured samples actually match the encoder configuration.
    this.audioContext = new AudioContext({ sampleRate: 48000 });
    this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
    // Match the processor's input channel count to the stream (output is mono).
    const streamChannels = this.mediaStream.getAudioTracks()[0].getSettings().channelCount || 1;
    // NOTE: ScriptProcessorNode is deprecated; AudioWorklet is the modern
    // replacement, but this keeps the original main-thread design.
    this.processorNode = this.audioContext.createScriptProcessor(16384, streamChannels, 1);
    // Create the Opus encoder; each encoded packet is forwarded as a binary frame.
    this.audioEncoder = new AudioEncoder({
      output: (encodedPacket) => {
        if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
          const arrayBuffer = new ArrayBuffer(encodedPacket.byteLength);
          encodedPacket.copyTo(arrayBuffer);
          this.websocket.send(arrayBuffer);
        }
      },
      error: this.onError
    });
    // Low-delay voice profile suits live transcription.
    this.audioEncoder.configure({
      codec: 'opus',
      sampleRate: 48000,
      numberOfChannels: 1,
      opus: {
        application: 'lowdelay',
        signal: 'voice'
      }
    });
    // Connect the graph; the processor must reach the destination to fire events.
    this.sourceNode.connect(this.processorNode);
    this.processorNode.connect(this.audioContext.destination);
    // Feed each captured buffer (channel 0 only) into the encoder.
    this.processorNode.addEventListener('audioprocess', (event) => {
      // BUG FIX: a straggling event after stop() would call encode() on a
      // closed encoder and throw; skip unless the encoder is configured.
      if (!this.audioEncoder || this.audioEncoder.state !== 'configured') {
        return;
      }
      // Copy the samples: getChannelData() returns a view the audio
      // pipeline may reuse after this callback returns.
      const samples = event.inputBuffer.getChannelData(0).slice();
      this.audioEncoder.encode(new AudioData({
        data: samples,
        timestamp: event.playbackTime * 1000000, // AudioData timestamps are in microseconds
        format: 'f32',
        numberOfChannels: 1,
        numberOfFrames: event.inputBuffer.length,
        sampleRate: this.audioContext.sampleRate
      }));
    });
  }

  /** Parses server JSON messages and dispatches final/interim transcripts. */
  _setupWebSocketHandlers() {
    this.websocket.onmessage = (event) => {
      // Binary frames (if any) are ignored; only text frames carry results.
      if (typeof event.data === 'string') {
        try {
          const result = JSON.parse(event.data);
          if (result.type === 'Results' && result.channel?.alternatives?.[0]) {
            const transcript = {
              text: result.channel.alternatives[0].transcript,
              isFinal: result.is_final,
              words: result.channel.alternatives[0].words
            };
            if (result.is_final) {
              this.onTranscript(transcript);
            } else {
              this.onInterimTranscript(transcript);
            }
          }
        } catch (error) {
          this.onError(error);
        }
      }
    };
    this.websocket.onerror = (error) => {
      this.onError(error);
    };
  }
}
// Example usage (browser-only: requires getUserMedia, WebSocket, and
// WebCodecs AudioEncoder support):
/*
const transcriber = new TranscriptionClient({
  onTranscript: (transcript) => {
    console.log('Final transcript:', transcript.text);
  },
  onInterimTranscript: (transcript) => {
    console.log('Interim transcript:', transcript.text);
  }
});
// Start transcription (throws if mic permission is denied or setup fails)
await transcriber.start();
// Stop transcription and release the microphone, encoder, and socket
transcriber.stop();
*/
export default TranscriptionClient;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment