// transcription-client.js
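//
// Streams microphone audio to a transcription backend: raw PCM from the mic
// is encoded to Opus with the WebCodecs AudioEncoder and sent over a
// WebSocket as binary packets; the server is expected to reply with JSON
// transcription results (Deepgram-style — see the example message after the
// class body).
//
// Note: AudioEncoder is a WebCodecs API and is not available in all
// browsers, so start() checks for it and fails with a clear error if it is
// missing.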
class TranscriptionClient {
  constructor(options = {}) {
    this.language = options.language || 'en-US';
    this.websocketUrl = options.websocketUrl || this._getDefaultWebsocketUrl();
    this.onTranscript = options.onTranscript || (() => {});
    this.onInterimTranscript = options.onInterimTranscript || (() => {});
    this.onError = options.onError || console.error;

    this.websocket = null;
    this.mediaStream = null;
    this.audioContext = null;
    this.audioEncoder = null;
    this.sourceNode = null;
    this.processorNode = null;
  }
  _getDefaultWebsocketUrl() {
    // e.g. wss://example.com/transcribe?language=en-US
    const protocol = document.location.protocol === 'https:' ? 'wss:' : 'ws:';
    return `${protocol}//${document.location.host}/transcribe?language=${encodeURIComponent(this.language)}`;
  }
  async start() {
    try {
      // WebCodecs is required for the Opus encoding below; fail early.
      if (typeof AudioEncoder === 'undefined') {
        throw new Error('AudioEncoder (WebCodecs) is not supported in this browser');
      }
      // Get microphone access
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
        video: false
      });
      // Set up WebSocket
      this.websocket = new WebSocket(this.websocketUrl);
      this.websocket.binaryType = 'arraybuffer';
      this._setupWebSocketHandlers();
      // Set up audio processing
      await this._setupAudioProcessing();
    } catch (error) {
      this.stop(); // release anything acquired before the failure
      this.onError(error);
      throw error;
    }
  }
  stop() {
    // Disconnect the audio graph first so no further audioprocess events
    // try to feed an encoder that has already been closed.
    if (this.processorNode) {
      this.processorNode.disconnect();
    }
    if (this.sourceNode) {
      this.sourceNode.disconnect();
    }
    if (this.audioEncoder && this.audioEncoder.state !== 'closed') {
      this.audioEncoder.close();
    }
    if (this.audioContext) {
      this.audioContext.close();
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => track.stop());
    }
    if (this.websocket) {
      this.websocket.close();
    }

    this.audioEncoder = null;
    this.processorNode = null;
    this.sourceNode = null;
    this.audioContext = null;
    this.mediaStream = null;
    this.websocket = null;
  }
  async _setupAudioProcessing() {
    // Ask for a 48 kHz context so the samples match the rate the Opus
    // encoder is configured for; otherwise the context defaults to the
    // hardware rate (often 44.1 kHz) and the audio is misencoded.
    this.audioContext = new AudioContext({ sampleRate: 48000 });
    this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);

    // Get channel count from media stream
    const streamChannels = this.mediaStream.getAudioTracks()[0].getSettings().channelCount || 1;

    // Create processor node. ScriptProcessorNode is deprecated in favor of
    // AudioWorklet (see the sketch after this method), but it keeps this
    // example self-contained.
    this.processorNode = this.audioContext.createScriptProcessor(16384, streamChannels, 1);

    // Create audio encoder. Each encoded Opus packet is forwarded over the
    // WebSocket as a binary message; packets produced before the socket
    // opens are dropped.
    this.audioEncoder = new AudioEncoder({
      output: (encodedPacket) => {
        if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
          const arrayBuffer = new ArrayBuffer(encodedPacket.byteLength);
          encodedPacket.copyTo(arrayBuffer);
          this.websocket.send(arrayBuffer);
        }
      },
      error: this.onError
    });

    // Configure encoder. The opus-specific keys follow the WebCodecs Opus
    // codec registration; 48 kHz matches the AudioContext created above.
    this.audioEncoder.configure({
      codec: 'opus',
      sampleRate: 48000,
      numberOfChannels: 1,
      opus: {
        application: 'lowdelay',
        signal: 'voice'
      }
    });

    // Connect nodes
    this.sourceNode.connect(this.processorNode);
    this.processorNode.connect(this.audioContext.destination);

    // Copy each buffer of float samples and hand it to the encoder. The
    // copy (slice) matters: the input buffer is reused by the audio thread
    // after the event handler returns.
    this.processorNode.addEventListener('audioprocess', (event) => {
      if (!this.audioEncoder || this.audioEncoder.state !== 'configured') {
        return; // encoder already torn down by stop()
      }
      const inputData = event.inputBuffer.getChannelData(0);
      this.audioEncoder.encode(new AudioData({
        data: inputData.slice().buffer,
        timestamp: event.playbackTime * 1000000, // microseconds
        format: 'f32',
        numberOfChannels: 1,
        numberOfFrames: event.inputBuffer.length,
        sampleRate: this.audioContext.sampleRate
      }));
    });
  }
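
  // A sketch of the modern replacement for ScriptProcessorNode, using an
  // inline AudioWorklet module to forward raw samples to the main thread
  // ('pcm-forwarder' is a made-up name for illustration):
  //
  //   const workletSource = `
  //     registerProcessor('pcm-forwarder', class extends AudioWorkletProcessor {
  //       process(inputs) {
  //         const channel = inputs[0][0];
  //         if (channel) this.port.postMessage(channel.slice());
  //         return true; // keep the processor alive
  //       }
  //     });
  //   `;
  //   const url = URL.createObjectURL(new Blob([workletSource], { type: 'application/javascript' }));
  //   await this.audioContext.audioWorklet.addModule(url);
  //   const node = new AudioWorkletNode(this.audioContext, 'pcm-forwarder');
  //   node.port.onmessage = ({ data }) => { /* data is a Float32Array; feed it to the encoder */ };
  //   this.sourceNode.connect(node);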
  _setupWebSocketHandlers() {
    this.websocket.onmessage = (event) => {
      // Text frames carry JSON results (Deepgram-style; see the example
      // message below the class). Binary frames are not expected from the
      // server and are ignored.
      if (typeof event.data === 'string') {
        try {
          const result = JSON.parse(event.data);
          if (result.type === 'Results' && result.channel?.alternatives?.[0]) {
            const transcript = {
              text: result.channel.alternatives[0].transcript,
              isFinal: result.is_final,
              words: result.channel.alternatives[0].words
            };
            if (result.is_final) {
              this.onTranscript(transcript);
            } else {
              this.onInterimTranscript(transcript);
            }
          }
        } catch (error) {
          this.onError(error);
        }
      }
    };

    this.websocket.onerror = (error) => {
      this.onError(error);
    };
  }
}
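
// The message handler above expects results shaped like this (a
// Deepgram-style streaming response; only the fields actually read by the
// handler matter, the rest is illustrative):
//
//   {
//     "type": "Results",
//     "is_final": true,
//     "channel": {
//       "alternatives": [
//         { "transcript": "hello world", "words": [ /* per-word timings */ ] }
//       ]
//     }
//   }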
// Example usage:
/*
const transcriber = new TranscriptionClient({
  onTranscript: (transcript) => {
    console.log('Final transcript:', transcript.text);
  },
  onInterimTranscript: (transcript) => {
    console.log('Interim transcript:', transcript.text);
  }
});

// Start transcription
await transcriber.start();

// Stop transcription
transcriber.stop();
*/
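
// For testing the client end-to-end without a real speech-to-text engine, a
// stub server can accept the binary Opus packets and answer with a canned
// result. A minimal sketch assuming the `ws` npm package (not part of this
// gist; a real backend would decode the Opus packets and transcribe them):
//
//   import { WebSocketServer } from 'ws';
//
//   const wss = new WebSocketServer({ port: 8080 });
//   wss.on('connection', (ws) => {
//     ws.on('message', (data, isBinary) => {
//       if (!isBinary) return; // the client only sends binary Opus packets
//       ws.send(JSON.stringify({
//         type: 'Results',
//         is_final: true,
//         channel: { alternatives: [{ transcript: 'hello world', words: [] }] }
//       }));
//     });
//   });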
export default TranscriptionClient;