Created
November 29, 2024 21:49
-
-
Save mbrock/3d5cdeab8107d7c92ca4fa24680fd1bc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// transcription-client.js
//
// Captures microphone audio, encodes it to Opus with the WebCodecs
// AudioEncoder, and streams the packets over a WebSocket. Transcript
// messages pushed back by the server (Deepgram-style "Results" JSON)
// are routed to the onTranscript / onInterimTranscript callbacks.
class TranscriptionClient {
  /**
   * @param {Object}   [options]
   * @param {string}   [options.language='en-US'] - BCP-47 tag appended to the default URL.
   * @param {string}   [options.websocketUrl] - Endpoint override; defaults to
   *   `/transcribe` on the current origin (ws:/wss: chosen from the page protocol).
   * @param {Function} [options.onTranscript] - Receives final transcripts
   *   as `{ text, isFinal, words }`.
   * @param {Function} [options.onInterimTranscript] - Receives interim transcripts.
   * @param {Function} [options.onError] - Error sink; defaults to console.error.
   */
  constructor(options = {}) {
    this.language = options.language || 'en-US';
    // language must be assigned before the default URL is built from it.
    this.websocketUrl = options.websocketUrl || this._getDefaultWebsocketUrl();
    this.onTranscript = options.onTranscript || (() => {});
    this.onInterimTranscript = options.onInterimTranscript || (() => {});
    this.onError = options.onError || console.error;
    this.websocket = null;
    this.mediaStream = null;
    this.audioContext = null;
    this.audioEncoder = null;
    this.sourceNode = null;
    this.processorNode = null;
  }

  /** Builds `ws(s)://<host>/transcribe?language=<lang>` from the page origin. */
  _getDefaultWebsocketUrl() {
    const protocol = document.location.protocol === 'https:' ? 'wss:' : 'ws:';
    return `${protocol}//${document.location.host}/transcribe?language=${this.language}`;
  }

  /**
   * Requests microphone access, opens the WebSocket, and starts streaming audio.
   * @throws Re-throws any setup error after reporting it via onError.
   */
  async start() {
    try {
      // Get microphone access first so we fail fast on permission denial.
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
        video: false
      });
      // Set up the WebSocket transport.
      this.websocket = new WebSocket(this.websocketUrl);
      this.websocket.binaryType = 'arraybuffer';
      this._setupWebSocketHandlers();
      // Set up the capture -> encode pipeline.
      await this._setupAudioProcessing();
    } catch (error) {
      this.onError(error);
      throw error;
    }
  }

  /**
   * Tears down the encoder, audio graph, microphone tracks, and socket.
   * Safe to call multiple times or before start().
   */
  stop() {
    // BUG FIX: close() on an already-closed encoder throws InvalidStateError,
    // so guard on state to make stop() idempotent.
    if (this.audioEncoder && this.audioEncoder.state !== 'closed') {
      this.audioEncoder.close();
    }
    if (this.processorNode) {
      this.processorNode.disconnect();
    }
    if (this.sourceNode) {
      this.sourceNode.disconnect();
    }
    if (this.audioContext) {
      this.audioContext.close();
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => track.stop());
    }
    if (this.websocket) {
      this.websocket.close();
    }
    this.audioEncoder = null;
    this.processorNode = null;
    this.sourceNode = null;
    this.audioContext = null;
    this.mediaStream = null;
    this.websocket = null;
  }

  /** Wires mic -> ScriptProcessorNode -> AudioEncoder -> WebSocket. */
  async _setupAudioProcessing() {
    // BUG FIX: the encoder below is configured for 48 kHz Opus, but the
    // default AudioContext runs at the device rate (often 44.1 kHz) and the
    // AudioData was hard-coded to claim 48000 regardless. Request 48 kHz so
    // the captured samples actually match the encoder configuration.
    this.audioContext = new AudioContext({ sampleRate: 48000 });
    this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
    // Match the processor's input channel count to the stream (output is mono).
    const streamChannels = this.mediaStream.getAudioTracks()[0].getSettings().channelCount || 1;
    // NOTE: ScriptProcessorNode is deprecated; AudioWorklet is the modern
    // replacement, but this keeps the original main-thread design.
    this.processorNode = this.audioContext.createScriptProcessor(16384, streamChannels, 1);
    // Create the Opus encoder; each encoded packet is forwarded as a binary frame.
    this.audioEncoder = new AudioEncoder({
      output: (encodedPacket) => {
        if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
          const arrayBuffer = new ArrayBuffer(encodedPacket.byteLength);
          encodedPacket.copyTo(arrayBuffer);
          this.websocket.send(arrayBuffer);
        }
      },
      error: this.onError
    });
    // Low-delay voice profile suits live transcription.
    this.audioEncoder.configure({
      codec: 'opus',
      sampleRate: 48000,
      numberOfChannels: 1,
      opus: {
        application: 'lowdelay',
        signal: 'voice'
      }
    });
    // Connect the graph; the processor must reach the destination to fire events.
    this.sourceNode.connect(this.processorNode);
    this.processorNode.connect(this.audioContext.destination);
    // Feed each captured buffer (channel 0 only) into the encoder.
    this.processorNode.addEventListener('audioprocess', (event) => {
      // BUG FIX: a straggling event after stop() would call encode() on a
      // closed encoder and throw; skip unless the encoder is configured.
      if (!this.audioEncoder || this.audioEncoder.state !== 'configured') {
        return;
      }
      // Copy the samples: getChannelData() returns a view the audio
      // pipeline may reuse after this callback returns.
      const samples = event.inputBuffer.getChannelData(0).slice();
      this.audioEncoder.encode(new AudioData({
        data: samples,
        timestamp: event.playbackTime * 1000000, // AudioData timestamps are in microseconds
        format: 'f32',
        numberOfChannels: 1,
        numberOfFrames: event.inputBuffer.length,
        sampleRate: this.audioContext.sampleRate
      }));
    });
  }

  /** Parses server JSON messages and dispatches final/interim transcripts. */
  _setupWebSocketHandlers() {
    this.websocket.onmessage = (event) => {
      // Binary frames (if any) are ignored; only text frames carry results.
      if (typeof event.data === 'string') {
        try {
          const result = JSON.parse(event.data);
          if (result.type === 'Results' && result.channel?.alternatives?.[0]) {
            const transcript = {
              text: result.channel.alternatives[0].transcript,
              isFinal: result.is_final,
              words: result.channel.alternatives[0].words
            };
            if (result.is_final) {
              this.onTranscript(transcript);
            } else {
              this.onInterimTranscript(transcript);
            }
          }
        } catch (error) {
          this.onError(error);
        }
      }
    };
    this.websocket.onerror = (error) => {
      this.onError(error);
    };
  }
}
// Example usage (browser-only: requires getUserMedia, WebSocket, and
// WebCodecs AudioEncoder support):
/*
const transcriber = new TranscriptionClient({
  onTranscript: (transcript) => {
    console.log('Final transcript:', transcript.text);
  },
  onInterimTranscript: (transcript) => {
    console.log('Interim transcript:', transcript.text);
  }
});
// Start transcription (throws if mic permission is denied or setup fails)
await transcriber.start();
// Stop transcription and release the microphone, encoder, and socket
transcriber.stop();
*/
export default TranscriptionClient;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment