AchrafAmil · May 9, 2023 14:29 · gregoirenovel · May 24, 2023
diff --git a/copilot-api-sample.ts b/copilot-api-sample.ts
 #!/usr/bin/env tnode
 import { execSync } from "child_process";
 import fs from "fs";
 import WebSocket from "ws";

 // Duration of audio carried by each chunk, and the pause between two sends.
 const WINDOW_SIZE_MS = 50;
 // Sample rate (Hz) used both for the ffmpeg conversion and the listen config.
 const SAMPLE_RATE = 16000;
 // Bytes per audio sample; the audio is converted to 16-bit PCM (s16le).
 const BYTES_PER_SAMPLE = 2;

 // Sent as the bearer token in the websocket Authorization header.
 // It is empty here and has to be filled in before running the script.
 const API_KEY = "";
 // Input recordings, read from the current directory by ffmpeg.
 const DOCTOR_AUDIO_FILE = "doctor.m4a";
 const PATIENT_AUDIO_FILE = "patient.m4a";

 /**
  * Resolves (with no value) after roughly `duration` milliseconds.
  *
  * @param duration - delay in milliseconds handed to `setTimeout`.
  * @returns a promise that resolves once the timer fires.
  */
 const sleep = (duration: number): Promise<void> =>
   new Promise<void>((resolve) => {
     setTimeout(resolve, duration);
   });

 /**
  * Streams the doctor and patient recordings to the Copilot listen websocket
  * and prints the collected final transcript items when the socket closes.
  *
  * Steps: convert both audio files to raw signed 16-bit little-endian mono PCM
  * with ffmpeg, open the websocket, send the listen configuration, send both
  * streams chunk by chunk (one chunk of WINDOW_SIZE_MS of audio per stream,
  * then a WINDOW_SIZE_MS pause), and finally send an "end" event.
  *
  * @throws Error if the websocket is not open after about one second of
  *   polling, or if it stops being open while audio is still being streamed.
  */
 const main = async (): Promise<void> => {
   // ffmpeg does not create missing output directories, so make sure ./data exists.
   fs.mkdirSync("./data", { recursive: true });
   // `yes |` answers ffmpeg's overwrite prompt when the .raw files already exist.
   execSync(
     `yes | ffmpeg -i ./${DOCTOR_AUDIO_FILE} -f s16le -ar ${SAMPLE_RATE} -ac 1 ./data/doctor.raw`,
   );
   execSync(
     `yes | ffmpeg -i ./${PATIENT_AUDIO_FILE} -f s16le -ar ${SAMPLE_RATE} -ac 1 ./data/patient.raw`,
   );
   const bytesDoctor = fs.readFileSync("./data/doctor.raw");
   const bytesPatient = fs.readFileSync("./data/patient.raw");
   console.log("Loaded raw audio from doctor and patient.");

   // First message sent on the socket: declares the audio format and one
   // stream per speaker.
   const config = {
     object: "listen_config",
     output_objects: ["transcript_item", "note"],
     encoding: "pcm_s16le",
     sample_rate: SAMPLE_RATE,
     language: "en",
     streams: [
       { id: "stream1", speaker_type: "doctor" },
       { id: "stream2", speaker_type: "patient" },
     ],
   };

   // Final transcript items received so far; printed when the socket closes.
   // noinspection JSMismatchedCollectionQueryUpdate
   const transcriptItems: {
     speaker: string;
     start_offset_ms: number;
     end_offset_ms: number;
     text: string;
   }[] = [];

   const query = `wss://api.nabla.com/v1/server/copilot/listen`;

   console.log(`starting ws on query: ${query}`);

   const ws = new WebSocket(query, ["copilot-listen-protocol"], {
     headers: {
       Authorization: `Bearer ${API_KEY}`,
     },
   });
   // Without a listener, an "error" event on the socket is thrown as an
   // uncaught exception; log it and let the checks below report the failure.
   ws.onerror = (e) => {
     console.error(`Websocket error: ${e.message}`);
   };
   ws.onclose = (e) => {
     console.log(`Websocket closed: ${e.code} ${e.reason}`);
     console.log("\n### Full transcript:");
     for (const item of transcriptItems) {
       console.log(
         `${item.speaker} - ${msToTime(item.start_offset_ms)} to ${msToTime(
           item.end_offset_ms,
         )}: ${item.text}`,
       );
     }
   };
   ws.onmessage = (mes) => {
     console.log(mes.data);
     if (typeof mes.data === "string") {
       const data = JSON.parse(mes.data);
       // Only final transcript items are kept for the closing summary.
       if (data.object === "transcript_item" && data.is_final) {
         // push (not concat) so that the parsed value is stored as one entry.
         transcriptItems.push(data);
         console.log("final transcript item remembered.");
       }
     }
   };

   // Wait for the websocket to open: poll up to 10 times, 100ms apart.
   for (let attempt = 0; attempt < 10 && ws.readyState !== ws.OPEN; attempt++) {
     await sleep(100);
   }
   if (ws.readyState !== ws.OPEN) throw new Error("Websocket did not open");

   ws.send(JSON.stringify(config));

   // Sends audio every 50ms. Sample rate is 16kHz. Each sample is 2 bytes.
   const chunkSize = (WINDOW_SIZE_MS / 1000) * SAMPLE_RATE * BYTES_PER_SAMPLE;
   const numUpdatesPerSecond = Math.floor(1000 / WINDOW_SIZE_MS);
   // May be fractional: a trailing partial chunk is still sent because
   // subarray clamps its end index to the buffer length.
   // NOTE(review): only the overlap of the two recordings is streamed; the
   // tail of the longer one is never sent - confirm this is intended.
   const chunkCount =
     Math.min(bytesDoctor.length, bytesPatient.length) / chunkSize;

   // Sends chunk number `index` of `bytes` as a base64 "audio_chunk" message.
   const sendAudioChunk = (streamId: string, bytes: Buffer, index: number) => {
     ws.send(
       JSON.stringify({
         object: "audio_chunk",
         payload: bytes
           .subarray(index * chunkSize, (index + 1) * chunkSize)
           .toString("base64"),
         stream_id: streamId,
       }),
     );
   };

   for (let i = 0; i < chunkCount; i++) {
     // Stop instead of pacing through the rest of the audio on a dead socket.
     if (ws.readyState !== ws.OPEN) {
       throw new Error("Websocket closed before all audio was streamed");
     }
     sendAudioChunk("stream1", bytesDoctor, i);
     sendAudioChunk("stream2", bytesPatient, i);
     // Progress log every 10 seconds of transmitted audio.
     if (i % (numUpdatesPerSecond * 10) === 0) {
       console.log(
         `Transmitted ${(i * WINDOW_SIZE_MS) / 1000} seconds of audio.`,
       );
     }
     await sleep(WINDOW_SIZE_MS);
   }
   console.log(
     "finished streaming, sending an 'end' event to generate the final note",
   );
   ws.send(
     JSON.stringify({
       object: "end",
     }),
   );
 };

 // Report a failure of the async entry point explicitly and exit non-zero,
 // rather than leaving the returned promise floating.
 main().catch((err: unknown) => {
   console.error(err);
   process.exit(1);
 });

 /**
  * Formats a millisecond offset as `m:ss`.
  *
  * Seconds are always two digits (65000 -> "1:05") and minutes are the total
  * number of whole minutes, so offsets past one hour stay unambiguous
  * (3661000 -> "61:01"). Negative offsets are treated as 0.
  *
  * @param milli - offset in milliseconds.
  * @returns the offset rendered as minutes and zero-padded seconds.
  */
 const msToTime = (milli: number) => {
   const totalSeconds = Math.max(0, Math.floor(milli / 1000));
   const minutes = Math.floor(totalSeconds / 60);
   const seconds = totalSeconds % 60;

   return `${minutes}:${String(seconds).padStart(2, "0")}`;
 };
	#!/usr/bin/env tnode
	import { execSync } from "child_process";
	import fs from "fs";
	import WebSocket from "ws";

	// Duration of audio carried by each chunk, and the pause between two sends.
	const WINDOW_SIZE_MS = 50;
	// Sample rate (Hz) used both for the ffmpeg conversion and the listen config.
	const SAMPLE_RATE = 16000;
	// Bytes per audio sample; the audio is converted to 16-bit PCM (s16le).
	const BYTES_PER_SAMPLE = 2;

	// Sent as the bearer token in the websocket Authorization header.
	// It is empty here and has to be filled in before running the script.
	const API_KEY = "";
	// Input recordings, read from the current directory by ffmpeg.
	const DOCTOR_AUDIO_FILE = "doctor.m4a";
	const PATIENT_AUDIO_FILE = "patient.m4a";

	/**
	 * Resolves (with no value) after roughly `duration` milliseconds.
	 *
	 * @param duration - delay in milliseconds handed to `setTimeout`.
	 * @returns a promise that resolves once the timer fires.
	 */
	const sleep = (duration: number): Promise<void> =>
	  new Promise<void>((resolve) => {
	    setTimeout(resolve, duration);
	  });

	/**
	 * Streams the doctor and patient recordings to the Copilot listen websocket
	 * and prints the collected final transcript items when the socket closes.
	 *
	 * Steps: convert both audio files to raw signed 16-bit little-endian mono PCM
	 * with ffmpeg, open the websocket, send the listen configuration, send both
	 * streams chunk by chunk (one chunk of WINDOW_SIZE_MS of audio per stream,
	 * then a WINDOW_SIZE_MS pause), and finally send an "end" event.
	 *
	 * @throws Error if the websocket is not open after about one second of
	 *   polling, or if it stops being open while audio is still being streamed.
	 */
	const main = async (): Promise<void> => {
	  // ffmpeg does not create missing output directories, so make sure ./data exists.
	  fs.mkdirSync("./data", { recursive: true });
	  // `yes |` answers ffmpeg's overwrite prompt when the .raw files already exist.
	  execSync(
	    `yes | ffmpeg -i ./${DOCTOR_AUDIO_FILE} -f s16le -ar ${SAMPLE_RATE} -ac 1 ./data/doctor.raw`,
	  );
	  execSync(
	    `yes | ffmpeg -i ./${PATIENT_AUDIO_FILE} -f s16le -ar ${SAMPLE_RATE} -ac 1 ./data/patient.raw`,
	  );
	  const bytesDoctor = fs.readFileSync("./data/doctor.raw");
	  const bytesPatient = fs.readFileSync("./data/patient.raw");
	  console.log("Loaded raw audio from doctor and patient.");

	  // First message sent on the socket: declares the audio format and one
	  // stream per speaker.
	  const config = {
	    object: "listen_config",
	    output_objects: ["transcript_item", "note"],
	    encoding: "pcm_s16le",
	    sample_rate: SAMPLE_RATE,
	    language: "en",
	    streams: [
	      { id: "stream1", speaker_type: "doctor" },
	      { id: "stream2", speaker_type: "patient" },
	    ],
	  };

	  // Final transcript items received so far; printed when the socket closes.
	  // noinspection JSMismatchedCollectionQueryUpdate
	  const transcriptItems: {
	    speaker: string;
	    start_offset_ms: number;
	    end_offset_ms: number;
	    text: string;
	  }[] = [];

	  const query = `wss://api.nabla.com/v1/server/copilot/listen`;

	  console.log(`starting ws on query: ${query}`);

	  const ws = new WebSocket(query, ["copilot-listen-protocol"], {
	    headers: {
	      Authorization: `Bearer ${API_KEY}`,
	    },
	  });
	  // Without a listener, an "error" event on the socket is thrown as an
	  // uncaught exception; log it and let the checks below report the failure.
	  ws.onerror = (e) => {
	    console.error(`Websocket error: ${e.message}`);
	  };
	  ws.onclose = (e) => {
	    console.log(`Websocket closed: ${e.code} ${e.reason}`);
	    console.log("\n### Full transcript:");
	    for (const item of transcriptItems) {
	      console.log(
	        `${item.speaker} - ${msToTime(item.start_offset_ms)} to ${msToTime(
	          item.end_offset_ms,
	        )}: ${item.text}`,
	      );
	    }
	  };
	  ws.onmessage = (mes) => {
	    console.log(mes.data);
	    if (typeof mes.data === "string") {
	      const data = JSON.parse(mes.data);
	      // Only final transcript items are kept for the closing summary.
	      if (data.object === "transcript_item" && data.is_final) {
	        // push (not concat) so that the parsed value is stored as one entry.
	        transcriptItems.push(data);
	        console.log("final transcript item remembered.");
	      }
	    }
	  };

	  // Wait for the websocket to open: poll up to 10 times, 100ms apart.
	  for (let attempt = 0; attempt < 10 && ws.readyState !== ws.OPEN; attempt++) {
	    await sleep(100);
	  }
	  if (ws.readyState !== ws.OPEN) throw new Error("Websocket did not open");

	  ws.send(JSON.stringify(config));

	  // Sends audio every 50ms. Sample rate is 16kHz. Each sample is 2 bytes.
	  const chunkSize = (WINDOW_SIZE_MS / 1000) * SAMPLE_RATE * BYTES_PER_SAMPLE;
	  const numUpdatesPerSecond = Math.floor(1000 / WINDOW_SIZE_MS);
	  // May be fractional: a trailing partial chunk is still sent because
	  // subarray clamps its end index to the buffer length.
	  // NOTE(review): only the overlap of the two recordings is streamed; the
	  // tail of the longer one is never sent - confirm this is intended.
	  const chunkCount =
	    Math.min(bytesDoctor.length, bytesPatient.length) / chunkSize;

	  // Sends chunk number `index` of `bytes` as a base64 "audio_chunk" message.
	  const sendAudioChunk = (streamId: string, bytes: Buffer, index: number) => {
	    ws.send(
	      JSON.stringify({
	        object: "audio_chunk",
	        payload: bytes
	          .subarray(index * chunkSize, (index + 1) * chunkSize)
	          .toString("base64"),
	        stream_id: streamId,
	      }),
	    );
	  };

	  for (let i = 0; i < chunkCount; i++) {
	    // Stop instead of pacing through the rest of the audio on a dead socket.
	    if (ws.readyState !== ws.OPEN) {
	      throw new Error("Websocket closed before all audio was streamed");
	    }
	    sendAudioChunk("stream1", bytesDoctor, i);
	    sendAudioChunk("stream2", bytesPatient, i);
	    // Progress log every 10 seconds of transmitted audio.
	    if (i % (numUpdatesPerSecond * 10) === 0) {
	      console.log(
	        `Transmitted ${(i * WINDOW_SIZE_MS) / 1000} seconds of audio.`,
	      );
	    }
	    await sleep(WINDOW_SIZE_MS);
	  }
	  console.log(
	    "finished streaming, sending an 'end' event to generate the final note",
	  );
	  ws.send(
	    JSON.stringify({
	      object: "end",
	    }),
	  );
	};

	// Report a failure of the async entry point explicitly and exit non-zero,
	// rather than leaving the returned promise floating.
	main().catch((err: unknown) => {
	  console.error(err);
	  process.exit(1);
	});

	/**
	 * Formats a millisecond offset as `m:ss`.
	 *
	 * Seconds are always two digits (65000 -> "1:05") and minutes are the total
	 * number of whole minutes, so offsets past one hour stay unambiguous
	 * (3661000 -> "61:01"). Negative offsets are treated as 0.
	 *
	 * @param milli - offset in milliseconds.
	 * @returns the offset rendered as minutes and zero-padded seconds.
	 */
	const msToTime = (milli: number) => {
	  const totalSeconds = Math.max(0, Math.floor(milli / 1000));
	  const minutes = Math.floor(totalSeconds / 60);
	  const seconds = totalSeconds % 60;

	  return `${minutes}:${String(seconds).padStart(2, "0")}`;
	};