Skip to content

Instantly share code, notes, and snippets.

@otmb
Last active February 10, 2025 07:34
Show Gist options
  • Save otmb/8ba8da24fc5b86f17603307d679f2fed to your computer and use it in GitHub Desktop.
Save otmb/8ba8da24fc5b86f17603307d679f2fed to your computer and use it in GitHub Desktop.
Sample that preprocesses speech audio for Whisper (16 kHz, 16-bit, mono PCM) and encodes it to FLAC with JavaScript
from subprocess import CalledProcessError, run
import numpy as np
import soundfile as sf
# Default sample rate, in Hz, requested from ffmpeg and used when writing the WAV file.
SAMPLE_RATE = 16000


def load_audio(file: str, sr: int = SAMPLE_RATE):
    """Decode an audio file with the ffmpeg CLI and return it as float32 samples.

    ffmpeg is asked for raw signed 16-bit little-endian PCM ("s16le"), a single
    channel ("-ac 1") and a sample rate of ``sr`` ("-ar"), written to stdout.

    Parameters
    ----------
    file : str
        Path handed to ffmpeg's ``-i`` option.
    sr : int
        Sample rate passed to ffmpeg's ``-ar`` option.

    Returns
    -------
    A flat NumPy float32 array holding the int16 samples divided by 32768.0.

    Raises
    ------
    RuntimeError
        If ffmpeg exits with a non-zero status; the message carries its stderr.
    """
    ffmpeg_args = [
        "ffmpeg",
        "-nostdin",
        "-threads", "0",
        "-i", file,
        "-f", "s16le",
        "-ac", "1",
        "-acodec", "pcm_s16le",
        "-ar", str(sr),
        "-",
    ]
    try:
        completed = run(ffmpeg_args, capture_output=True, check=True)
    except CalledProcessError as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    # Interpret the raw bytes as int16 and scale by 1/32768.
    pcm_int16 = np.frombuffer(completed.stdout, np.int16)
    return pcm_int16.flatten().astype(np.float32) / 32768.0
# Round-trip check: decode the FLAC file produced by the browser page, report
# the array's shape and dtype, then store the samples as a WAV file.
res = load_audio("output.flac")
print(res.shape, res.dtype, sep="\n")
filepath = "output.wav"
sf.write(filepath, res, SAMPLE_RATE)
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="utf-8">
<title>flac convert test</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="js/libflac.wasm.js" type="text/javascript"></script>
</head>
<body>
<script>
// AudioContext used for decoding; created once by main().
let audioCtx;
// The #message status element; looked up by main().
let message;

/**
 * Entry point: resolves the status element, creates the 16 kHz AudioContext
 * on first use and kicks off loading of the sample audio.
 *
 * @returns {Promise<void>} Settles once loadAudio() has finished.
 */
async function main() {
  message = document.getElementById("message");
  if (!audioCtx) {
    // Same vendor-prefixed fallback that transcode() uses for the offline context.
    const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    audioCtx = new AudioContextCtor({ sampleRate: 16000 });
    await loadAudio();
  }
}

/** Runs main() and reports any failure instead of leaving the promise floating. */
function startMain() {
  main().catch((err) => {
    console.error(err);
  });
}

// This inline script is placed above the #message element, so running main()
// immediately would make getElementById("message") return null. Wait until
// the document has been parsed.
if (document.readyState === "loading") {
  document.addEventListener("DOMContentLoaded", startMain);
} else {
  startMain();
}
/**
 * "progress" listener for the FileReader below: writes the number of bytes
 * loaded so far into the status element held in `message`.
 *
 * @param {ProgressEvent} event - Progress event; only `event.loaded` is read.
 */
function readerEvent(event) {
  message.innerText = `fileReading: ${event.loaded}`;
}

// FileReader wired up for progress reporting. NOTE(review): no read is started
// on it anywhere in the visible code.
const reader = new FileReader();
reader.addEventListener("progress", readerEvent);
/**
 * Debug helper: fetches a fixed sample file and hands its bytes to
 * decodeAudioData(). Any failure (network error, non-2xx status, body read
 * error) is reported through decodeError() instead of being thrown.
 *
 * @returns {Promise<void>}
 */
async function loadAudio(){
  // Swap in "test_long.mp4" to exercise a longer clip.
  const name = "test.mp4";
  try {
    const response = await fetch(name);
    // fetch() only rejects on network failure; a 404/500 still resolves, and its
    // error page would otherwise be passed to the audio decoder.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} while fetching ${name}`);
    }
    decodeAudioData(await response.arrayBuffer());
  } catch (err) {
    decodeError(err);
  }
}
/**
 * Decodes encoded audio bytes with the shared AudioContext and forwards the
 * decoded buffer to transcode(). Decode failures and failures raised inside
 * transcode() are both routed to decodeError().
 *
 * @param {ArrayBuffer} arrayBuffer - Encoded audio file contents.
 */
function decodeAudioData(arrayBuffer){
  const onDecoded = (decodedBuffer) => {
    // transcode() is async: without this catch a failure inside it would be an
    // unhandled promise rejection. The Promise executor also captures a
    // synchronous throw.
    new Promise((resolve) => resolve(transcode(decodedBuffer))).catch(decodeError);
  };
  audioCtx.decodeAudioData(arrayBuffer, onDecoded, decodeError);
}
/**
 * Logs a fetch/decode failure to the console.
 *
 * @param {*} error - Usually an Error or DOMException. May also be null or a
 *   non-Error value, in which case its string form is logged instead of
 *   dereferencing `.message`.
 */
function decodeError(error){
  const detail = error != null && error.message ? error.message : String(error);
  console.error(`Unable to fetch the audio file. Error: ${detail}`);
}
/**
 * Renders a decoded AudioBuffer through an OfflineAudioContext, takes channel 0
 * of the rendered result and passes it to flac_encode(). If the encoder reports
 * an error, the error is shown in the #message element.
 *
 * @param {AudioBuffer} audioBuffer - Decoded audio to convert.
 * @returns {Promise<void>}
 */
async function transcode(audioBuffer){
  const OfflineCtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
  const { numberOfChannels, length, sampleRate } = audioBuffer;

  // Offline graph: buffer source -> channel merger -> destination.
  const offlineCtx = new OfflineCtx(numberOfChannels, length, sampleRate);
  const channelMerger = offlineCtx.createChannelMerger(numberOfChannels);
  const bufferSource = offlineCtx.createBufferSource();
  bufferSource.buffer = audioBuffer;

  // The source's output 0 is wired to every merger input.
  // NOTE(review): presumably so channel 0 of the render carries a mono
  // down-mix of all input channels - confirm against the Web Audio mixing rules.
  let inputIndex = 0;
  while (inputIndex < numberOfChannels) {
    bufferSource.connect(channelMerger, 0, inputIndex);
    inputIndex += 1;
  }
  channelMerger.connect(offlineCtx.destination);
  bufferSource.start();

  const renderedBuffer = await offlineCtx.startRendering();
  const monoSamples = renderedBuffer.getChannelData(0);

  // Tear the graph down once rendering has finished.
  channelMerger.disconnect();
  bufferSource.disconnect();

  const encodeResult = flac_encode(monoSamples);
  if (encodeResult.error) {
    // Re-resolve the status element before writing to it.
    message = document.getElementById("message");
    message.innerText = encodeResult.error;
  }
}
/**
 * Converts float PCM samples to signed 16-bit values stored one per element in
 * an Int32Array (the container passed to the libflac encoder below).
 *
 * Each sample is scaled by 0x7FFF * volume, clamped to the int16 range and
 * truncated toward zero; NaN becomes 0.
 *
 * @param {Float32Array|number[]} samples - Float PCM, nominally in [-1, 1].
 * @param {number} [volume=1] - Linear gain applied before clamping.
 * @returns {Int32Array} One 16-bit sample value per element.
 */
function float32ToPcm16(samples, volume = 1) {
  const scale = 0x7FFF * volume;
  const pcm = new Int32Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    const scaled = samples[i] * scale;
    // Out-of-range input must not exceed the 16 bits declared to the encoder.
    const clamped = Math.max(-0x8000, Math.min(0x7FFF, scaled));
    // Storing NaN into an Int32Array yields 0.
    pcm[i] = Math.trunc(clamped);
  }
  return pcm;
}

/**
 * Encodes mono float PCM as 16 kHz / 16-bit FLAC with libflac.js, then exposes
 * the result through the #download link and the #output-audio player.
 *
 * @param {Float32Array} buffer - Mono PCM samples, nominally in [-1, 1].
 * @returns {{error: (string|Error), status: number}} `error` is "" and `status`
 *   is 0 on success. If the encoder cannot be created, `error` is a message
 *   string and `status` is 1. If encoding fails, `error` is the Error that was
 *   raised and `status` is the value of FLAC__stream_encoder_get_state().
 */
function flac_encode(buffer){
  const CHANNELS = 1;
  const SAMPLERATE = 16000;
  const COMPRESSION = 5;
  const BPS = 16;
  const VERIFY = false;
  const BLOCK_SIZE = 0;

  // The wasm build of libflac.js initialises asynchronously; bail out early
  // rather than calling into a module that is not ready.
  if (typeof Flac.isReady === 'function' && !Flac.isReady()) {
    const msg = 'libflac is not ready yet.';
    console.error(msg);
    return {error: msg, status: 1};
  }

  const pcm = float32ToPcm16(buffer);

  // Encoded chunks delivered by the write callback, in order.
  const recBuffers = [];
  function write_callback_fn(chunk){
    recBuffers.push(chunk);
  }
  function metadata_callback_fn(data){
    console.info('meta data: ', data);
  }

  const flac_encoder = Flac.create_libflac_encoder(SAMPLERATE, CHANNELS, BPS, COMPRESSION, 0, VERIFY, BLOCK_SIZE);
  if (!flac_encoder){
    // Nothing was allocated, so there is nothing to delete.
    const msg = 'Error initializing the encoder.';
    console.error(msg);
    return {error: msg, status: 1};
  }

  try {
    const init_status = Flac.init_encoder_stream(flac_encoder, write_callback_fn, metadata_callback_fn);
    if (init_status !== 0){
      throw new Error('Error initializing the encoder.');
    }
    const processed = Flac.FLAC__stream_encoder_process_interleaved(flac_encoder, pcm, pcm.length / CHANNELS);
    if (!processed){
      throw new Error("Error: FLAC__stream_encoder_process_interleaved returned false. " + processed);
    }
    if (!Flac.FLAC__stream_encoder_finish(flac_encoder)){
      throw new Error('Error Finish the encoder.');
    }
  } catch(e){
    console.error(e);
    // The state is read here, before the finally block releases the encoder.
    return {error: e, status: Flac.FLAC__stream_encoder_get_state(flac_encoder)};
  } finally {
    Flac.FLAC__stream_encoder_delete(flac_encoder);
  }

  // Concatenate the chunks; sizes come from the chunks themselves so the
  // offsets always match the data being copied.
  const totalBytes = recBuffers.reduce((sum, chunk) => sum + chunk.length, 0);
  const samples = new Uint8Array(totalBytes);
  let offset = 0;
  for (const chunk of recBuffers) {
    samples.set(chunk, offset);
    offset += chunk.length;
  }

  const audioBlob = URL.createObjectURL(new Blob([samples], { type: 'audio/flac' }));
  const downloadLink = document.getElementById('download');
  downloadLink.href = audioBlob;
  downloadLink.download = 'output.flac';
  downloadLink.style.display = 'inline';
  const audio = document.getElementById('output-audio');
  audio.src = audioBlob;
  audio.style.display = 'inline';
  return {error: "", status: 0};
}
</script>
<audio id="output-audio" controls style="display: none;"></audio>
<a id="download" class="btn btn-primary btn-lg" role="button" style="display: none;">Download</a>
<div id="message"></div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment